In [1]:
import os
import json
import numpy as np
import pandas as pd
import torch
import random
from tqdm import tqdm
from torch.nn import CrossEntropyLoss
import logging
import cv2

from scipy.optimize import minimize
from tqdm import tqdm
from multiprocessing import Pool

from rsna2024.runner import Runner
from rsna2024.utils import rsna_lumbar_metric

# --- Threading limits -------------------------------------------------------
# Turn off OpenCV's own threading and its OpenCL backend.
cv2.setNumThreads(0)
cv2.ocl.setUseOpenCL(False)
# Ask the common numerical backends to use a single thread each.
# NOTE(review): numpy and torch are already imported above, so these variables
# may be read too late to affect this process; they are still inherited by
# child processes started afterwards -- confirm this is the intent.
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"
os.environ["VECLIB_MAXIMUM_THREADS"] = "1"
os.environ["NUMEXPR_NUM_THREADS"] = "1"

# Only show warnings (and above) from albumentations.
logging.getLogger('albumentations').setLevel(logging.WARNING)

# --- Paths and shared data --------------------------------------------------
# CSV with label coordinates (per its name: predicted ones).
# NOTE(review): 593 / 654 / 603 presumably identify the runs that produced the
# coordinates -- TODO confirm.
coord_filename = 'train_label_coordinates_predicted_v2_{}_{}_{}.csv'.format(593, 654, 603)

# Project root. Defaults to the original location; set RSNA2024_ROOT_DIR to run
# the notebook on another machine without editing this cell.
root_dir = os.environ.get('RSNA2024_ROOT_DIR', '/media/latlab/MR/projects/kaggle-rsna-2024')
data_dir = os.path.join(root_dir, 'data', 'raw')
img_dir = os.path.join(data_dir, 'train_images')

# Identifier columns are read as strings so they are never parsed as numbers.
df_series = pd.read_csv(
    os.path.join(data_dir, 'train_series_descriptions.csv'),
    dtype={'study_id': 'str', 'series_id': 'str'},
)

# First CUDA GPU when available, CPU otherwise.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

levels = ['L1/L2', 'L2/L3', 'L3/L4', 'L4/L5', 'L5/S1']
sides = ['left', 'right']

# Coordinates handed to every Runner.predict call below.
coord_df = pd.read_csv(
    os.path.join(root_dir, 'data', 'processed', coord_filename),
    dtype={'study_id': 'str', 'series_id': 'str'},
)


def load_config(config_path):
    """Read a JSON configuration file and return the parsed object.

    Args:
        config_path: Path to the JSON file.

    Returns:
        Whatever ``json.load`` produces for the file (a dict for the model
        configs used in this notebook).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON is UTF-8 by specification; without an explicit encoding open()
    # falls back to the locale encoding, which is not UTF-8 on every platform.
    with open(config_path, encoding='utf-8') as f:
        return json.load(f)


def get_metric(y_true, y_pred):
    """Score predictions with the project's ``rsna_lumbar_metric``.

    Targets equal to -100 are replaced by 0 on a copy, so the caller's tensor
    is left untouched.
    NOTE(review): -100 is presumably the "ignore" label sentinel -- confirm.

    Args:
        y_true: Tensor of integer targets.
        y_pred: Predictions, passed through to ``prepare_data`` unchanged.

    Returns:
        The value returned by ``rsna_lumbar_metric.score``.
    """
    # masked_fill is out-of-place: it returns a new tensor.
    targets = y_true.masked_fill(y_true == -100, 0)
    solution, submission = rsna_lumbar_metric.prepare_data(targets, y_pred)
    return rsna_lumbar_metric.score(
        solution=solution,
        submission=submission,
        row_id_column_name='row_id',
        any_severe_scalar=1.0,
    )


def print_loss(baseline_loss, preds, ys):
    """Print the class-weighted cross-entropy of ``preds`` next to a baseline.

    Args:
        baseline_loss: Reference loss value to compare against.
        preds: Array-like of predictions; converted with ``torch.tensor``.
        ys: Array-like of targets; converted with ``torch.tensor``.

    Prints the new loss, then the absolute and relative change versus the
    baseline. Returns nothing.
    """
    # Per-class weights 1 / 2 / 4, placed on the notebook-wide device.
    class_weights = torch.tensor([1.0, 2.0, 4.0]).to(device)
    criterion = CrossEntropyLoss(weight=class_weights)

    logits = torch.tensor(preds).to(device)
    targets = torch.tensor(ys).to(device)
    loss = criterion(logits, targets)

    # Positive gain means the new loss is below the baseline.
    gain = baseline_loss - loss
    print(f'\nLoss: {baseline_loss:.4f} -> {loss:.4f}')
    print(f'Improvement: {gain:.4f}, {100 * gain / baseline_loss:.1f}%')


# Per-condition baseline losses; None means "not set yet". The split-model
# cells assign them, and the global-model cell falls back to its own defaults
# for any that are still None.
spinal_baseline = foraminal_baseline = subarticular_baseline = None

## Split ROI Models
### Spinal

In [None]:
spinal_model_name = 'giddy-monkey-1266'
cfg = load_config(
    os.path.join(root_dir, 'models', 'rsna-2024-' + spinal_model_name, 'config.json')
)
spinal_preds, spinal_ys, spinal_data = Runner(
    cfg, model_name='rsna-2024-' + spinal_model_name
).predict(df_coordinates=coord_df)

# Unfold the 5 cv folds and 5 levels: view the flat output as
# (fold, level, sample, class), move the level axis to the end, then merge
# fold and sample into one row axis -> (rows, 3, 5).
# NOTE(review): assumes predict() returns rows ordered fold -> level -> sample
# with equally sized folds -- TODO confirm against Runner.predict.
spinal_preds = np.transpose(spinal_preds.reshape(5, 5, -1, 3), (0, 2, 3, 1)).reshape(-1, 3, 5)
spinal_ys = np.transpose(spinal_ys.reshape(5, 5, -1), (0, 2, 1)).reshape(-1, 5)

# Reference loss the spinal model is compared against.
spinal_baseline = 0.2714
print_loss(spinal_baseline, spinal_preds, spinal_ys)

### Foraminal

In [None]:
foraminal_model_name = 'hardy-voice-1244'
cfg = load_config(
    os.path.join(root_dir, 'models', 'rsna-2024-' + foraminal_model_name, 'config.json')
)
foraminal_preds, foraminal_ys, foraminal_data = Runner(
    cfg, model_name='rsna-2024-' + foraminal_model_name
).predict(df_coordinates=coord_df)

# Unfold 5 cv folds, 5 levels, (study_ids) and 2 sides: view the flat output
# as (fold, level, study, side, class) and reorder to
# (fold, study, class, side, level) before flattening, so the last axis of
# length 10 is side-major (5 levels of the first side, then 5 of the second).
# NOTE(review): assumes predict() returns rows in fold -> level -> study -> side
# order with equally sized folds -- TODO confirm.
foraminal_preds = np.transpose(
    foraminal_preds.reshape(5, 5, -1, 2, 3), (0, 2, 4, 3, 1)
).reshape(-1, 3, 10)
foraminal_ys = np.transpose(
    foraminal_ys.reshape(5, 5, -1, 2), (0, 2, 3, 1)
).reshape(-1, 10)

# Reference loss the foraminal model is compared against.
foraminal_baseline = 0.5308
print_loss(foraminal_baseline, foraminal_preds, foraminal_ys)

### Subarticular

In [None]:
subarticular_model_name = 'fiery-meadow-1254'
subarticular_config_path = os.path.join(
    root_dir, 'models', 'rsna-2024-' + subarticular_model_name, 'config.json'
)
cfg = load_config(subarticular_config_path)
subarticular_preds, subarticular_ys, subarticular_data = Runner(
    cfg, model_name='rsna-2024-' + subarticular_model_name
).predict(df_coordinates=coord_df)

# Unfold 5 cv folds, 5 levels and 2 sides: view the flat output as
# (fold, level, study, side, class), reorder to
# (fold, study, class, side, level) and flatten to (rows, 3, 10); the last
# axis is side-major.
# NOTE(review): assumes predict() returns rows in fold -> level -> study -> side
# order with equally sized folds -- TODO confirm.
subarticular_preds = np.transpose(
    subarticular_preds.reshape(5, 5, -1, 2, 3), (0, 2, 4, 3, 1)
).reshape(-1, 3, 10)
subarticular_ys = np.transpose(
    subarticular_ys.reshape(5, 5, -1, 2), (0, 2, 3, 1)
).reshape(-1, 10)

# Reference loss the subarticular model is compared against.
subarticular_baseline = 0.5807
print_loss(subarticular_baseline, subarticular_preds, subarticular_ys)

### Combined

In [None]:
# Stack the three split models along the last axis:
# 5 spinal + 10 foraminal + 10 subarticular columns = 25 per row.
# The rows of the three models are concatenated as-is, i.e. they are taken to
# be in the same order.
preds = torch.tensor(
    np.concatenate((spinal_preds, foraminal_preds, subarticular_preds), axis=-1)
).to(device)
ys = torch.tensor(
    np.concatenate((spinal_ys, foraminal_ys, subarticular_ys), axis=-1)
).to(device)

# Reference competition metric the combined split models are compared against.
baseline_metric = 0.4151
metric = get_metric(ys, preds)
print(f'Baseline metric: {baseline_metric:.4f} -> {metric:.4f}')
print(
    f'Improvement: {baseline_metric - metric:.4f}, '
    f'{100 * (baseline_metric - metric) / baseline_metric:.1f}%'
)

## Global ROI Model

In [None]:
model_name = 'dashing-spaceship-1252'
cfg = load_config(
    os.path.join(root_dir, 'models', 'rsna-2024-' + model_name, 'config.json')
)
preds_global, ys_global, data_global = Runner(
    cfg, model_name='rsna-2024-' + model_name
).predict(df_coordinates=coord_df)

# Unfold the 5 cv folds and 5 levels: view the flat output as
# (fold, level, study, class, condition), move the level axis to the end and
# flatten to (rows, 3, 25). The last axis is condition-major, which is what the
# 0:5 / 5:15 / 15:25 slices below rely on.
# NOTE(review): assumes predict() returns rows in fold -> level -> study order
# with equally sized folds -- TODO confirm.
preds_global = np.transpose(
    preds_global.reshape(5, 5, -1, 3, 5), (0, 2, 3, 4, 1)
).reshape(-1, 3, 25)
ys_global = np.transpose(
    ys_global.reshape(5, 5, -1, 5), (0, 2, 3, 1)
).reshape(-1, 25)

# Fallback baselines, used only when the matching split-model cell was not run.
spinal_baseline = 0.2739 if spinal_baseline is None else spinal_baseline
foraminal_baseline = 0.5183 if foraminal_baseline is None else foraminal_baseline
subarticular_baseline = 0.5855 if subarticular_baseline is None else subarticular_baseline

# Per-condition losses: columns 0-4 spinal, 5-14 foraminal, 15-24 subarticular.
print_loss(spinal_baseline, preds_global[..., :5], ys_global[..., :5])
print_loss(foraminal_baseline, preds_global[..., 5:15], ys_global[..., 5:15])
print_loss(subarticular_baseline, preds_global[..., 15:], ys_global[..., 15:])

# From here on the global predictions/targets are torch tensors on `device`.
global_baseline = 0.4184
preds_global = torch.tensor(preds_global).to(device)
ys_global = torch.tensor(ys_global).to(device)
global_metric = get_metric(ys_global, preds_global)
print(f'\nGlobal metric: {global_baseline:.4f} -> {global_metric:.4f}')
print(
    f'Improvement: {global_baseline - global_metric:.4f}, '
    f'{100 * (global_baseline - global_metric) / global_baseline:.1f}%'
)

## Ensemble

In [None]:
# Re-order the global-model rows so they line up with the split-model rows,
# matching on study_id.
global_study_ids = data_global['study_id'].tolist()
split_study_ids = spinal_data['study_id'].tolist()

# study_id -> first row position in the global output. One dict lookup per
# study replaces rebuilding and linearly scanning the id list for every study;
# setdefault keeps the first occurrence, like list.index does.
global_row_of = {}
for row, study_id in enumerate(global_study_ids):
    global_row_of.setdefault(study_id, row)

missing_ids = [sid for sid in split_study_ids if sid not in global_row_of]
if missing_ids:
    # Same exception type list.index would raise, with a usable message.
    raise ValueError(
        f'{len(missing_ids)} study_id(s) missing from the global model output, '
        f'e.g. {missing_ids[:5]}'
    )

idx = [global_row_of[sid] for sid in split_study_ids]
preds_global_sorted = preds_global[idx]
ys_global_sorted = ys_global[idx]

# Guard: after re-ordering, both approaches must carry identical targets;
# otherwise averaging their predictions row by row would be meaningless.
assert torch.equal(ys, ys_global_sorted)
preds_ensemble = (preds + preds_global_sorted) / 2

# Compare against the better (lower) of the two individual metrics.
baseline_metric = min(metric, global_metric)
ensemble_metric = get_metric(ys, preds_ensemble)
# The label now says "Ensemble": this line reports the ensemble score.
print(f'\nEnsemble metric: {baseline_metric:.4f} -> {ensemble_metric:.4f}')
print(f'Improvement: {baseline_metric - ensemble_metric:.4f}, {100 * (baseline_metric - ensemble_metric) / baseline_metric:.1f}%')