In [2]:
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd

# Configs

In [None]:
!cp -r /kaggle/input/ARISE-2025/* .
!chmod +x preprocess_for_yolo.sh
!chmod +x preprocess_for_classifier.sh

In [None]:
!pip install -q -r requirements.txt
!pip install -q monai

  Preparing metadata (setup.py) ... [?25l[?25hdone
  Building wheel for s4torch (setup.py) ... [?25l[?25hdone


In [None]:
%%writefile /kaggle/working/config/classification/train.yaml
# Top-level Hydra config for classification/train.py.
# NOTE(review): train.py seeds with a hard-coded 43 and does not read `random_state`
# in the code visible in this notebook - confirm which value is authoritative.
random_state: 43
# Config groups composed into this file (one YAML per group under config/classification/).
defaults:
  - model: timm
  - optimizer: adamw
  - train_dataset: base_ds
  - val_dataset: base_ds
  - scheduler: step_lr

# Training configuration
training:
  # Per-class weights handed to nn.CrossEntropyLoss(weight=...) in train.py:
  # 5 entries for the JSN head, 6 entries for the erosion head.
  normalized_jsn_class_weights: [0.363, 0.624, 0.272, 0.575, 3.827]
  normalized_erosion_class_weights: [0.012, 0.183, 0.391, 2.571, 7.714, 1.058]
  # num_workers and batch_size are used for both the train and the val DataLoader.
  num_workers: 4
  epochs: 10
  batch_size: 16
  # Directory passed to save_checkpoint() after every epoch.
  save_dir: "/kaggle/working/checkpoints/classification"  

# WandB configuration
wandb:
  # Exported as WANDB_API_KEY by train.py; do not commit a real key here.
  api_key: ""
  project: "ARISE-2025"

Overwriting /kaggle/working/config/classification/train.yaml


In [6]:
%%writefile /kaggle/working/config/classification/model/timm.yaml
# Hydra config instantiating classification.models.timms.timm_Model.
_target_: classification.models.timms.timm_Model
model_name: "timm/convnext_tiny.in12k"
# Accepted by timm_Model.__init__ but not read there; the number of heads
# is determined by the entries of output_layers_shapes.
n_output_layers: 2
# One classification head per entry: key = number of output classes,
# value = name under which the head's logits appear in the model's output dict.
output_layers_shapes:
  6: "erosion"
  5: "jsn"
pretrained: True
# Forwarded to timm.create_model(..., drop_rate=...).
drop_rate: 0.1

Writing /kaggle/working/config/classification/model/timm.yaml


In [61]:
%%writefile /kaggle/working/config/classification/model/rnn.yaml
# Alternative model config; train.yaml's defaults select `model: timm`, so this
# file is only used when the model group is overridden.
# NOTE(review): the target class is not shown in this notebook - the meaning of
# each key below should be confirmed against classification/models/rnn.py.
_target_: classification.models.rnn.S4ImageClassifier
img_height: 128
img_width: 128
d_model: 128
n_blocks: 4
dropout: 0.2
# presumably the JSN (5) and erosion (6) class counts - TODO confirm
num_classes1: 5
num_classes2: 6

Overwriting /kaggle/working/config/classification/model/rnn.yaml


In [69]:
%%writefile /kaggle/working/config/classification/optimizer/adamw.yaml
# Optimizer settings. train.py does not pass this node to hydra's instantiate();
# it resolves `_target_` to a class and builds two parameter groups by hand.
_target_: torch.optim.AdamW
# Learning rate of the classification heads (model.heads). Not an AdamW argument.
head_lr: 3e-4
# Learning rate of the backbone (model.backbone).
lr: 5e-05
# Applied to both parameter groups.
# NOTE(review): 1 is far above AdamW's default of 0.01 - confirm this is intentional.
weight_decay: 1

Overwriting /kaggle/working/config/classification/optimizer/adamw.yaml


In [9]:
%%writefile /kaggle/working/config/classification/scheduler/step_lr.yaml
# LR schedule. train.py calls scheduler.step() once per epoch, so with these
# values every learning rate is halved every 2 epochs.
_target_: torch.optim.lr_scheduler.StepLR
step_size: 2
gamma: 0.5

Overwriting /kaggle/working/config/classification/scheduler/step_lr.yaml


In [None]:
%%writefile /kaggle/working/classification/models/timms.py
import torch
import torchvision

from torch import nn
import timm


def init_weights(m):
    """Xavier-normal initialise Linear/Conv2d weights and zero their biases.

    Meant to be passed to ``nn.Module.apply``; modules of any other type are
    left untouched.
    """
    if not isinstance(m, (nn.Linear, nn.Conv2d)):
        return
    torch.nn.init.xavier_normal_(m.weight)
    if m.bias is None:
        return
    torch.nn.init.zeros_(m.bias)


class timm_Model(nn.Module):
    """timm backbone followed by one small MLP head per scoring task.

    Args:
        model_name: Name passed to ``timm.create_model``. The created model must
            expose ``head.fc.in_features`` (read below to size the heads).
        drop_rate: Forwarded to ``timm.create_model``.
        n_output_layers: Accepted for config compatibility; not used. The number
            of heads is ``len(output_layers_shapes)``.
        output_layers_shapes: Mapping ``{num_classes: head_name}``. One head is
            built per entry and its logits are returned under ``head_name``.
            Because the mapping is keyed by class count, two heads cannot share
            the same number of classes. Defaults to ``{5: "erosion", 4: "jsn"}``.
        pretrained: Forwarded to ``timm.create_model``.
    """

    def __init__(self,
                 model_name,
                 drop_rate=0.0,
                 n_output_layers=2,
                 output_layers_shapes=None,
                 pretrained=True):
        super().__init__()

        # Build the default per instance: a dict literal in the signature would
        # be shared (and mutable) across every model created without the argument.
        if output_layers_shapes is None:
            output_layers_shapes = {5: "erosion", 4: "jsn"}

        self.output_mapping = output_layers_shapes
        self.backbone = timm.create_model(model_name, pretrained=pretrained, drop_rate=drop_rate)

        # Must be read before the backbone's own head is replaced below.
        in_features = self.backbone.head.fc.in_features
        self.heads = nn.ModuleList([
            nn.Sequential(
                nn.Linear(in_features, in_features // 2),
                nn.GELU(),
                nn.Linear(in_features // 2, n_classes)
            )
            for n_classes in self.output_mapping.keys()
        ])
        # Drop the backbone's classifier so the backbone output feeds the custom heads.
        self.backbone.head = nn.Identity()

        self.heads.apply(init_weights)

    def forward(self, x):
        """Return ``{head_name: logits}`` for a batch of images ``x``."""
        feats = self.backbone(x)
        # NOTE(review): this keeps the feature vector at spatial position (4, 4)
        # only, instead of pooling over the map, and needs a map of at least 5x5.
        # Left unchanged because existing checkpoints were trained with it.
        feats = feats[:, :, 4, 4]
        return {
            # Each head is named via the output size of its final Linear layer.
            self.output_mapping[head[-1].out_features]: head(feats)
            for head in self.heads
        }


if __name__ == "__main__":
    # Smoke test. timm_Model has no default model name, so one must be given;
    # pretrained=False avoids downloading weights just to check output shapes.
    model = timm_Model("timm/convnext_tiny.in12k", pretrained=False)
    print(model(torch.randn((1, 3, 224, 224))))

Overwriting /kaggle/working/classification/models/timms.py


In [None]:
%%writefile /kaggle/working/config/classification/train_dataset/base_ds.yaml
# Training dataset: every file in img_dir is one sample; labels are parsed from
# the file name by ImageClassificationDataset.
_target_: data_utils.datasets.ImageClassificationDataset
img_dir: "/kaggle/working/data/classifier_data/train"
# The transform is called with the image *path*, so LoadImage must come first.
transform:
  _target_: monai.transforms.Compose
  transforms:
    - _target_: monai.transforms.LoadImage
    # CLAHE on the first channel; outputs a 3-channel, channel-first tensor in [0, 1].
    - _target_: data_utils.datasets.Processor
      clip_limit: 1.0
    # Pads the shorter spatial side so the resize below does not distort the aspect ratio.
    - _target_: data_utils.datasets.DynamicSquarePad
    - _target_: monai.transforms.Resize
      spatial_size: [224, 224]
      mode: bilinear
    # Only augmentation in the pipeline (absent from the validation config).
    - _target_: monai.transforms.RandFlip
      prob: 0.5
      spatial_axis: 1
    - _target_: monai.transforms.NormalizeIntensity
    - _target_: monai.transforms.EnsureType
      data_type: tensor

Overwriting /kaggle/working/config/classification/train_dataset/base_ds.yaml


In [None]:
%%writefile /kaggle/working/config/classification/val_dataset/base_ds.yaml
# Validation dataset: same preprocessing as the training config, minus the
# random flip. Inference code should apply this same sequence of transforms.
_target_: data_utils.datasets.ImageClassificationDataset
img_dir: "/kaggle/working/data/classifier_data/val"
# The transform is called with the image *path*, so LoadImage must come first.
transform:
  _target_: monai.transforms.Compose
  transforms:
    - _target_: monai.transforms.LoadImage
    # CLAHE on the first channel; outputs a 3-channel, channel-first tensor in [0, 1].
    - _target_: data_utils.datasets.Processor
      clip_limit: 1.0
    # Pads the shorter spatial side so the resize below does not distort the aspect ratio.
    - _target_: data_utils.datasets.DynamicSquarePad
    - _target_: monai.transforms.Resize
      spatial_size: [224, 224]
      mode: bilinear
    - _target_: monai.transforms.NormalizeIntensity
    - _target_: monai.transforms.EnsureType
      data_type: tensor

Overwriting /kaggle/working/config/classification/val_dataset/base_ds.yaml


In [None]:
%%writefile /kaggle/working/data_utils/datasets.py
from monai.transforms import Compose, SpatialPad
from torch.utils.data import Dataset, DataLoader
from hydra.utils import instantiate
from PIL import Image
import pandas as pd
import numpy as np
import torch
import cv2
import os


class DynamicSquarePad:
    """Pad an image so every spatial dimension equals the largest one."""

    def __call__(self, img):
        """Return ``img`` padded by MONAI's ``SpatialPad`` to a square/cubic shape.

        The first axis of ``img`` is treated as the channel axis and is not
        counted among the spatial dimensions.
        """
        dims = img.shape[1:]
        side = max(dims)
        target_size = [side for _ in dims]
        return SpatialPad(spatial_size=target_size)(img)


class Processor:
    """CLAHE contrast enhancement that outputs a 3-channel, channel-first tensor."""

    def __init__(self, clip_limit=2.0, tile_grid_size=(8, 8)):
        """
        Initialize CLAHE processor.

        Args:
            clip_limit: Threshold for contrast limiting
            tile_grid_size: Size of grid for histogram equalization
        """
        self.clip_limit = clip_limit
        self.tile_grid_size = tile_grid_size
        self.clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)

    def __call__(self, img):
        """
        Apply CLAHE to the first channel of an image.

        Args:
            img: torch tensor or NumPy array, either channel-last with three
                axes (only channel 0 is used) or a single 2-D plane. Values
                are cast to uint8 before equalisation.

        Returns:
            ``torch.float64`` tensor of shape ``(3, B, A)`` for an input whose
            first two axes are ``(A, B)``: the equalised plane is transposed,
            repeated on three channels and scaled to ``[0, 1]``.
        """
        # A grayscale image may arrive without a channel axis; use it as is.
        plane = img if img.ndim == 2 else img[:, :, 0]
        if isinstance(plane, torch.Tensor):
            plane = plane.detach().cpu().numpy()
        plane = np.asarray(plane).astype(np.uint8)

        # The transpose swaps the two spatial axes of the loaded image.
        res = self.clahe.apply(plane).T
        res = np.stack((res, res, res), axis=0).astype(float) / 255
        return torch.from_numpy(res).double()


class ImageClassificationDataset(Dataset):
    """Dataset over a flat directory of images whose labels live in the file name.

    Every entry of ``img_dir`` is treated as one sample. The file stem must end
    in ``_<erosion>_<jsn>``: its last two underscore-separated fields are parsed
    as the integer erosion and JSN scores.

    Args:
        img_dir: Directory containing the image files.
        transform: Callable invoked with the full image path; its result is
            returned as the image.
    """

    def __init__(self, img_dir, transform):
        self.img_dir = img_dir
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping stable across runs and machines.
        self.entries = sorted(os.listdir(img_dir))
        self.transform = transform

    def __len__(self):
        return len(self.entries)

    def __getitem__(self, idx):
        """Return ``(image, erosion_score, jsn_score)`` for sample ``idx``."""
        entry = self.entries[idx]
        img_path = os.path.join(self.img_dir, entry)
        erosion_score, jsn_score = os.path.splitext(entry)[0].split("_")[-2:]

        img = self.transform(img_path)
        return img, int(erosion_score), int(jsn_score)


class EvalImageDataset(Dataset):
    """Dataset of cropped joint images listed in a bounding-box table.

    Args:
        image_dir: Directory holding crops named ``<patient_id>_<joint_id>.jpeg``.
        bbox_csv: File readable by ``pandas.read_csv`` with the columns
            ``patient_id``, ``joint_id``, ``xcenter``, ``ycenter``, ``dx``, ``dy``.
        transform: Optional callable invoked with the image path. When omitted,
            the image path itself is returned as ``"image"``.
    """

    def __init__(self, image_dir, bbox_csv, transform=None):
        self.image_dir = image_dir
        self.transform = transform
        self.bbox_df = pd.read_csv(bbox_csv)
        self.samples = self._load_samples()

    def _load_samples(self):
        """Collect one tuple per table row whose image file exists on disk."""
        samples = []
        for _, row in self.bbox_df.iterrows():
            patient_id = row["patient_id"]
            joint_id = row["joint_id"]
            image_name = f"{int(patient_id)}_{int(joint_id)}.jpeg"
            image_path = os.path.join(self.image_dir, image_name)
            # Rows without a matching crop are skipped silently.
            if os.path.exists(image_path):
                samples.append((image_path, patient_id, joint_id, row["xcenter"], row["ycenter"], row["dx"], row["dy"]))
        return samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        """Return a dict with the image and the bounding-box metadata of sample ``idx``."""
        image_path, patient_id, joint_id, xcenter, ycenter, dx, dy = self.samples[idx]

        # `is not None` rather than truthiness: an empty Compose-like object may
        # be falsy, and without any transform `image` was previously left unbound.
        if self.transform is not None:
            image = self.transform(image_path)
        else:
            image = image_path
        return {
            "image": image,
            "patient_id": patient_id,
            "joint_id": joint_id,
            "xcenter": xcenter,
            "ycenter": ycenter,
            "dx": dx,
            "dy": dy,
        }


def initialize_data(cfg):
    """Build the training and validation DataLoaders described by ``cfg``.

    ``cfg.train_dataset`` and ``cfg.val_dataset`` are instantiated through Hydra.
    Both loaders use ``cfg.training.batch_size`` and ``cfg.training.num_workers``
    with pinned memory; only the training loader shuffles.

    Returns:
        Tuple ``(train_loader, val_loader)``.
    """
    shared_kwargs = dict(
        batch_size=cfg.training.batch_size,
        num_workers=cfg.training.num_workers,
        pin_memory=True,
    )
    train_ds = instantiate(cfg.train_dataset)
    val_ds = instantiate(cfg.val_dataset)
    return (
        DataLoader(train_ds, shuffle=True, **shared_kwargs),
        DataLoader(val_ds, shuffle=False, **shared_kwargs),
    )

Overwriting /kaggle/working/data_utils/datasets.py


In [None]:
%%writefile /kaggle/working/classification/train.py
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import StepLR
from accelerate import Accelerator
import hydra
from omegaconf import DictConfig, OmegaConf
from hydra.utils import instantiate
import wandb
from sklearn.metrics import balanced_accuracy_score, accuracy_score
from tqdm import tqdm
from metrics import calculate_accuracy
from data_utils.datasets import initialize_data
from classification.utils import save_checkpoint
from accelerate.utils import set_seed
import torch.nn.functional as F
import monai


# Seed accelerate and MONAI with the same fixed value at import time.
# NOTE(review): the seed is hard-coded; `random_state` from train.yaml is not read here.
set_seed(43)
monai.utils.set_determinism(seed=43)
# Module-level Accelerator shared by main() and passed on to the epoch functions.
accelerator = Accelerator()


class FocalLoss(nn.Module):
    """Class-weighted focal loss built on per-sample cross entropy.

    Each sample contributes ``w[target] * (1 - p_t) ** gamma * CE`` where
    ``p_t = exp(-CE)``; the sum is normalised by the sum of the sample weights.

    Args:
        gamma: Focusing exponent applied to ``(1 - p_t)``.
        weights: Optional per-class weights (1-D tensor or sequence indexed by
            class id). ``None`` weights every sample equally, which makes the
            result the plain mean of the focal terms.
    """

    def __init__(self, gamma=2, weights=None):
        super().__init__()

        self.gamma = gamma
        self.weights = weights
        self.ce_loss = nn.CrossEntropyLoss(reduction='none')

    def forward(self, inputs, targets):
        """Return the scalar loss for logits ``inputs`` and class ids ``targets``."""
        ce_loss = self.ce_loss(inputs, targets)
        pt = torch.exp(-ce_loss)

        if self.weights is None:
            # Previously this path crashed on `None.to(...)`.
            return ((1 - pt) ** self.gamma * ce_loss).mean()

        # as_tensor also accepts plain sequences (e.g. weights read from a config).
        class_weights = torch.as_tensor(self.weights, device=inputs.device)
        weights = class_weights.gather(0, targets.view(-1))
        loss = weights * (1 - pt) ** self.gamma * ce_loss

        return loss.sum() / weights.sum()


def train_epoch(cfg, model, train_loader, optimizer, criterion_erosion, criterion_jsn, accelerator):
    """Run one optimisation pass over ``train_loader``.

    For every batch the JSN and erosion losses are summed, back-propagated via
    ``accelerator.backward`` and followed by an optimizer step. Per-step losses
    and the learning rate of the first parameter group are logged to WandB on
    the local main process. ``cfg`` is not used.

    Returns:
        Mean combined loss per batch.
    """
    model.train()
    running_loss = 0.0

    for images, erosion_target, jsn_target in tqdm(train_loader, desc="Training"):
        predictions = model(images.double())

        jsn_term = criterion_jsn(predictions["jsn"], jsn_target)
        erosion_term = criterion_erosion(predictions["erosion"], erosion_target)
        combined = jsn_term + erosion_term

        accelerator.backward(combined)
        optimizer.step()
        optimizer.zero_grad()

        step_loss = combined.item()
        running_loss += step_loss

        if not accelerator.is_local_main_process:
            continue
        # Per-step metrics for WandB
        wandb.log({
            "train_loss": step_loss,
            "train_loss_jsn": jsn_term.item(),
            "train_loss_erosion": erosion_term.item(),
            "lr": optimizer.param_groups[0]["lr"]
        })

    return running_loss / len(train_loader)

# Validation function
def validate_epoch(cfg, model, val_loader, criterion_erosion, criterion_jsn, accelerator):
    """Evaluate ``model`` on ``val_loader`` without gradient tracking.

    Accumulates the combined JSN + erosion loss per batch and collects the
    arg-max predictions of both heads, then scores each head with
    ``balanced_accuracy_score`` over the whole loader. Metrics are logged to
    WandB on the local main process. ``cfg`` is not used.

    Returns:
        Tuple ``(mean loss per batch, JSN balanced accuracy, erosion balanced accuracy)``.
    """
    model.eval()
    loss_sum = 0.0

    jsn_pred, jsn_true = [], []
    erosion_pred, erosion_true = [], []

    with torch.no_grad():
        for images, erosion_target, jsn_target in tqdm(val_loader, desc="Validation"):
            logits = model(images.double())

            jsn_term = criterion_jsn(logits["jsn"], jsn_target)
            erosion_term = criterion_erosion(logits["erosion"], erosion_target)
            loss_sum += (jsn_term + erosion_term).item()

            jsn_pred += logits["jsn"].detach().cpu().argmax(dim=1).tolist()
            jsn_true += jsn_target.detach().cpu().tolist()

            erosion_pred += logits["erosion"].detach().cpu().argmax(dim=1).tolist()
            erosion_true += erosion_target.detach().cpu().tolist()

    # Balanced accuracy is computed once over all collected predictions.
    jsn_accuracy = balanced_accuracy_score(jsn_true, jsn_pred)
    erosion_accuracy = balanced_accuracy_score(erosion_true, erosion_pred)
    avg_loss = loss_sum / len(val_loader)

    if accelerator.is_local_main_process:
        # Epoch-level metrics for WandB
        wandb.log({
            "val_loss": avg_loss,
            "val_jsn_accuracy": jsn_accuracy,
            "val_erosion_accuracy": erosion_accuracy
        })

    return avg_loss, jsn_accuracy, erosion_accuracy


@hydra.main(config_path="../config/classification", config_name="train")
def main(cfg: DictConfig):
    """Train the two-head classifier described by the Hydra config.

    Builds the model, an optimizer with separate learning rates for the heads
    and the backbone, the LR scheduler, the data loaders and the class-weighted
    losses; then trains for ``cfg.training.epochs`` epochs, validating, stepping
    the scheduler and saving a checkpoint after each one.
    """
    if accelerator.is_local_main_process:
        # An empty key in the config must not overwrite credentials that are
        # already present in the environment.
        if cfg.wandb.api_key:
            os.environ["WANDB_API_KEY"] = cfg.wandb.api_key
        wandb.init(project=cfg.wandb.project, config=OmegaConf.to_container(cfg, resolve=True))

    model = instantiate(cfg.model).double()

    # The optimizer node carries `head_lr`, which is not an optimizer argument,
    # so it cannot go through instantiate(). Resolve the class by its dotted
    # path instead of eval()-ing a string taken from the config.
    optimizer_cls = hydra.utils.get_class(cfg.optimizer._target_)
    optimizer = optimizer_cls([
        {"params": model.heads.parameters(), "lr": cfg.optimizer.head_lr, "weight_decay": cfg.optimizer.weight_decay},
        {"params": model.backbone.parameters(), "lr": cfg.optimizer.lr, "weight_decay": cfg.optimizer.weight_decay}
    ])
    scheduler = instantiate(cfg.scheduler, optimizer=optimizer)
    train_loader, val_loader = initialize_data(cfg)

    # Put the class weights on the device accelerate selected rather than
    # assuming CUDA is available.
    device = accelerator.device
    criterion_erosion = nn.CrossEntropyLoss(weight=torch.tensor(cfg.training.normalized_erosion_class_weights).double().to(device))
    criterion_jsn = nn.CrossEntropyLoss(weight=torch.tensor(cfg.training.normalized_jsn_class_weights).double().to(device))

    model, optimizer, train_loader, val_loader = accelerator.prepare(
        model, optimizer, train_loader, val_loader
    )

    # Training loop
    for epoch in range(cfg.training.epochs):
        train_loss = train_epoch(cfg, model, train_loader, optimizer, criterion_erosion, criterion_jsn, accelerator)
        val_loss, jsn_accuracy, erosion_accuracy = validate_epoch(cfg, model, val_loader, criterion_erosion, criterion_jsn, accelerator)

        # Step the scheduler once per epoch
        scheduler.step()

        if accelerator.is_local_main_process:
            # Print epoch results
            print(f"Epoch {epoch + 1}/{cfg.training.epochs}")
            print(f"Train Loss: {train_loss:.4f}")
            print(f"Val Loss: {val_loss:.4f}")
            print(f"JSN Accuracy: {jsn_accuracy:.4f}")
            print(f"Erosion Accuracy: {erosion_accuracy:.4f}")

            save_checkpoint(model, epoch + 1, jsn_accuracy, erosion_accuracy, cfg.training.save_dir)


if __name__ == "__main__":
    main()

Overwriting /kaggle/working/classification/train.py


In [15]:
!./preprocess_for_yolo.sh \
--csv /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/bboxes.csv \
--img_dir /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/jpeg \
--split_info_path /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/train_val_split.json

Updated PYTHONPATH: /kaggle/lib/kagglegym:/kaggle/lib:/kaggle/working
Created YOLO annotation file: data/yolo_dataset/labels/train/2.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/3.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/4.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/5.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/8.txt
Created YOLO annotation file: data/yolo_dataset/labels/val/9.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/11.txt
Created YOLO annotation file: data/yolo_dataset/labels/val/15.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/16.txt
Created YOLO annotation file: data/yolo_dataset/labels/val/17.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/18.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/19.txt
Created YOLO annotation file: data/yolo_dataset/labels/train/24.txt
Created YOLO annotation file: data/yolo_dataset/labels/tra

In [16]:
!./preprocess_for_classifier.sh \
--scores_csv /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/scores.csv \
--bbox_file /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/bboxes.csv \
--image_dir /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/jpeg \
--split_subsets_by_id /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/train_val_split.json

Updated PYTHONPATH: /kaggle/lib/kagglegym:/kaggle/lib:/kaggle/working
Running data_utils/average_scores.py...
Input CSV: 
Output CSV: data/averaged_scores.csv
Transformed data saved to data/averaged_scores.csv
Averaged scores saved to data/averaged_scores.csv
Running data_utils/merge_scores_bbox_files.py...
Bounding Box File: /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/bboxes.csv
Score File: data/averaged_scores.csv
Output File: data/merged_score_file.csv
Merged scores and bounding boxes saved to data/merged_score_file.csv
Running data_utils/crop_images.py...
Label File: data/merged_score_file.csv
Image Directory: /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/jpeg
Output Directory: data/classifier_data
Split Subsets By ID: /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/train_val_split.json
Cropped images saved to data/classifier_data
All steps completed successfully!


# Train

In [17]:
%%writefile train.sh
export PYTHONPATH=$PYTHONPATH:/kaggle/working
export HYDRA_FULL_ERROR=1
echo "Updated PYTHONPATH: $PYTHONPATH"
accelerate launch classification/train.py

Writing train.sh


In [85]:
!chmod +x train.sh
!./train.sh

Updated PYTHONPATH: /kaggle/lib/kagglegym:/kaggle/lib:/kaggle/working
2025-03-12 23:07:43.284671: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-12 23:07:43.308959: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-12 23:07:43.315780: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  @hydra.main(config_path="../config/classification", config_name="train")
See https://hydra.cc/docs/1.2/upgrades/1.1_to_1.2/changes_to_job_working_dir/ for m

# Submit

In [None]:
!python detection/model/yolo/infer.py \
--model checkpoints/detection/best.pt \
--img_dir /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/eval_data \
--output_dir /kaggle/working/data/eval_detection_output \
--imgsz 640

In [None]:
!python data_utils/crop_eval.py \
--image_dir /kaggle/input/automated-scoring-in-rheumatoid-arthritis/dataset/eval_data \
--bbox_txt data/eval_detection_output/bbox_txts/all_bboxes.txt \
--output_dir data/eval_cropped_images

In [None]:
%%writefile submit.py
import argparse
import os
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from monai import transforms
from PIL import Image
import hydra
from omegaconf import DictConfig, OmegaConf
from hydra.utils import instantiate
from classification.utils import load_eval_model
from data_utils.datasets import EvalImageDataset, DynamicSquarePad


def predict_scores(cfg, model, dataloader, device):
    """Predict JSN and erosion classes for every sample yielded by ``dataloader``.

    Args:
        cfg: Unused; kept for interface compatibility.
        model: Module returning a dict with ``"jsn"`` and ``"erosion"`` logits.
        dataloader: Iterable of dict batches whose values are tensors, with the
            keys ``image``, ``patient_id``, ``joint_id``, ``xcenter``,
            ``ycenter``, ``dx`` and ``dy``.
        device: Device every batch tensor is moved to.

    Returns:
        List with one dict per sample: identifiers, bounding box, the predicted
        class index of each head and a constant ``"PAD"`` of 0.0.
    """
    # Inference must run in eval mode so dropout and similar layers are inactive.
    model.eval()

    results = []
    with torch.no_grad():
        for batch in dataloader:
            batch = {key: value.to(device) for key, value in batch.items()}
            outputs = model(batch["image"].double())
            # Arg-max over the class axis gives the predicted class index.
            jsn_scores = outputs["jsn"].argmax(dim=1).cpu().numpy()
            erosion_scores = outputs["erosion"].argmax(dim=1).cpu().numpy()

            for i in range(len(batch["image"])):
                patient_id = int(batch["patient_id"][i].item())
                joint_id = int(batch["joint_id"][i].item())
                results.append({
                    "ID": f'{patient_id}_{joint_id}',
                    "patient_id": patient_id,
                    "joint_id": joint_id,
                    "xcenter": batch["xcenter"][i].item(),
                    "ycenter": batch["ycenter"][i].item(),
                    "dx": batch["dx"][i].item(),
                    "dy": batch["dy"][i].item(),
                    "jsn_score": int(jsn_scores[i]),
                    "erosion_score": int(erosion_scores[i]),
                    "PAD": 0.0
                })
    return results

# Main function
@hydra.main(config_path="config/classification", config_name="submit")
def main(cfg: DictConfig):
    """Score the cropped evaluation joints and write the predictions to a CSV.

    Loads the model weights named in ``cfg.inference``, runs every crop through
    the classifier and saves one row per joint to ``cfg.inference.output_csv``.
    """
    # Local import: the module-level import list of this script does not include Processor.
    from data_utils.datasets import Processor

    model = load_eval_model(cfg, cfg.inference.model_weights).to(cfg.inference.device).double()

    # Inference preprocessing must match what the model saw during training.
    # This mirrors config/classification/val_dataset/base_ds.yaml step by step;
    # the previous pipeline skipped the CLAHE Processor and used
    # EnsureChannelFirst, which yields a different contrast and axis layout.
    transform = transforms.Compose([
        transforms.LoadImage(),
        Processor(clip_limit=1.0),
        DynamicSquarePad(),
        transforms.Resize(
          spatial_size=[224, 224],
          mode="bilinear"
        ),
        transforms.NormalizeIntensity(),
        transforms.EnsureType(data_type="tensor")
    ])

    # Create dataset and dataloader
    dataset = EvalImageDataset(cfg.inference.image_dir, cfg.inference.bbox_csv, transform=transform)
    dataloader = DataLoader(dataset, batch_size=cfg.inference.batch_size, shuffle=False)

    # Predict scores
    results = predict_scores(cfg, model, dataloader, cfg.inference.device)

    # Save results to CSV
    results_df = pd.DataFrame(results)
    results_df.to_csv(cfg.inference.output_csv, index=False)
    print(f"Predictions saved to {cfg.inference.output_csv}")

if __name__ == "__main__":
    main()

In [None]:
!ls -l checkpoints/classification

In [None]:
%%writefile /kaggle/working/config/classification/submit.yaml
# Hydra config for submit.py (inference on the evaluation set).
defaults:
  - model: "timm"

inference:
  batch_size: 32
  # NOTE(review): this file name encodes the epoch and metrics of one specific
  # training run; it must be updated whenever the model is retrained
  # (see the `ls checkpoints/classification` cell).
  model_weights: /kaggle/working/checkpoints/classification/model_epoch:4_jsn_accuracy:0.6620927173848217_erosion_accuracy:0.44785529285529285.pth
  # Read with pandas.read_csv by EvalImageDataset, which expects the columns
  # patient_id, joint_id, xcenter, ycenter, dx, dy.
  bbox_csv: /kaggle/working/data/eval_detection_output/bbox_txts/all_bboxes.txt
  # Directory of crops named <patient_id>_<joint_id>.jpeg.
  image_dir: /kaggle/working/data/eval_cropped_images
  output_csv: /kaggle/working/submit.csv
  device: "cuda:0"

In [None]:
%%writefile submit.sh
export PYTHONPATH=$PYTHONPATH:/kaggle/working
echo "Updated PYTHONPATH: $PYTHONPATH"
python3 submit.py

In [None]:
!chmod +x submit.sh
!./submit.sh