# Import 

In [2]:
!pip install einops
!git clone https://github.com/b-ptiste/dlmi.git

Collecting einops
  Downloading einops-0.7.0-py3-none-any.whl.metadata (13 kB)
Downloading einops-0.7.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: einops
Successfully installed einops-0.7.0


In [3]:
# Standard library imports
import os
import random
import copy
import time

# Related third-party imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import KFold
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn import functional as F
from torchvision import transforms as T
from torchvision.transforms import v2
from tqdm import tqdm
import timm
import wandb
import uuid
from sklearn.metrics import balanced_accuracy_score

# log in different framework
path_root = "/kaggle/input/dlmi-challenge-b-and-s"
path_working = "/kaggle/working"
path_mae = "/kaggle/input/pretrain-mae"


# local library
from dlmi.src.model import ModelFactory
from dlmi.src.data import csv_processing, DataloaderFactory
from dlmi.src.utils import get_stratified_split
from dlmi.src.mae_pretraining import MAE_ViT, MAE_Encoder, MAE_Decoder, PatchShuffle
from dlmi.data.split import train_index as train_index_strat
from dlmi.data.split import val_index as val_index_strat

Cloning into 'dlmi'...
remote: Enumerating objects: 215, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 215 (delta 0), reused 3 (delta 0), pack-reused 210[K
Receiving objects: 100% (215/215), 39.75 KiB | 3.97 MiB/s, done.
Resolving deltas: 100% (80/80), done.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


# Data importation

Csv import with pre-processing, reformatting and normalisation.

In [4]:
df_annotation_train = csv_processing(
    os.path.join(path_root, "trainset", "trainset_true.csv")
)
df_annotation_test = csv_processing(
    os.path.join(path_root, "testset", "testset_data.csv")
)

Create train, val, test dataset.

In [5]:
mode_split = "strat"

In [6]:
test_index = df_annotation_test.index.tolist()

if mode_split == "auto":
    map_mode_index = get_stratified_split(df_annotation_train, df_annotation_test)

    train_index = map_mode_index["train"]
    val_index = map_mode_index["val"]

elif mode_split == "load":
    # log wandb
    run = wandb.init()
    artifact = run.use_artifact(
        "ii_timm/DLMI/submission958f5028e70811ee9d6b0242ac130202:v0", type="csv"
    )
    artifact_dir = artifact.download(root=path_working)
    wandb.finish()

    train_index = pd.read_csv(f"{path_working}/train_index.csv")[
        "train"
    ].values.tolist()
    val_index = pd.read_csv(f"{path_working}/val_index.csv")["val"].values.tolist()

elif mode_split == "strat":
    test_index = df_annotation_test.index.tolist()

if mode_split == "auto":
    map_mode_index = get_stratified_split(df_annotation_train, df_annotation_test)

    train_index = map_mode_index["train"]
    val_index = map_mode_index["val"]

elif mode_split == "load":
    # log wandb
    run = wandb.init()
    artifact = run.use_artifact(
        "ii_timm/DLMI/submission958f5028e70811ee9d6b0242ac130202:v0", type="csv"
    )
    artifact_dir = artifact.download(root=path_working)
    wandb.finish()

    train_index = pd.read_csv(f"{path_working}/train_index.csv")[
        "train"
    ].values.tolist()
    val_index = pd.read_csv(f"{path_working}/val_index.csv")["val"].values.tolist()

elif mode_split == "strat":
    train_index = train_index_strat
    val_index = val_index_strat

# ML

In [7]:
from imblearn.over_sampling import SMOTE

import optuna
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer

from sklearn.metrics import balanced_accuracy_score

In [8]:
df_train_clf = df_annotation_train.loc[train_index]
df_val_clf = df_annotation_train.loc[val_index]

y_train = df_train_clf["LABEL"]
y_val = df_val_clf["LABEL"]
df_train_clf = df_train_clf.drop(columns=["GENDER", "DOB", "LABEL", "ID"])
df_val_clf = df_val_clf.drop(columns=["GENDER", "DOB", "LABEL", "ID"])
df_train_clf["BIN_GENDER"] = df_train_clf["BIN_GENDER"].astype(int)
df_val_clf["BIN_GENDER"] = df_val_clf["BIN_GENDER"].astype(int)

In [9]:
def objective(trial):
    n_estimators = trial.suggest_int("n_estimators", 2, 10)
    max_depth = trial.suggest_int("max_depth", 1, 10)
    clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth)
    return cross_val_score(
        clf,
        df_train_clf.values,
        y_train,
        n_jobs=-1,
        cv=5,
        scoring=make_scorer(balanced_accuracy_score),
    ).mean()

## training

In [10]:
smote = SMOTE(random_state=1999)
df_train_clf, y_train = smote.fit_resample(df_train_clf, y_train)

In [11]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=100)

trial = study.best_trial
print("Balanced accuracy: {}".format(trial.value))
print("Best hyperparameters: {}".format(trial.params))

[I 2024-03-27 11:45:35,234] A new study created in memory with name: no-name-7127d3b0-c30d-44c4-94ab-4a0846e38848
[I 2024-03-27 11:45:37,112] Trial 0 finished with value: 0.8388888888888889 and parameters: {'n_estimators': 7, 'max_depth': 2}. Best is trial 0 with value: 0.8388888888888889.
[I 2024-03-27 11:45:37,152] Trial 1 finished with value: 0.8444444444444444 and parameters: {'n_estimators': 4, 'max_depth': 5}. Best is trial 1 with value: 0.8444444444444444.
[I 2024-03-27 11:45:37,188] Trial 2 finished with value: 0.8333333333333333 and parameters: {'n_estimators': 3, 'max_depth': 3}. Best is trial 1 with value: 0.8444444444444444.
[I 2024-03-27 11:45:37,224] Trial 3 finished with value: 0.861111111111111 and parameters: {'n_estimators': 4, 'max_depth': 5}. Best is trial 3 with value: 0.861111111111111.
[I 2024-03-27 11:45:37,260] Trial 4 finished with value: 0.861111111111111 and parameters: {'n_estimators': 3, 'max_depth': 6}. Best is trial 3 with value: 0.861111111111111.
[I 20

Balanced accuracy: 0.9
Best hyperparameters: {'n_estimators': 9, 'max_depth': 10}


In [12]:
# Extract the best hyperparameters
best_params = study.best_trial.params

# Create the XGBoost classifier with the best hyperparameters
best_clf = RandomForestClassifier(**best_params)

# Train the classifier on the full training dataset
best_clf.fit(df_train_clf.values, y_train)


y_pred = best_clf.predict(df_val_clf.values)

# Evaluate the model
print(balanced_accuracy_score(y_val, y_pred))

0.9


# Pretraining


Finetuning¶:

The following models are supported but basically all VIT, ResNet and EfficientNet from Timm are supported !

FROM : https://github.com/huggingface/pytorch-image-models/blob/main/timm/models/vision_transformer.py

    vit_tiny_patch16_224.augreg_in21k_ft_in1k
    vit_tiny_patch16_224.augreg_in21k
    vit_small_patch32_224.augreg_in21k
    vit_small_patch16_224.augreg_in21k (actual)

DINO

    vit_small_patch16_224.dino
    vit_small_patch8_224.dino

DINOv2

    vit_small_patch14_dinov2.lvd142m
    vit_small_patch14_reg4_dinov2.lvd142m

FlexiVit

    flexivit_small.300ep_in1k
    flexivit_small.1200ep_in1k

OpenAI

    vit_xsmall_patch16_clip_224.tinyclip_yfcc15m

Resnet

EfficientNet



In [13]:
cfg = {
    "who": "baptiste",  # or steven
    "no_wandb": True,
    "name_exp": "PatientModelAttentionTab - vit_tiny_patch16_224 - bottleneck",
    "lr": 1e-5,
    "batch_size": 124,
    "nb_epochs": 10,
    "timm": True,  # is the model from timm
    "timm_model": "vit_small_patch16_224.augreg_in21k",
    "dino": False,
    "dino_size": "",  # vits, vitb, vitl, vitg
    "adapter": "",  # bottleneck, adaptformer, lora, prompttuning
    "model_name": "",  #
    "pretrained": True,
    "pretrained_path": "",
    "nb_class": 2,
    "scheduler": None,  # could be empty or linear, expo ...
    "dataset_name": "DatasetPerImg",
    "device_1": "cuda:0",
    "device_2": "cuda:1",  # for double device
    # data augmentation
    "filename": f"{path_working}/submission_pretrain.csv",
    "filename_finetune": f"{path_working}/submission_finetune.csv",
    "sub_batch_size": 16,
    "latent_att": 512,
    "head_1": 8,  # 4
    "head_2": 2,
    "feature_dim": 384,  # DINOv2, VIT: 192 - 384
    "aggregation": "sum",  # sum, avg, max
    "beta_1": 0.5,
    "beta_2": 0.9,
    "weight_decay": 5e-2,
    "weight_class_0": 3.0,
    "weight_class_1": 1.0,
    "mask_ratio": 0.75,
    "image_size": 224,
    "patch_size": 16,
    "mae_pretrained": "",
    "with_tab": True,
    "mode_split": "strat",  # load, strat
    "degrees": (-5, 5),
    "translate": (0.1, 0.1),
    "scale": (1.0, 1.1),
    "fill": (255, 232, 201),
    "p": 0.1,
}

In [14]:
# transform
transform_train = T.Compose(
    [
        v2.PILToTensor(),
        v2.RandomHorizontalFlip(p=cfg["p"]),
        v2.RandomVerticalFlip(p=cfg["p"]),
        v2.RandomAffine(
            degrees=cfg["degrees"],
            translate=cfg["translate"],
            scale=cfg["scale"],
            fill=cfg["fill"],
        ),
    ]
)

transform_val = T.Compose(
    [
        v2.PILToTensor(),
    ]
)

In [15]:
# load data
data_factory = DataloaderFactory()
model_factory = ModelFactory()
dataloader_train = data_factory(
    cfg,
    mode="train",
    split_indexes=train_index,
    path_root=path_root,
    shuffle=True,
    drop_last=True,
    transform=transform_train,
    oversampling={"0": 1, "1": 1},
)

dataloader_val = data_factory(
    cfg,
    mode="train",
    split_indexes=val_index,
    path_root=path_root,
    shuffle=False,
    drop_last=False,
    transform=transform_val,
    oversampling={"0": 1, "1": 1},
)


# load model
model = model_factory(cfg).to(cfg["device_1"])

# optimizer and scheduler
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=cfg["lr"],
    betas=(cfg["beta_1"], cfg["beta_2"]),
    weight_decay=cfg["weight_decay"],
)

scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=cfg["nb_epochs"], eta_min=5e-6
)
# weight = torch.tensor([cfg['weight_class_0'], cfg['weight_class_1']]).to(cfg['device_1'])
# loss_fn = torch.nn.CrossEntropyLoss(weight=weight)

soft_max = torch.nn.Softmax(1)


cfg["nb_params_train"] = sum(p.numel() for p in model.parameters() if p.requires_grad)

cfg["nb_params_tot"] = sum(p.numel() for p in model.parameters())

print("=" * 50)
print(f'The model has {cfg["nb_params_tot"]} parameters')
print(f'The model has {cfg["nb_params_train"]} trainable parameters')
print(
    f'It represents {np.round(100 * cfg["nb_params_train"]/cfg["nb_params_tot"], 3)} % trainable parameters'
)
print("=" * 50)

The configuration is:
who : baptiste
no_wandb : True
name_exp : PatientModelAttentionTab - vit_tiny_patch16_224 - bottleneck
lr : 1e-05
batch_size : 124
nb_epochs : 10
timm : True
timm_model : vit_small_patch16_224.augreg_in21k
dino : False
dino_size : 
adapter : 
model_name : 
pretrained : True
pretrained_path : 
nb_class : 2
scheduler : None
dataset_name : DatasetPerImg
device_1 : cuda:0
device_2 : cuda:1
filename : /kaggle/working/submission_pretrain.csv
filename_finetune : /kaggle/working/submission_finetune.csv
sub_batch_size : 16
latent_att : 512
head_1 : 8
head_2 : 2
feature_dim : 384
aggregation : sum
beta_1 : 0.5
beta_2 : 0.9
weight_decay : 0.05
weight_class_0 : 3.0
weight_class_1 : 1.0
mask_ratio : 0.75
image_size : 224
patch_size : 16
mae_pretrained : 
with_tab : True
mode_split : strat
degrees : (-5, 5)
translate : (0.1, 0.1)
scale : (1.0, 1.1)
fill : (255, 232, 201)
p : 0.1
Loading timm model vit_small_patch16_224.augreg_in21k


model.safetensors:   0%|          | 0.00/120M [00:00<?, ?B/s]

No adapter used
The model has 21666434 parameters
The model has 21666434 trainable parameters
It represents 100.0 % trainable parameters


In [16]:
weight = torch.tensor([2.5, 1.0]).to("cuda:0")
loss_fn = torch.nn.CrossEntropyLoss(weight=weight)

In [17]:
best_accuracy = -1

if not cfg["no_wandb"]:
    run = wandb.init(
        project="DLMI",
        entity="ii_timm",
        name=cfg["name_exp"],
        config=cfg,
    )

print("Start Training ...")

for epoch in range(cfg["nb_epochs"]):
    model.train()
    print("=" * 50)
    print(" " * 15, f"Epoch {epoch}")
    print("=" * 50)
    train_pred = []
    train_label = []
    tain_cum_loss = 0
    for x, annotation in tqdm(dataloader_train):
        # reset gradient
        optimizer.zero_grad()

        # forward
        x = x.to(cfg["device_1"])
        x = model(x)

        # compute loss
        loss = loss_fn(x, annotation["LABEL"].to(cfg["device_1"]))
        pred = torch.argmax(soft_max(x), dim=1)
        tain_cum_loss += loss.item()

        train_pred.extend(pred.detach().cpu().tolist())
        train_label.extend(annotation["LABEL"].detach().cpu().tolist())

        # gradient decent
        loss.backward()
        optimizer.step()

    if scheduler is not None:
        scheduler.step()

    train_balance_acc = balanced_accuracy_score(train_pred, train_label)
    train_avg_loss = tain_cum_loss / len(dataloader_train)
    print(
        f"train_balance_acc : {np.round(train_balance_acc, 6)} / train_avg_loss : {np.round(train_avg_loss, 6)}"
    )

    val_pred = []
    val_label = []
    val_cum_loss = 0
    model.eval()
    for x, annotation in tqdm(dataloader_val):
        # forward
        x = x.to(cfg["device_1"])
        with torch.no_grad():
            x = model(x)

        # compute loss
        loss = loss_fn(x, annotation["LABEL"].to(cfg["device_1"]))
        pred = torch.argmax(soft_max(x), dim=1)
        val_cum_loss += loss.item()
        val_pred.extend(pred.detach().cpu().tolist())
        val_label.extend(annotation["LABEL"].detach().cpu().tolist())

    val_balance_acc = balanced_accuracy_score(val_pred, val_label)
    val_avg_loss = val_cum_loss / len(dataloader_val)
    print(
        f"val_balance_acc : {np.round(val_balance_acc, 6)} / val_avg_loss : {np.round(val_avg_loss, 6)}"
    )

    if val_balance_acc > best_accuracy:
        best_accuracy = val_balance_acc
        print("Improve avg accuracy :")

        save_path = os.path.join("./", "model" + str(epoch) + ".pt")
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict()
                if cfg["scheduler"] is not None
                else None,
            },
            save_path,
        )
        print("checkpoint saved to: {}".format(save_path))

    if not cfg["no_wandb"]:
        wandb.log(
            {
                "epoch": epoch,
                "balance_acc/train": train_balance_acc,
                "loss/train": train_avg_loss,
                "balance_acc/val": val_balance_acc,
                "loss/val": val_avg_loss,
            }
        )


if not cfg["no_wandb"]:
    model_artifact = wandb.Artifact(
        "model" + str(uuid.uuid1()).replace("-", ""), type="model"
    )
    model_artifact.add_file(save_path)
    wandb.log_artifact(model_artifact)

    description_artifact = wandb.Artifact(
        "description_model" + str(uuid.uuid1()).replace("-", ""), type="python"
    )

    description_artifact.add_file("/kaggle/working/dlmi/src/model.py")
    description_artifact.add_file("/kaggle/working/dlmi/src/utils.py")
    description_artifact.add_file("/kaggle/working/dlmi/src/data.py")
    wandb.log_artifact(description_artifact)

Start Training ...
                Epoch 0


100%|██████████| 85/85 [01:58<00:00,  1.39s/it]


train_balance_acc : 0.691915 / train_avg_loss : 0.500355


100%|██████████| 24/24 [00:24<00:00,  1.01s/it]


val_balance_acc : 0.71803 / val_avg_loss : 0.382228
Improve avg accuracy :
checkpoint saved to: ./model0.pt
                Epoch 1


100%|██████████| 85/85 [01:13<00:00,  1.16it/s]


train_balance_acc : 0.757054 / train_avg_loss : 0.400541


100%|██████████| 24/24 [00:12<00:00,  1.91it/s]


val_balance_acc : 0.734628 / val_avg_loss : 0.365978
Improve avg accuracy :
checkpoint saved to: ./model1.pt
                Epoch 2


100%|██████████| 85/85 [01:13<00:00,  1.15it/s]


train_balance_acc : 0.767836 / train_avg_loss : 0.36455


100%|██████████| 24/24 [00:12<00:00,  1.90it/s]


val_balance_acc : 0.742147 / val_avg_loss : 0.3634
Improve avg accuracy :
checkpoint saved to: ./model2.pt
                Epoch 3


100%|██████████| 85/85 [01:12<00:00,  1.17it/s]


train_balance_acc : 0.794636 / train_avg_loss : 0.321676


100%|██████████| 24/24 [00:12<00:00,  1.92it/s]


val_balance_acc : 0.746394 / val_avg_loss : 0.347904
Improve avg accuracy :
checkpoint saved to: ./model3.pt
                Epoch 4


100%|██████████| 85/85 [01:11<00:00,  1.18it/s]


train_balance_acc : 0.80644 / train_avg_loss : 0.298472


100%|██████████| 24/24 [00:12<00:00,  1.89it/s]


val_balance_acc : 0.752315 / val_avg_loss : 0.316258
Improve avg accuracy :
checkpoint saved to: ./model4.pt
                Epoch 5


100%|██████████| 85/85 [01:12<00:00,  1.17it/s]


train_balance_acc : 0.823498 / train_avg_loss : 0.271789


100%|██████████| 24/24 [00:12<00:00,  1.91it/s]


val_balance_acc : 0.733394 / val_avg_loss : 0.354955
                Epoch 6


100%|██████████| 85/85 [01:10<00:00,  1.20it/s]


train_balance_acc : 0.832131 / train_avg_loss : 0.254222


100%|██████████| 24/24 [00:12<00:00,  1.94it/s]


val_balance_acc : 0.728699 / val_avg_loss : 0.368378
                Epoch 7


100%|██████████| 85/85 [01:11<00:00,  1.19it/s]


train_balance_acc : 0.846562 / train_avg_loss : 0.233343


100%|██████████| 24/24 [00:12<00:00,  1.90it/s]


val_balance_acc : 0.724887 / val_avg_loss : 0.358291
                Epoch 8


100%|██████████| 85/85 [01:12<00:00,  1.18it/s]


train_balance_acc : 0.858562 / train_avg_loss : 0.211241


100%|██████████| 24/24 [00:12<00:00,  1.95it/s]


val_balance_acc : 0.738544 / val_avg_loss : 0.340689
                Epoch 9


100%|██████████| 85/85 [01:11<00:00,  1.18it/s]


train_balance_acc : 0.860986 / train_avg_loss : 0.205668


100%|██████████| 24/24 [00:12<00:00,  1.91it/s]

val_balance_acc : 0.738439 / val_avg_loss : 0.348372





In [18]:
dataloader_test = data_factory(
    cfg,
    mode="test",
    split_indexes=test_index,
    path_root=path_root,
    shuffle=False,
    drop_last=False,
    transform=transform_val,
)

In [19]:
model.eval()
list_prediction = []
list_id = []
for batch in tqdm(dataloader_test):
    x, annotation = batch
    with torch.no_grad():
        x = model(x.to(cfg["device_1"]))
    pred = torch.argmax(soft_max(x), dim=1)
    list_prediction.extend(pred.tolist())
    list_id.extend(annotation["ID"])

    # Create a DataFrame from the lists
df = pd.DataFrame({"Id": list_id, "Predicted": list_prediction})


grouped_counts = df.groupby("Id")["Predicted"].value_counts().unstack(fill_value=0)
df_grouped = grouped_counts.apply(np.argmax, axis=1)
df_grouped = pd.DataFrame({"Id": df_grouped.index, "Predicted": df_grouped.values})

# Export the DataFrame to a CSV file
df_grouped.to_csv(cfg["filename"], index=False)

print(f"CSV file '{cfg['filename']}' has been created successfully.")

if not cfg["no_wandb"]:
    submission_artifact = wandb.Artifact(
        "submission" + str(uuid.uuid1()).replace("-", ""), type="csv"
    )
    submission_artifact.add_file(cfg["filename"])
    wandb.log_artifact(submission_artifact)
    wandb.finish()

100%|██████████| 27/27 [00:26<00:00,  1.00it/s]

CSV file '/kaggle/working/submission_pretrain.csv' has been created successfully.





# Finetuning

## Create config.

This config contains all the hyparameter usefull for our experiments. There will be logged in wandb.

The weight for the MAE pretraining are avalaible and need to be download here : 

Drive with the weigth [here](https://drive.google.com/drive/u/0/folders/13yrd36hwnCahIzXtedJdakCQZdADHxLd)

In [20]:
cfg = {
    "who": "baptiste",  # or steven
    "no_wandb": False,
    "name_exp": f"exmaple -traning",
    "lr": 1e-5,
    "batch_size": 1,
    "nb_epochs": 20,
    "timm": True,  # is the model from timm
    "timm_model": cfg["timm_model"],
    "dino": False,
    "dino_size": "vits",  # vits, vitb, vitl, vitg
    "adapter": "lora",  # bottleneck, adaptformer, lora, prompttuning
    "model_name": "PatientModelCrossAttentionTab",  # 'vit_small_patch16_224.augreg_in21k', #timm based model
    "pretrained": True,
    "pretrained_path": save_path,
    "nb_class": 2,
    "scheduler": None,  # could be empty or linear, expo ...
    "dataset_name": "DatasetPerPatient",
    "device_1": "cuda:0",
    "device_2": "cuda:1",  # for double device
    # data augmentation
    "filename": f"{path_working}/submission_pretrain.csv",
    "filename_finetune": f"{path_working}/submission_finetune.csv",
    "sub_batch_size": 16,
    "latent_att": 512,
    "head_1": 8,  # 4
    "head_2": 2,
    "feature_dim": cfg["feature_dim"],  # DINOv2, VIT: 192 - 384
    "aggregation": "avg",  # sum, avg, max
    "beta_1": 0.5,
    "beta_2": 0.9,
    "weight_decay": 5e-2,
    "weight_class_0": 3.0,
    "weight_class_1": 1.0,
    "mask_ratio": 0.75,
    "image_size": 224,
    "patch_size": 16,
    "mae_pretrained": "",
    "with_tab": True,
    "mode_split": "load",  # load, strat
    "degrees": (-5, 5),
    "translate": (0.1, 0.1),
    "scale": (1.0, 1.1),
    "fill": (255, 232, 201),
    "p": 0.1,
}

In [21]:
# transform
transform_train = T.Compose(
    [
        v2.PILToTensor(),
        v2.RandomHorizontalFlip(p=cfg["p"]),
        v2.RandomVerticalFlip(p=cfg["p"]),
        v2.RandomAffine(
            degrees=cfg["degrees"],
            translate=cfg["translate"],
            scale=cfg["scale"],
            fill=cfg["fill"],
        ),
    ]
)

transform_val = T.Compose(
    [
        v2.PILToTensor(),
    ]
)

In [22]:
from transformers.optimization import get_linear_schedule_with_warmup

# load data
data_factory = DataloaderFactory()
model_factory = ModelFactory()
dataloader_train = data_factory(
    cfg,
    mode="train",
    split_indexes=train_index,
    path_root=path_root,
    shuffle=True,
    drop_last=True,
    transform=transform_train,
    oversampling={"0": 1, "1": 1},
)

dataloader_val = data_factory(
    cfg,
    mode="train",
    split_indexes=val_index,
    path_root=path_root,
    shuffle=False,
    drop_last=False,
    transform=transform_val,
    oversampling={"0": 1, "1": 1},
)


# load model
model = model_factory(cfg).to(cfg["device_1"])

# optimizer and scheduler
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=cfg["lr"],
    betas=(cfg["beta_1"], cfg["beta_2"]),
    weight_decay=cfg["weight_decay"],
)

num_training_steps = cfg["nb_epochs"] * len(dataloader_train)
schedulerr = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    optimizer, T_max=cfg["nb_epochs"], eta_min=5e-6
)
# weight = torch.tensor([cfg['weight_class_0'], cfg['weight_class_1']]).to(cfg['device_1'])
# loss_fn = torch.nn.CrossEntropyLoss(weight=weight)

soft_max = torch.nn.Softmax(1)


cfg["nb_params_train"] = sum(p.numel() for p in model.parameters() if p.requires_grad)

cfg["nb_params_tot"] = sum(p.numel() for p in model.parameters())

print("=" * 50)
print(f'The model has {cfg["nb_params_tot"]} parameters')
print(f'The model has {cfg["nb_params_train"]} trainable parameters')
print(
    f'It represents {np.round(100 * cfg["nb_params_train"]/cfg["nb_params_tot"], 3)} % trainable parameters'
)
print("=" * 50)

The configuration is:
who : baptiste
no_wandb : False
name_exp : exmaple -traning
lr : 1e-05
batch_size : 1
nb_epochs : 20
timm : True
timm_model : vit_small_patch16_224.augreg_in21k
dino : False
dino_size : vits
adapter : lora
model_name : PatientModelCrossAttentionTab
pretrained : True
pretrained_path : ./model4.pt
nb_class : 2
scheduler : None
dataset_name : DatasetPerPatient
device_1 : cuda:0
device_2 : cuda:1
filename : /kaggle/working/submission_pretrain.csv
filename_finetune : /kaggle/working/submission_finetune.csv
sub_batch_size : 16
latent_att : 512
head_1 : 8
head_2 : 2
feature_dim : 384
aggregation : avg
beta_1 : 0.5
beta_2 : 0.9
weight_decay : 0.05
weight_class_0 : 3.0
weight_class_1 : 1.0
mask_ratio : 0.75
image_size : 224
patch_size : 16
mae_pretrained : 
with_tab : True
mode_split : load
degrees : (-5, 5)
translate : (0.1, 0.1)
scale : (1.0, 1.1)
fill : (255, 232, 201)
p : 0.1
Loading custom model PatientModelCrossAttentionTab
We load the weigths ./model4.pt
Use lora ad

In [23]:
weight = torch.tensor([2.5, 1.0]).to("cuda:0")
loss_fn = torch.nn.CrossEntropyLoss(weight=weight)

In [24]:
#############################################
###              Training
#############################################

best_loss = 10000

if not cfg["no_wandb"]:
    run = wandb.init(
        project="DLMI",
        entity="ii_timm",
        name=cfg["name_exp"],
        config=cfg,
    )


print("Start Training ...")
for epoch in range(cfg["nb_epochs"]):
    model.train()
    print("=" * 50)
    print(" " * 15, f"Epoch {epoch}")
    print("=" * 50)

    train_cum_loss = 0
    start_time = time.time()

    #############################
    ###     VAL loop
    #############################
    train_pred = []
    train_label = []
    train_pred_ml = []

    for x, annotation in tqdm(dataloader_train):
        optimizer.zero_grad()
        x = x.to(cfg["device_1"]).squeeze(0)

        ## ML predictions
        converted_dict = {
            "ID": annotation["ID"][0],  # Convert list to single value
            "LYMPH_COUNT": annotation["LYMPH_COUNT"].item(),  # Convert tensor to float
            "AGE": annotation["AGE"].item(),  # Convert tensor to float
            "BIN_GENDER": int(annotation["BIN_GENDER"].item()),  # Convert tensor to int
            "LABEL": annotation[
                "LABEL"
            ].item(),  # Convert tensor to int (assuming LABEL should also be an int)
        }

        ## DL predictions
        features = [
            converted_dict["LYMPH_COUNT"],
            converted_dict["AGE"],
            converted_dict["BIN_GENDER"],
        ]
        features_for_prediction = [features]
        prediction = best_clf.predict_proba(features_for_prediction)

        if cfg["with_tab"]:
            # define tabular data
            lymph_count, age, bin_gender = (
                annotation["LYMPH_COUNT"],
                annotation["AGE"],
                annotation["BIN_GENDER"],
            )
            x_tab = torch.zeros((1, 4)).to(cfg["device_1"])
            x_tab[0, int(bin_gender)] = 1
            x_tab[0, 2] = torch.clamp(age + 1e-6 * np.random.rand(1)[0], 0, 1)
            x_tab[0, 3] = torch.clamp(lymph_count + 1e-6 * np.random.rand(1)[0], 0, 1)

            xout_sub_batch = model(x, x_tab, "train")
        else:
            # None tabular data
            xout_sub_batch = model(x, "train")

        # compute the loss and pred
        loss = loss_fn(
            xout_sub_batch.unsqueeze(0), annotation["LABEL"].to(cfg["device_1"])
        ) / (x.shape[0] / cfg["sub_batch_size"])
        pred = torch.argmax(soft_max(xout_sub_batch.unsqueeze(0)), dim=1)
        pred_ml = torch.argmax(
            0.75 * soft_max(xout_sub_batch.unsqueeze(0)).detach().cpu()
            + 0.25 * prediction,
            dim=1,
        )

        train_cum_loss += loss.item()

        # store the res.
        train_pred.extend(pred.detach().cpu().tolist())
        train_pred_ml.extend(pred_ml.detach().cpu().tolist())
        train_label.extend(annotation["LABEL"].detach().cpu().tolist())

        # backward
        loss.backward()
        optimizer.step()

    if scheduler is not None:
        scheduler.step()

    # compute agg. scores
    train_balance_acc = balanced_accuracy_score(train_pred, train_label)
    train_balance_acc_ml = balanced_accuracy_score(train_pred_ml, train_label)

    train_avg_loss = train_cum_loss / len(dataloader_train)
    print(
        f"train_balance_acc : {np.round(train_balance_acc, 6)} / train_balance_acc_ml : {np.round(train_balance_acc_ml, 6)} / train_avg_loss : {np.round(train_avg_loss, 6)}"
    )
    unique_train, count_train = np.unique(train_pred, return_counts=True)
    unique_train_ml, count_train_ml = np.unique(train_pred_ml, return_counts=True)
    print("Count", unique_train, count_train)
    print("Count_ml", unique_train_ml, count_train_ml)

    # edge case
    if len(unique_train) == 1:
        if unique_train[0] == 1:
            count_train = count_train.tolist()
            count_train.insert(0, 0)
        if unique_train[0] == 0:
            count_train = count_train.tolist()
            count_train.append(0)

    #############################
    ###     VAL loop
    #############################
    val_pred = []
    val_label = []
    val_pred_ml = []
    val_cum_loss = 0
    model.eval()
    for x, annotation in tqdm(dataloader_val):
        converted_dict = {
            "ID": annotation["ID"][0],  # Convert list to single value
            "LYMPH_COUNT": annotation["LYMPH_COUNT"].item(),  # Convert tensor to float
            "AGE": annotation["AGE"].item(),  # Convert tensor to float
            "BIN_GENDER": int(annotation["BIN_GENDER"].item()),  # Convert tensor to int
            "LABEL": annotation[
                "LABEL"
            ].item(),  # Convert tensor to int (assuming LABEL should also be an int)
        }

        ## Pred ML

        features = [
            converted_dict["LYMPH_COUNT"],
            converted_dict["AGE"],
            converted_dict["BIN_GENDER"],
        ]
        features_for_prediction = [features]
        prediction = best_clf.predict_proba(features_for_prediction)

        # Pred DL

        x = x.to(cfg["device_1"]).squeeze(0)

        if cfg["with_tab"]:
            # define tabular data
            lymph_count, age, bin_gender = (
                annotation["LYMPH_COUNT"],
                annotation["AGE"],
                annotation["BIN_GENDER"],
            )
            x_tab = torch.zeros((1, 4)).to(cfg["device_1"])
            x_tab[0, int(bin_gender)] = 1
            x_tab[0, 2] = age
            x_tab[0, 3] = lymph_count

            xout_sub_batch = model(x, x_tab, "val")
        else:
            xout_sub_batch = model(x, "val")
        # compute loss
        loss = loss_fn(
            xout_sub_batch.unsqueeze(0), annotation["LABEL"].to(cfg["device_1"])
        ) / (x.shape[0] / cfg["sub_batch_size"])
        pred = torch.argmax(soft_max(xout_sub_batch.unsqueeze(0)), dim=1)
        pred_ml = torch.argmax(
            0.75 * soft_max(xout_sub_batch.unsqueeze(0)).detach().cpu()
            + 0.25 * prediction,
            dim=1,
        )

        val_cum_loss += loss.item()
        val_pred.extend(pred.detach().cpu().tolist())
        val_pred_ml.extend(pred_ml.detach().cpu().tolist())
        val_label.extend(annotation["LABEL"].detach().cpu().tolist())

    # compute agg. scores
    val_balance_acc = balanced_accuracy_score(val_pred, val_label)
    val_balance_acc_ml = balanced_accuracy_score(val_pred_ml, val_label)
    val_avg_loss = val_cum_loss / len(dataloader_val)
    print(
        f"val_balance_acc : {np.round(val_balance_acc, 6)} / val_balance_acc_ml : {np.round(val_balance_acc_ml, 6)} / val_avg_loss : {np.round(val_avg_loss, 6)}"
    )

    unique_val, count_val = np.unique(val_pred, return_counts=True)
    unique_val_ml, count_val_ml = np.unique(val_pred_ml, return_counts=True)
    print("Count", unique_val, count_val)
    print("Count_ml", unique_val_ml, count_val_ml)

    # edge case
    if len(unique_val) == 1:
        if unique_val[0] == 1:
            count_val = count_val.tolist()
            count_val.insert(0, 0)
        if unique_val[0] == 0:
            count_val = count_val.tolist()
            count_val.append(0)
    print(val_avg_loss, best_loss)

    # Save best model + prints
    if val_avg_loss < best_loss:
        best_loss = val_avg_loss

        print("Improve avg loss :")
        save_path_finetune = os.path.join("./", "model" + str(epoch) + "_finetune.pt")
        torch.save(
            {
                "epoch": epoch,
                "model_state_dict": model.state_dict(),
                "optimizer_state_dict": optimizer.state_dict(),
                "scheduler_state_dict": scheduler.state_dict()
                if cfg["scheduler"] is not None
                else None,
            },
            save_path_finetune,
        )
        print("checkpoint saved to: {}".format(save_path_finetune))

    print(
        "time",
        (time.time() - start_time) / (len(dataloader_val) + len(dataloader_train)),
    )

    # Save in Wandb
    if not cfg["no_wandb"]:
        wandb.log(
            {
                "epoch": epoch,
                "balance_acc/train": train_balance_acc,
                "balance_acc_ml/train": train_balance_acc_ml,
                "loss/train": train_avg_loss,
                "balance_acc/val": val_balance_acc,
                "balance_acc_ml/val": val_balance_acc_ml,
                "loss/val": val_avg_loss,
                "time": (time.time() - start_time)
                / (len(dataloader_val) + len(dataloader_train)),
                "count_train_0": count_train[0],
                "count_train_1": count_train[1],
                "count_val_0": count_val[0],
                "count_val_1": count_val[1],
            }
        )

[34m[1mwandb[0m: Currently logged in as: [33mbaptcallard[0m ([33mii_timm[0m). Use [1m`wandb login --relogin`[0m to force relogin


Start Training ...
                Epoch 0


100%|██████████| 130/130 [01:30<00:00,  1.44it/s]


train_balance_acc : 0.565359 / train_balance_acc_ml : 0.965116 / train_avg_loss : 0.181272
Count [0 1] [85 45]
Count_ml [0 1] [43 87]


100%|██████████| 33/33 [00:11<00:00,  2.84it/s]


val_balance_acc : 0.69697 / val_balance_acc_ml : 0.925926 / val_avg_loss : 0.160286
Count [1] [33]
Count_ml [0 1] [ 6 27]
0.1602855297652158 10000
Improve avg loss :
checkpoint saved to: ./model0_finetune.pt
time 0.6251700026857341
                Epoch 1


100%|██████████| 130/130 [01:27<00:00,  1.48it/s]


train_balance_acc : 0.643475 / train_balance_acc_ml : 0.983871 / train_avg_loss : 0.170072
Count [0 1] [ 24 106]
Count_ml [0 1] [37 93]


100%|██████████| 33/33 [00:11<00:00,  2.78it/s]


val_balance_acc : 0.910714 / val_balance_acc_ml : 0.96 / val_avg_loss : 0.136785
Count [0 1] [ 5 28]
Count_ml [0 1] [ 8 25]
0.13678534182183671 0.1602855297652158
Improve avg loss :
checkpoint saved to: ./model1_finetune.pt
time 0.6121859345699381
                Epoch 2


100%|██████████| 130/130 [01:26<00:00,  1.50it/s]


train_balance_acc : 0.756235 / train_balance_acc_ml : 0.873611 / train_avg_loss : 0.14586
Count [0 1] [41 89]
Count_ml [0 1] [40 90]


100%|██████████| 33/33 [00:11<00:00,  2.77it/s]


val_balance_acc : 0.738095 / val_balance_acc_ml : 0.794118 / val_avg_loss : 0.146761
Count [0 1] [21 12]
Count_ml [0 1] [17 16]
0.14676117961944052 0.13678534182183671
time 0.6067339203840384
                Epoch 3


100%|██████████| 130/130 [01:27<00:00,  1.49it/s]


train_balance_acc : 0.870532 / train_balance_acc_ml : 0.896167 / train_avg_loss : 0.116524
Count [0 1] [37 93]
Count_ml [0 1] [38 92]


100%|██████████| 33/33 [00:11<00:00,  2.77it/s]


val_balance_acc : 0.916667 / val_balance_acc_ml : 0.916667 / val_avg_loss : 0.064108
Count [0 1] [12 21]
Count_ml [0 1] [12 21]
0.0641081325681598 0.13678534182183671
Improve avg loss :
checkpoint saved to: ./model3_finetune.pt
time 0.6094950079186562
                Epoch 4


100%|██████████| 130/130 [01:28<00:00,  1.47it/s]


train_balance_acc : 0.800325 / train_balance_acc_ml : 0.853084 / train_avg_loss : 0.106288
Count [0 1] [42 88]
Count_ml [0 1] [42 88]


100%|██████████| 33/33 [00:12<00:00,  2.70it/s]


val_balance_acc : 0.8125 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.109998
Count [0 1] [16 17]
Count_ml [0 1] [14 19]
0.10999823945828459 0.0641081325681598
time 0.6165041382327402
                Epoch 5


100%|██████████| 130/130 [01:27<00:00,  1.49it/s]


train_balance_acc : 0.880926 / train_balance_acc_ml : 0.912991 / train_avg_loss : 0.099894
Count [0 1] [41 89]
Count_ml [0 1] [43 87]


100%|██████████| 33/33 [00:11<00:00,  2.77it/s]


val_balance_acc : 0.857143 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.091142
Count [0 1] [14 19]
Count_ml [0 1] [14 19]
0.09114168782252818 0.0641081325681598
time 0.6103764823609334
                Epoch 6


100%|██████████| 130/130 [01:27<00:00,  1.49it/s]


train_balance_acc : 0.898739 / train_balance_acc_ml : 0.912991 / train_avg_loss : 0.0821
Count [0 1] [41 89]
Count_ml [0 1] [43 87]


100%|██████████| 33/33 [00:11<00:00,  2.82it/s]


val_balance_acc : 0.884615 / val_balance_acc_ml : 0.884615 / val_avg_loss : 0.07291
Count [0 1] [13 20]
Count_ml [0 1] [13 20]
0.07291047138542953 0.0641081325681598
time 0.6065890467240035
                Epoch 7


100%|██████████| 130/130 [01:27<00:00,  1.48it/s]


train_balance_acc : 0.891667 / train_balance_acc_ml : 0.93956 / train_avg_loss : 0.079376
Count [0 1] [40 90]
Count_ml [0 1] [39 91]


100%|██████████| 33/33 [00:12<00:00,  2.75it/s]


val_balance_acc : 0.884615 / val_balance_acc_ml : 0.916667 / val_avg_loss : 0.068481
Count [0 1] [13 20]
Count_ml [0 1] [12 21]
0.06848126766315986 0.0641081325681598
time 0.6122734502780657
                Epoch 8


100%|██████████| 130/130 [01:27<00:00,  1.48it/s]


train_balance_acc : 0.921245 / train_balance_acc_ml : 0.927778 / train_avg_loss : 0.054952
Count [0 1] [39 91]
Count_ml [0 1] [40 90]


100%|██████████| 33/33 [00:11<00:00,  2.78it/s]


val_balance_acc : 0.794118 / val_balance_acc_ml : 0.8125 / val_avg_loss : 0.203237
Count [0 1] [17 16]
Count_ml [0 1] [16 17]
0.20323678261995543 0.0641081325681598
time 0.6123736387381524
                Epoch 9


100%|██████████| 130/130 [01:27<00:00,  1.48it/s]


train_balance_acc : 0.927778 / train_balance_acc_ml : 0.945833 / train_avg_loss : 0.044545
Count [0 1] [40 90]
Count_ml [0 1] [40 90]


100%|██████████| 33/33 [00:11<00:00,  2.81it/s]


val_balance_acc : 0.857143 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.119003
Count [0 1] [14 19]
Count_ml [0 1] [14 19]
0.11900278705698755 0.0641081325681598
time 0.6101426844216563
                Epoch 10


100%|██████████| 130/130 [01:27<00:00,  1.48it/s]


train_balance_acc : 0.952179 / train_balance_acc_ml : 0.963889 / train_avg_loss : 0.043781
Count [0 1] [41 89]
Count_ml [0 1] [40 90]


100%|██████████| 33/33 [00:11<00:00,  2.77it/s]


val_balance_acc : 0.857143 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.108016
Count [0 1] [14 19]
Count_ml [0 1] [14 19]
0.10801649957394341 0.0641081325681598
time 0.6129209761239268
                Epoch 11


100%|██████████| 130/130 [01:26<00:00,  1.51it/s]


train_balance_acc : 0.963889 / train_balance_acc_ml : 0.963889 / train_avg_loss : 0.040862
Count [0 1] [40 90]
Count_ml [0 1] [40 90]


100%|██████████| 33/33 [00:12<00:00,  2.69it/s]


val_balance_acc : 0.857143 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.088678
Count [0 1] [14 19]
Count_ml [0 1] [14 19]
0.08867768473503405 0.0641081325681598
time 0.6043388755774937
                Epoch 12


100%|██████████| 130/130 [01:28<00:00,  1.46it/s]


train_balance_acc : 0.905844 / train_balance_acc_ml : 0.905844 / train_avg_loss : 0.065654
Count [0 1] [42 88]
Count_ml [0 1] [42 88]


100%|██████████| 33/33 [00:11<00:00,  2.81it/s]


val_balance_acc : 0.954545 / val_balance_acc_ml : 0.954545 / val_avg_loss : 0.049132
Count [0 1] [11 22]
Count_ml [0 1] [11 22]
0.04913204532666979 0.0641081325681598
Improve avg loss :
checkpoint saved to: ./model12_finetune.pt
time 0.619249358498977
                Epoch 13


100%|██████████| 130/130 [01:27<00:00,  1.49it/s]


train_balance_acc : 0.963889 / train_balance_acc_ml : 0.981944 / train_avg_loss : 0.029378
Count [0 1] [40 90]
Count_ml [0 1] [40 90]


100%|██████████| 33/33 [00:11<00:00,  2.84it/s]


val_balance_acc : 0.857143 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.138232
Count [0 1] [14 19]
Count_ml [0 1] [14 19]
0.13823183694006674 0.04913204532666979
time 0.6058681084334485
                Epoch 14


100%|██████████| 130/130 [01:27<00:00,  1.49it/s]


train_balance_acc : 0.947741 / train_balance_acc_ml : 0.947741 / train_avg_loss : 0.037326
Count [0 1] [43 87]
Count_ml [0 1] [43 87]


100%|██████████| 33/33 [00:11<00:00,  2.77it/s]


val_balance_acc : 0.857143 / val_balance_acc_ml : 0.884615 / val_avg_loss : 0.08347
Count [0 1] [14 19]
Count_ml [0 1] [13 20]
0.08347021699121411 0.04913204532666979
time 0.6083543271374848
                Epoch 15


100%|██████████| 130/130 [01:27<00:00,  1.49it/s]


train_balance_acc : 0.930366 / train_balance_acc_ml : 0.941017 / train_avg_loss : 0.068677
Count [0 1] [43 87]
Count_ml [0 1] [42 88]


100%|██████████| 33/33 [00:11<00:00,  2.79it/s]


val_balance_acc : 0.857143 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.116876
Count [0 1] [14 19]
Count_ml [0 1] [14 19]
0.11687600659562039 0.04913204532666979
time 0.6069642049403279
                Epoch 16


100%|██████████| 130/130 [01:26<00:00,  1.50it/s]


train_balance_acc : 0.981944 / train_balance_acc_ml : 0.981944 / train_avg_loss : 0.019099
Count [0 1] [40 90]
Count_ml [0 1] [40 90]


100%|██████████| 33/33 [00:12<00:00,  2.75it/s]


val_balance_acc : 0.8125 / val_balance_acc_ml : 0.8125 / val_avg_loss : 0.204303
Count [0 1] [16 17]
Count_ml [0 1] [16 17]
0.20430327898297532 0.04913204532666979
time 0.6070266311154043
                Epoch 17


100%|██████████| 130/130 [01:28<00:00,  1.48it/s]


train_balance_acc : 0.952179 / train_balance_acc_ml : 0.958604 / train_avg_loss : 0.049862
Count [0 1] [41 89]
Count_ml [0 1] [42 88]


100%|██████████| 33/33 [00:11<00:00,  2.75it/s]


val_balance_acc : 0.857143 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.140232
Count [0 1] [14 19]
Count_ml [0 1] [14 19]
0.14023184849208697 0.04913204532666979
time 0.6139632631664628
                Epoch 18


100%|██████████| 130/130 [01:28<00:00,  1.47it/s]


train_balance_acc : 0.969992 / train_balance_acc_ml : 0.969992 / train_avg_loss : 0.027799
Count [0 1] [41 89]
Count_ml [0 1] [41 89]


100%|██████████| 33/33 [00:11<00:00,  2.82it/s]


val_balance_acc : 0.821154 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.118872
Count [0 1] [13 20]
Count_ml [0 1] [14 19]
0.11887182341091705 0.04913204532666979
time 0.6144075583826545
                Epoch 19


100%|██████████| 130/130 [01:28<00:00,  1.47it/s]


train_balance_acc : 0.981944 / train_balance_acc_ml : 0.981944 / train_avg_loss : 0.020556
Count [0 1] [40 90]
Count_ml [0 1] [40 90]


100%|██████████| 33/33 [00:11<00:00,  2.79it/s]

val_balance_acc : 0.821154 / val_balance_acc_ml : 0.857143 / val_avg_loss : 0.164337
Count [0 1] [13 20]
Count_ml [0 1] [14 19]
0.16433718178858603 0.04913204532666979
time 0.6135741889111104





In [25]:
if not cfg["no_wandb"]:
    model_artifact = wandb.Artifact(
        "model" + str(uuid.uuid1()).replace("-", ""), type="model"
    )
    model_artifact.add_file(save_path_finetune)
    wandb.log_artifact(model_artifact)

    description_artifact = wandb.Artifact(
        "description_model" + str(uuid.uuid1()).replace("-", ""), type="python"
    )

    !cp -r $path_working/dlmi/src/* $path_working/
    description_artifact.add_file(f"{path_working}/model.py")
    description_artifact.add_file(f"{path_working}/utils.py")
    description_artifact.add_file(f"{path_working}/data.py")
    wandb.log_artifact(description_artifact)

# Prediction

In [26]:
dataloader_test = data_factory(
    cfg,
    mode="test",
    split_indexes=test_index,
    path_root=path_root,
    shuffle=False,
    drop_last=False,
    transform=transform_val,
)

In [27]:
test_pred = []
test_ID = []


map_results = {
    "Id": [],
    "Predicted": [],
}


map_results_ml = {
    "Id": [],
    "Predicted": [],
}


map_results_logit = {
    "Id": [],
    "logit_0": [],
    "logit_1": [],
}

print("Load model", save_path_finetune)
model.load_state_dict(torch.load(save_path_finetune)["model_state_dict"])
model.eval()
for x, annotation in tqdm(dataloader_test):
    # forward
    converted_dict = {
        "ID": annotation["ID"][0],  # Convert list to single value
        "LYMPH_COUNT": annotation["LYMPH_COUNT"].item(),  # Convert tensor to float
        "AGE": annotation["AGE"].item(),  # Convert tensor to float
        "BIN_GENDER": int(annotation["BIN_GENDER"].item()),  # Convert tensor to int
        "LABEL": annotation[
            "LABEL"
        ].item(),  # Convert tensor to int (assuming LABEL should also be an int)
    }

    features = [
        converted_dict["LYMPH_COUNT"],
        converted_dict["AGE"],
        converted_dict["BIN_GENDER"],
    ]
    features_for_prediction = [features]
    prediction_ml = best_clf.predict_proba(features_for_prediction)

    with torch.no_grad():
        x = x.to(cfg["device_1"]).squeeze(0)

        if cfg["with_tab"]:
            # define tabular data
            lymph_count, age, bin_gender = (
                annotation["LYMPH_COUNT"],
                annotation["AGE"],
                annotation["BIN_GENDER"],
            )
            x_tab = torch.zeros((1, 4)).to(cfg["device_1"])
            x_tab[0, int(bin_gender)] = 1
            x_tab[0, 2] = age
            x_tab[0, 3] = lymph_count

            x = model(x, x_tab, "val")
        else:
            x = model(x, "val")

        logit = soft_max(x.unsqueeze(0))
        pred = torch.argmax(logit, dim=1)
        pred_ml = torch.argmax(
            0.85 * soft_max(x.unsqueeze(0)).detach().cpu() + 0.15 * prediction_ml, dim=1
        )

        # DL
        map_results["Predicted"].extend(pred.detach().cpu().tolist())
        map_results["Id"].extend(annotation["ID"])

        map_results_logit["logit_0"].append(logit[0][0].item())
        map_results_logit["logit_1"].append(logit[0][1].item())
        map_results_logit["Id"].extend(annotation["ID"])

        # DL + ML
        map_results_ml["Predicted"].extend(pred_ml.detach().cpu().tolist())
        map_results_ml["Id"].extend(annotation["ID"])

Load model ./model12_finetune.pt


100%|██████████| 42/42 [00:16<00:00,  2.55it/s]


Save in Wandb !

In [28]:
df_results = pd.DataFrame(map_results)
df_results.to_csv(cfg["filename_finetune"], index=False)

df_results_ml = pd.DataFrame(map_results_ml)
df_results_ml.to_csv("submission_dl_ml.csv", index=False)


df_results = pd.DataFrame(map_results_logit)
df_results.to_csv("logit.csv", index=False)

if not cfg["no_wandb"]:
    # log index
    df_train_index = pd.DataFrame({"train": train_index})
    df_train_index.to_csv("train_index.csv", index=False)
    df_val_index = pd.DataFrame({"val": val_index})
    df_val_index.to_csv("val_index.csv", index=False)

    csv_artifact = wandb.Artifact(
        "submission" + str(uuid.uuid1()).replace("-", ""), type="csv"
    )
    csv_artifact.add_file(cfg["filename_finetune"])
    csv_artifact.add_file("submission_dl_ml.csv")
    csv_artifact.add_file(cfg["filename"])
    csv_artifact.add_file("logit.csv")
    csv_artifact.add_file("train_index.csv")
    csv_artifact.add_file("val_index.csv")
    wandb.log_artifact(csv_artifact)

    wandb.finish()

VBox(children=(Label(value='103.869 MB of 103.869 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
balance_acc/train,▁▂▄▆▅▆▇▆▇▇▇█▇█▇▇█▇██
balance_acc/val,▁▇▂▇▄▅▆▆▄▅▅▅█▅▅▅▄▅▄▄
balance_acc_ml/train,▇█▂▃▁▄▄▆▅▆▇▇▄█▆▆█▇▇█
balance_acc_ml/val,▇█▁▆▄▄▅▆▂▄▄▄█▄▅▄▂▄▄▄
count_train_0,█▁▃▂▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
count_train_1,▁█▆▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
count_val_0,▁▃█▅▆▆▅▅▇▆▆▆▅▆▆▆▆▆▅▅
count_val_1,█▆▁▄▃▃▄▄▂▃▃▃▄▃▃▃▃▃▄▄
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss/train,██▆▅▅▄▄▄▃▂▂▂▃▁▂▃▁▂▁▁

0,1
balance_acc/train,0.98194
balance_acc/val,0.82115
balance_acc_ml/train,0.98194
balance_acc_ml/val,0.85714
count_train_0,40.0
count_train_1,90.0
count_val_0,13.0
count_val_1,20.0
epoch,19.0
loss/train,0.02056
