# Deep Learning for Medical Imaging
## Challenge 2024: Lymphocytosis classification

Kelthoum KERBOUA: kelthoum.kerboua@telecom-paris.fr

Fatima BALDE: fatima.balde@telecom-paris.fr

### Import libraries and Load dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import glob

import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
from PIL import Image


import pandas as pd
import cv2
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score,accuracy_score

#!pip install segmentation_models_pytorch
#import segmentation_models_pytorch as smp

import os
import time
import copy
import pickle
from datetime import datetime, date

import warnings
warnings.filterwarnings("ignore")






We used Google Colab with a T4 GPU.

In [None]:
# Pick the compute device: the GPU when CUDA is available, the CPU otherwise.
cuda = torch.cuda.is_available()
if cuda:
    device = 'cuda'
else:
    device = 'cpu'

The dataset is stored as a compressed archive on Google Drive. The following cells mount the drive and extract the data.

In [None]:
# Mount Google Drive under /content/drive so the dataset archive stored there can be read.
# This import only exists inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Extract the challenge archive from the mounted Drive.
# `!` is an IPython shell escape, so this line only works inside a notebook.
!unzip "./drive/MyDrive/dlmi-lymphocytosis-classification.zip"
# Folder name used below as the root of the extracted dataset.
directory='dlmi-lymphocytosis-classification'

## Model 1

### Pre-processing

We store the blood smears for each patient in a list and calculate their age.

In [None]:
def _load_patient_table(csv_path, image_root, reference_date):
    """Load one split's clinical table and attach each patient's images.

    Args:
        csv_path: Path of the CSV file describing the patients of the split.
        image_root: Folder containing one sub-folder of ``*.jpg`` files per
            patient ``ID``.
        reference_date: ``datetime`` against which the age is computed.

    Returns:
        The dataframe read from ``csv_path`` with four extra columns:
        ``IMAGES`` (list of arrays returned by ``cv2.imread``), ``YOB`` and
        ``MOB`` (year / month of birth) and ``AGE`` (whole years, as float).
    """
    table = pd.read_csv(csv_path)
    table['IMAGES'] = None
    # NOTE(review): the date format is inferred by pandas here; confirm that
    # DOB uses a single format in these CSV files.
    table['YOB'] = pd.DatetimeIndex(table['DOB']).year
    table['MOB'] = pd.DatetimeIndex(table['DOB']).month
    table['DOB'] = pd.to_datetime(table['DOB'])

    # pandas >= 2.0 refuses `.astype('<m8[Y]')` (only s/ms/us/ns resolutions
    # are supported), so the number of elapsed whole years is derived from the
    # day count. 365.2425 days is the length numpy uses for its 'Y' unit.
    elapsed_days = (reference_date - table['DOB']).dt.days
    table['AGE'] = np.floor(elapsed_days / 365.2425)

    for i, row in tqdm(table.iterrows(), total=len(table)):
        list_paths = glob.glob(f'{image_root}/{row["ID"]}/*.jpg')
        # NOTE(review): cv2.imread yields BGR arrays and None for unreadable
        # files; confirm downstream statistics assume the same channel order.
        table.at[i, 'IMAGES'] = [cv2.imread(path) for path in list_paths]
    return table


directory = 'dlmi-lymphocytosis-classification'
# A single reference date keeps train and test ages consistent.
current_date = datetime.now()

training_data = _load_patient_table(
    f'{directory}/trainset/trainset_true.csv',
    f'{directory}/trainset',
    current_date,
)
test_data = _load_patient_table(
    directory + '/testset/testset_data.csv',
    f'{directory}/testset',
    current_date,
)

# Patient IDs in the row order of the test table (used for the submission file).
test_IDs = test_data['ID'].tolist()

100%|██████████| 163/163 [00:13<00:00, 12.49it/s]
100%|██████████| 42/42 [00:02<00:00, 14.72it/s]


We define a MILDataset which returns the list of images (blood smears), clinical attributes and label for each patient.

In [None]:
class MILDataset(torch.utils.data.Dataset):
    """Patient-level dataset for multiple-instance learning.

    Every item corresponds to one row of ``data`` and gathers the whole bag of
    images of that patient together with its clinical attributes and label.

    Args:
        data: Dataframe with the columns ``IMAGES``, ``LABEL``, ``MOB``,
            ``YOB``, ``AGE`` and ``LYMPH_COUNT``.
        transform: Callable applied to each image; it must return tensors of
            identical shape so that they can be stacked.
    """

    def __init__(self, data, transform=transforms.ToTensor()):
        super().__init__()
        self.transform = transform
        self.data = data

    def __len__(self):
        """Return the number of patients."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(images, clinical, label)`` for the patient at position ``idx``."""
        patient = self.data.iloc[idx]
        bag = torch.stack([self.transform(smear) for smear in patient['IMAGES']], 0)
        clinical = np.array(
            [patient[column] for column in ('MOB', 'YOB', 'AGE', 'LYMPH_COUNT')]
        )
        return bag, clinical, patient['LABEL']

We create a collate_fn function that concatenates the lists of blood smears and shuffles them, while remembering the index of their bags (patient) and their place in the bag.

In [None]:
def collate_fn(inputs):
    """Merge several patient bags into one shuffled batch of images.

    Args:
        inputs: Sequence of ``(images, clinical, label)`` triples, one per
            patient, where ``images`` is a tensor whose first dimension is the
            number of images in the bag.

    Returns:
        A tuple ``(images, clinical, labels, indices_of_bags, indices_in_bags)``.
        ``images`` is the concatenation of all bags in random order;
        ``indices_of_bags[k]`` is the position of the patient owning image
        ``k`` and ``indices_in_bags[k]`` the rank of that image inside the
        patient's bag. ``clinical`` (float) and ``labels`` (long) keep one
        entry per patient, in input order.
    """
    bags, clinical, labels = zip(*inputs)
    bag_sizes = [len(bag) for bag in bags]

    # Patient index repeated once per image of that patient.
    indices_of_bags = torch.cat([
        torch.full((size,), bag_id, dtype=torch.long)
        for bag_id, size in enumerate(bag_sizes)
    ])
    # Rank of every image inside its own bag.
    indices_in_bags = torch.cat([
        torch.arange(size, dtype=torch.long) for size in bag_sizes
    ])

    images = torch.cat(bags, 0)
    # One permutation is shared by the images and both index vectors so that
    # they stay aligned after shuffling.
    shuffle = torch.randperm(len(images))

    return (
        images[shuffle],
        torch.tensor(clinical, dtype=torch.float),
        torch.tensor(labels, dtype=torch.long),
        indices_of_bags[shuffle],
        indices_in_bags[shuffle],
    )

We split the dataset into a training set and a validation set (50%/50%), stratified so that both keep the same proportion of labels. Data augmentation (random rotation, horizontal and vertical flips) is applied to the training set only. All images go through the same pre-processing: center crop to 112x112, conversion to a torch tensor, and normalization.

In [None]:
# Per-channel statistics used to normalise every image of Model 1.
_CHANNEL_MEAN = [0.8183, 0.6977, 0.7034]
_CHANNEL_STD = [0.1917, 0.2156, 0.0917]


def _tensor_and_normalize():
    """Return the final two steps shared by the train and test pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(torch.tensor(_CHANNEL_MEAN), torch.tensor(_CHANNEL_STD)),
    ]


# Training pipeline: random rotation and flips around a 112x112 center crop.
train_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomRotation(degrees=(0, 180)),
    transforms.CenterCrop(112),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    *_tensor_and_normalize(),
])

# Evaluation pipeline: deterministic center crop only.
test_transforms = transforms.Compose([
    transforms.ToPILImage(),
    transforms.CenterCrop(112),
    *_tensor_and_normalize(),
])

# Stratified 50/50 split of the labelled patients (fixed seed for reproducibility).
train_data, valid_data = train_test_split(
    training_data,
    test_size=0.5,
    stratify=training_data['LABEL'],
    random_state=1,
)

train_dataset = MILDataset(train_data, train_transforms)
valid_dataset = MILDataset(valid_data, test_transforms)
test_dataset = MILDataset(test_data, test_transforms)

# 16 patients per batch; only the training loader shuffles the patients.
_loader_options = dict(batch_size=16, collate_fn=collate_fn)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_options)
valid_loader = DataLoader(valid_dataset, shuffle=False, **_loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_options)

### Definition of MIL Model

In [None]:
class MILModel(nn.Module):
    """Multiple-instance model with an image branch and a clinical branch.

    The image branch encodes every image with a ResNet-18 backbone (final
    classification layer removed), averages the 512-d features of each
    patient and maps the mean to one logit. The clinical branch is a small
    MLP mapping the 4 clinical attributes to one logit.
    """

    def __init__(self):
        super().__init__()

        # `weights="DEFAULT"` replaces the deprecated `pretrained=True`.
        self.resnet = models.resnet18(weights="DEFAULT")
        # Everything except the final fully connected layer. `self.resnet` is
        # kept as an attribute so that checkpoint keys stay unchanged.
        self.resnet_layers = torch.nn.Sequential(*list(self.resnet.children())[:-1])
        self.fc = nn.Linear(512, 1)

        self.fc1 = nn.Linear(4, 2)
        self.fc2 = nn.Linear(2, 1)
        self.relu = nn.ReLU()

    def forward(self, images, clinical, indices_of_bags, indices_in_bags):
        """Compute one logit per patient for each branch.

        Args:
            images: All images of the batch, concatenated along dimension 0.
            clinical: Clinical attributes, one row of 4 values per patient.
            indices_of_bags: For each image, the index of its patient.
            indices_in_bags: For each image, its rank inside the patient's bag.

        Returns:
            ``(image_logits, clinical_logits)``, two 1-D tensors with one
            entry per patient.
        """
        # Encode the images by chunks of 100 to bound peak memory.
        features = []
        for start in range(0, images.shape[0], 100):
            features.append(self.resnet_layers(images[start:start + 100]))
        features = torch.cat(features, 0)
        features = features.view(features.shape[0], -1)

        n_bags = int(indices_of_bags.max()) + 1
        max_bag_len = int(indices_in_bags.max()) + 1

        # Scatter the features into a zero-padded (bag, slot, feature) tensor.
        padded = features.new_zeros(n_bags, max_bag_len, features.shape[1])
        padded[indices_of_bags, indices_in_bags] = features

        # Number of real (non-padding) images per bag.
        occupancy = torch.zeros(n_bags, max_bag_len, device=features.device, dtype=torch.long)
        occupancy[indices_of_bags, indices_in_bags] = 1
        bag_sizes = occupancy.sum(dim=1, keepdim=True)

        # Mean pooling over the real images only: padding slots add zeros.
        embeddings = padded.sum(dim=1) / bag_sizes
        output = self.fc(embeddings)

        output2 = self.fc2(self.relu(self.fc1(clinical)))

        return output.view(-1), output2.view(-1)

### Training of the model

The following functions can be used to train, evaluate and save the model.

In [None]:
def validation(model, valid_loader, loss_function, clinical_att):
    """Evaluate ``model`` on a validation loader.

    The model is switched to evaluation mode for the duration of the call, so
    that BatchNorm layers use their running statistics and are not updated
    with validation data. Its previous mode is restored afterwards.

    Args:
        model: Model returning ``(image_logits, clinical_logits)``.
        valid_loader: Loader yielding batches produced by ``collate_fn``.
        loss_function: Callable ``(logits, labels) -> loss`` (per-sample or scalar).
        clinical_att: When true, the two logits are averaged; otherwise only
            the image logits are used.

    Returns:
        ``(mean_loss, accuracy, balanced_accuracy)`` over all validation patients.

    Raises:
        ValueError: If the loader yields no sample.
    """
    was_training = model.training
    model.eval()

    validation_loss = 0.0
    validation_accuracy = 0.0
    number_samples = 0
    validation_proba = []
    validation_labels = []

    try:
        with torch.no_grad():
            for batch in valid_loader:
                images, clinical, labels, indices_of_bags, indices_in_bags = batch
                labels = labels.to(device)
                outputs1, outputs2 = model(
                    images.to(device),
                    clinical.to(device),
                    indices_of_bags.to(device),
                    indices_in_bags.to(device),
                )
                outputs = (outputs1 + outputs2) / 2 if clinical_att else outputs1
                loss = loss_function(outputs, labels).sum()
                batch_proba = outputs.sigmoid()
                batch_predictions = (batch_proba > 0.5).long()

                validation_loss += float(loss)
                validation_accuracy += float((batch_predictions == labels).float().sum())
                number_samples += len(labels)
                validation_labels.append(labels.cpu())
                validation_proba.append(batch_proba.cpu())
    finally:
        # Leave the model in the mode the caller had set.
        model.train(was_training)

    if number_samples == 0:
        raise ValueError('valid_loader yielded no samples')

    validation_proba = torch.cat(validation_proba, 0)
    validation_labels = torch.cat(validation_labels, 0)
    bal_accuracy = balanced_accuracy_score(validation_labels, validation_proba > 0.5)
    validation_loss = validation_loss / number_samples
    validation_accuracy = validation_accuracy / number_samples
    return validation_loss, validation_accuracy, bal_accuracy

def save_model(model, optimizer):
    """Write the model and optimizer states to ``best_model.pth``.

    Every file of the current directory whose name ends with
    ``best_model.pth`` is deleted first, then the new checkpoint is saved as a
    dict with the keys ``model_state_dict`` and ``optimizer_state_dict``.
    """
    stale_checkpoints = [
        name for name in os.listdir('./') if name.endswith('best_model.pth')
    ]
    for name in stale_checkpoints:
        os.remove(os.path.join('./', name))

    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    torch.save(checkpoint, 'best_model.pth')

    print("Saved Best Model")

def BCEwithlogits(outputs, labels):
    """Return the per-sample binary cross-entropy of logits ``outputs``.

    ``labels`` is cast to float; no reduction is applied, so the result has
    the same shape as ``outputs``.
    """
    float_targets = labels.float()
    return F.binary_cross_entropy_with_logits(
        outputs, float_targets, reduction='none'
    )

def DiceLoss(outputs, targets):
    """Binary soft Dice loss computed on logits.

    The original body called ``smp.losses.DiceLoss`` although the
    ``segmentation_models_pytorch`` import is commented out, so any call
    raised ``NameError``. This version uses torch only and is intended to
    mirror that loss in binary mode with its defaults (logits as input, no
    smoothing, eps=1e-7).

    Args:
        outputs: Logits, any shape (flattened internally).
        targets: Binary labels with the same number of elements as ``outputs``.

    Returns:
        A scalar tensor ``1 - dice``; it is 0 when ``targets`` holds no positive.
    """
    eps = 1e-7
    # logsigmoid().exp() is a numerically stable sigmoid.
    probabilities = F.logsigmoid(outputs.reshape(-1)).exp()
    truth = targets.reshape(-1).to(probabilities.dtype)

    intersection = (probabilities * truth).sum()
    cardinality = (probabilities + truth).sum()
    dice_score = (2.0 * intersection) / cardinality.clamp_min(eps)

    loss = 1.0 - dice_score
    # A batch without any positive label contributes no loss.
    has_positive = (truth.sum() > 0).to(loss.dtype)
    return loss * has_positive

def training(model, trainloader, validloader,epochs, optimizer, scheduler=None, loss_function=BCEwithlogits, clinical_att=False, best_validation_balanced_accuracy=0):
    """Train ``model`` and checkpoint it whenever validation improves.

    Args:
        model: Model returning ``(image_logits, clinical_logits)``.
        trainloader: Training loader (batches produced by ``collate_fn``).
        validloader: Validation loader.
        epochs: Number of epochs to run.
        optimizer: Optimizer updating the model parameters.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
        loss_function: Callable ``(logits, labels) -> loss``.
        clinical_att: When true, both branches are trained (sum of their
            losses) and predictions use the average of the two logits;
            otherwise only the image branch is trained and used.
        best_validation_balanced_accuracy: Score to beat before a checkpoint
            is written. Pass the value returned by a previous call to chain
            training stages without overwriting a better checkpoint.

    Returns:
        The best validation balanced accuracy seen, including the initial value.
    """
    for epoch in range(epochs):
        epoch_loss = 0.0
        epoch_accuracy = 0.0
        number_samples = 0

        model.train()

        for batch in trainloader:
            optimizer.zero_grad()
            images, clinical, labels, indices_of_bags, indices_in_bags = batch
            labels = labels.to(device)
            outputs1, outputs2 = model(
                images.to(device),
                clinical.to(device),
                indices_of_bags.to(device),
                indices_in_bags.to(device),
            )

            if clinical_att:
                outputs = (outputs1 + outputs2) / 2
                # Each branch is supervised on its own...
                objective = (
                    loss_function(outputs1, labels).sum()
                    + loss_function(outputs2, labels).sum()
                )
                # ...while the reported loss is that of the averaged logits.
                loss = loss_function(outputs.detach(), labels)
            else:
                outputs = outputs1
                loss = loss_function(outputs, labels)
                objective = loss.sum()

            mean_loss = objective / len(labels)
            mean_loss.backward()
            optimizer.step()

            batch_predictions = (outputs.sigmoid() > 0.5).long()
            epoch_loss += loss.sum().item()
            epoch_accuracy += (batch_predictions == labels).float().sum().item()
            number_samples += len(labels)

        if scheduler is not None:
            scheduler.step()

        # Guard against an empty loader instead of dividing by zero.
        number_samples = max(number_samples, 1)
        epoch_loss = epoch_loss/number_samples
        epoch_accuracy = epoch_accuracy/number_samples
        print(f'Epoch {epoch+1}: training loss {epoch_loss:.4f}, training accuracy {epoch_accuracy:.4f}')

        # Validation must run in eval mode so BatchNorm uses (and does not
        # update) its running statistics; train mode is restored at the top
        # of the next epoch.
        model.eval()
        validation_loss, validation_accuracy, bal_accuracy = validation(model, validloader, loss_function, clinical_att)
        print(f'Validation accuracy {validation_accuracy:.4f}, loss {validation_loss:.4f}, balanced accuracy {bal_accuracy:.4f}\n')

        # Keep only the checkpoint with the best balanced accuracy so far.
        if bal_accuracy>best_validation_balanced_accuracy:
            best_validation_balanced_accuracy=bal_accuracy
            save_model(model,optimizer)

    print('End OF Training')
    return best_validation_balanced_accuracy


We train a new model with the following hyperparameters.

In [None]:
# Two-stage training of Model 1: 30 epochs at lr=1e-4, then 10 fine-tuning
# epochs at lr=1e-5. A fresh Adam optimizer is created for each stage and no
# learning-rate scheduler is used.
model = MILModel().to(device)
for stage_epochs, stage_lr in ((30, 1e-4), (10, 1e-5)):
    optimizer = torch.optim.Adam(model.parameters(), stage_lr)
    training(model, train_loader, valid_loader, stage_epochs, optimizer)

### Write Kaggle submission

We load the best model and write the submission for Kaggle.

In [None]:
# Reload the best checkpoint written by save_model(). `map_location` lets a
# checkpoint saved on GPU be loaded on whichever device is currently in use.
path = './best_model.pth'
print('Loading best model')
checkpoint = torch.load(path, map_location=device)
model.load_state_dict(checkpoint['model_state_dict'])

print('writing submission')
model.eval()
test_proba = []

# Only the image branch is used for the submission.
clinical_att = False

with torch.no_grad():
    for batch in tqdm(test_loader):
        images, clinical, labels, indices_of_bags, indices_in_bags = batch
        outputs1, outputs2 = model(
            images.to(device),
            clinical.to(device),
            indices_of_bags.to(device),
            indices_in_bags.to(device),
        )
        outputs = (outputs1 + outputs2) / 2 if clinical_att else outputs1
        test_proba.append(outputs.sigmoid().cpu())

test_proba = torch.cat(test_proba, 0)
test_predictions = test_proba > 0.5

# zip() would silently drop rows on a mismatch, so check it explicitly.
if len(test_IDs) != len(test_predictions):
    raise ValueError(
        f'{len(test_IDs)} test IDs but {len(test_predictions)} predictions'
    )

with open('submission12.csv', 'w') as f:
    f.write('ID,Predicted\n')
    for ID, pred in zip(test_IDs, test_predictions):
        f.write(f'{ID},{pred.int().item()}\n')

Loading best model
writing submission


100%|██████████| 3/3 [00:03<00:00,  1.14s/it]


## Model 2

### Preprocessing

In [None]:
# NOTE(review): `directory_dataset` is not defined anywhere in the visible part
# of this notebook; it must be set (with a trailing path separator) before
# this cell runs.
annot = directory_dataset + 'clinical_annotation.csv'
annotations = pd.read_csv(annot)
# Encode the gender as a float (the value 'f' is mapped like 'F').
annotations['GENDER'] = annotations['GENDER'].replace({'M': 0.0, 'F': 1.0, 'f': 1.0})


def _dashed_to_slashed_dob(dob):
    """Rewrite an ``a-b-c`` date string as ``b/a/c``; leave other strings untouched."""
    if '-' not in dob:
        return dob
    parts = dob.split('-')
    return parts[1] + '/' + parts[0] + '/' + parts[2]


# Bring every date of birth to the single format parsed below.
annotations['DOB'] = annotations['DOB'].map(_dashed_to_slashed_dob)

# Add an age column (difference between the current year and the birth year).
annotations['DOB'] = pd.to_datetime(annotations['DOB'], format='%m/%d/%Y')
date_actuelle = datetime.now()
annotations['Age'] = date_actuelle.year - annotations['DOB'].dt.year

train_dir = directory_dataset + 'trainset'
test_dir = directory_dataset + 'testset'
# List the names of the sub-directories of a folder.
def files_name(dir):
    """Return the names of the entries of ``dir`` that are directories.

    The names are returned in the order given by ``os.listdir``.
    """
    return [
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry))
    ]
train_names = files_name(train_dir)
test_names = files_name(test_dir)

label_column = annotations['LABEL']

# Rows labelled -1 form the test set; rows labelled 0 / 1 are the training pool.
test_df = annotations[label_column == -1].reset_index(drop=True)
train_df_t0 = annotations[label_column == 0]
train_df_t1 = annotations[label_column == 1]

# Each class is split separately (80% train / 20% validation) so that both
# subsets keep the class proportions.
val_size = 0.2
train_df0, val_df0 = train_test_split(train_df_t0, test_size=val_size, random_state=42)
train_df1, val_df1 = train_test_split(train_df_t1, test_size=val_size, random_state=42)

# ignore_index=True yields a 0..n-1 index on the merged frames.
train_df = pd.concat([train_df0, train_df1], ignore_index=True)
val_df = pd.concat([val_df0, val_df1], ignore_index=True)

In [None]:
# Check that the training and validation sets have the same class proportions.
# Each message now names the label value and the dataframe actually counted
# (all four previously read "0 dans train_df").
count_zero_labels = (train_df['LABEL'] == 0).sum()
count_ones_labels = (train_df['LABEL'] == 1).sum()
print("Nombre de valeurs égales à 0 dans train_df['LABEL'] :", count_zero_labels)
print("Nombre de valeurs égales à 1 dans train_df['LABEL'] :", count_ones_labels)
count_zero_labels = (val_df['LABEL'] == 0).sum()
count_ones_labels = (val_df['LABEL'] == 1).sum()
print("Nombre de valeurs égales à 0 dans val_df['LABEL'] :", count_zero_labels)
print("Nombre de valeurs égales à 1 dans val_df['LABEL'] :", count_ones_labels)

In [None]:
# Standardisation statistics of the clinical features, computed on the
# training set only. The standard deviations previously used `.mean()` by
# mistake, so features were divided by their mean instead of their spread.
mean_age = train_df['Age'].mean()
std_age = train_df['Age'].std()
mean_ct = train_df['LYMPH_COUNT'].mean()
std_ct = train_df['LYMPH_COUNT'].std()
class PatientDataset(Dataset):
    """Dataset returning, for one patient, all images and two clinical features.

    Args:
        img_dir: Folder containing one sub-folder of images per patient ID.
        dataframe: Table with the columns ``ID``, ``LABEL``, ``Age`` and
            ``LYMPH_COUNT``; rows are looked up by index label.
        transform: Optional callable applied to every opened image.
    """

    def __init__(self, img_dir, dataframe, transform=None):
        self.img_dir = img_dir
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of patients."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(images, clinical, target, patient_id)`` for row ``idx``.

        ``clinical`` holds the age and the lymphocyte count, both standardised
        with the module-level ``mean_*`` / ``std_*`` values.
        """
        table = self.dataframe
        patient = table.loc[idx, 'ID']
        target = table.loc[idx, 'LABEL']
        age = torch.tensor((table.loc[idx, 'Age'] - mean_age) / std_age)
        lymph_count = torch.tensor((table.loc[idx, 'LYMPH_COUNT'] - mean_ct) / std_ct)

        # Every file found in the patient's folder is opened as an image.
        patient_dir = os.path.join(self.img_dir, patient)
        images = []
        for file_name in os.listdir(patient_dir):
            img = Image.open(os.path.join(patient_dir, file_name))
            images.append(self.transform(img) if self.transform else img)

        return (
            torch.stack(images),
            torch.Tensor([age, lymph_count]),
            torch.tensor(target, dtype=torch.float32),
            patient,
        )

# Image transformations.
# NOTE(review): these normalisation constants look like the commonly used
# CIFAR-10 statistics rather than values computed on this dataset; confirm.
_NORM_MEAN = (0.4914, 0.4822, 0.4465)
_NORM_STD = (0.2023, 0.1994, 0.2010)

# Training: center crop plus random flips.
data_transforms_train = transforms.Compose([
    transforms.CenterCrop(112),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])
# Evaluation: center crop only.
data_transforms_test = transforms.Compose([
    transforms.CenterCrop(112),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Datasets: validation patients are read from the training folder, test
# patients from the test folder.
train_dataset = PatientDataset(train_dir, train_df, data_transforms_train)
val_dataset = PatientDataset(train_dir, val_df, data_transforms_test)
test_dataset = PatientDataset(test_dir, test_df, data_transforms_test)

# One patient per batch; all three loaders shuffle.
batch_size = 1
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)


### MIL Model

In [None]:

from torchvision.models.resnet import BasicBlock, Bottleneck
from torchvision.models import resnet34,resnet18

class resnet(nn.Module):
    """Image branch of Model 2: ResNet-18 features averaged over one bag.

    The backbone is created without pretrained weights and its final layer is
    dropped; the mean feature vector goes through a 512 -> 256 -> 1 head.
    """

    def __init__(self):
        super().__init__()
        self.resnet = resnet18(weights=None)
        backbone_layers = list(self.resnet.children())[:-1]
        self.resnet_layers = torch.nn.Sequential(*backbone_layers)
        self.fc = nn.Linear(512, 256)
        self.fc1 = nn.Linear(256, 1)

    def forward(self, images):
        """Return a single logit (shape ``(1,)``) for the bag ``images``."""
        feature_maps = self.resnet_layers(images)
        # Drop dimension 2, average over the images of the bag, then flatten.
        bag_embedding = feature_maps.squeeze(2).mean(dim=0).flatten()
        hidden = torch.relu(self.fc(bag_embedding))
        return self.fc1(hidden)

class MLP(nn.Module):
    """Clinical branch of Model 2: a 2 -> 150 -> 250 -> 1 perceptron.

    ``forward`` returns a raw logit; the ``activation`` attribute is defined
    but not applied in ``forward``.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(2, 150)
        self.layer2 = nn.Linear(150, 250)
        self.activation = nn.Sigmoid()
        self.layer3 = nn.Linear(250, 1)

    def forward(self, x):
        """Map the two clinical features ``x`` to one logit."""
        hidden = torch.relu(self.layer1(x))
        hidden = torch.relu(self.layer2(hidden))
        return self.layer3(hidden)


### Training of the Model

In [None]:
def validation(model,valloader,criterion,device='cpu',typet='mlp',threshold=0.5):
    """Evaluate one branch of Model 2 on a loader with one patient per batch.

    Changes from the first version: the model is evaluated in eval mode (its
    previous mode is restored afterwards), predictions come from
    ``sigmoid(logit) > threshold`` instead of a raw-logit cut-off at 0.6, an
    unknown ``typet`` is rejected explicitly, and the printed metric is
    labelled as plain accuracy, which is what is computed.

    Args:
        model: Branch to evaluate.
        valloader: Loader yielding ``(images, clinical, target, patient)``.
        criterion: Loss called as ``criterion(logits, targets)``.
        device: Device the batches are moved to.
        typet: ``'mlp'`` to feed the clinical features, ``'res'`` to feed the images.
        threshold: Probability above which the positive class is predicted.

    Returns:
        ``(summed_loss, summed_accuracy)``: per-batch sums, not averages.

    Raises:
        ValueError: If ``typet`` is neither ``'mlp'`` nor ``'res'``.
    """
    if typet not in ('mlp', 'res'):
        raise ValueError(f"typet must be 'mlp' or 'res', got {typet!r}")

    was_training = model.training
    model.eval()

    validation_loss = 0.0
    validation_accuracy = 0.0
    try:
        with torch.no_grad():
            for inputs, datas, targets, _ in valloader:
                inputs, datas, targets = inputs.to(device), datas.to(device), targets.to(device)
                # Index 0 selects the single patient of the batch.
                features = datas[0] if typet == 'mlp' else inputs[0]
                outputs = model(features)
                validation_loss += criterion(outputs, targets).item()
                predictions = (torch.sigmoid(outputs) > threshold).to(targets.dtype)
                validation_accuracy += (predictions == targets).float().mean().item()
    finally:
        model.train(was_training)

    n_batches = max(len(valloader), 1)
    print(' Loss_acc : {:.4f}    Accuracy : {:.4f} %'.format(validation_loss/n_batches,100*validation_accuracy/n_batches))
    return validation_loss, validation_accuracy

def save_model(model, optimizer,opt='mlp'):
    """Save the model and optimizer states of one Model 2 branch.

    Files of the current directory whose name ends with ``best_model.pth``
    are removed first. The checkpoint goes to ``best_model_mlp.pth`` when
    ``opt == 'mlp'`` and to ``best_model_res.pth`` for any other value.
    """
    for name in os.listdir('./'):
        if name.endswith('best_model.pth'):
            os.remove(os.path.join('./', name))

    checkpoint = {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
    }
    destination = 'best_model_mlp.pth' if opt == 'mlp' else 'best_model_res.pth'
    torch.save(checkpoint, destination)

    print("Saved Best Model")

def train(model,trainloader,valloader,criterion,optimizer,n_epochs,device='cpu',typet='mlp',scheduler=None):
    """Train one branch of Model 2 and checkpoint it when validation improves.

    Changes from the first version: train mode is re-enabled at every epoch
    and eval mode is set before validation; training accuracy uses
    ``sigmoid(logit) > 0.5`` rather than a threshold on the raw logit;
    ``train_acc`` is actually filled; an optional scheduler can be stepped;
    and the printed metric is labelled as plain accuracy.

    Args:
        model: Branch to train.
        trainloader: Training loader, one patient per batch, yielding
            ``(images, clinical, target, patient)``.
        valloader: Validation loader with the same batch layout.
        criterion: Loss called as ``criterion(logits, targets)``.
        optimizer: Optimizer updating the model parameters.
        n_epochs: Number of epochs.
        device: Device the batches are moved to.
        typet: ``'mlp'`` to feed the clinical features, ``'res'`` to feed the images.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.

    Returns:
        ``(train_losses, train_acc)``: mean loss and accuracy of each epoch.

    Raises:
        ValueError: If ``typet`` is neither ``'mlp'`` nor ``'res'``.
    """
    if typet not in ('mlp', 'res'):
        raise ValueError(f"typet must be 'mlp' or 'res', got {typet!r}")

    train_losses = []
    train_acc = []
    print('Start training')
    best_validation_balanced_accuracy = 0
    n_batches = max(len(trainloader), 1)

    for epoch in range(n_epochs):
        # Re-enable train mode: validation below leaves the model in eval mode.
        model.train()
        t_loss = 0.0
        correct = 0.0

        for inputs, datas, targets, _ in trainloader:
            inputs, datas, targets = inputs.to(device), datas.to(device), targets.to(device)
            optimizer.zero_grad()
            # Index 0 selects the single patient of the batch.
            features = datas[0] if typet == 'mlp' else inputs[0]
            outputs = model(features)

            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            t_loss += loss.item()

            predictions = (torch.sigmoid(outputs.detach()) > 0.5).to(targets.dtype)
            correct += (predictions == targets).float().mean().item()

        if scheduler is not None:
            scheduler.step()

        train_losses.append(t_loss/n_batches)
        train_acc.append(correct/n_batches)
        print(' Epoch: {} : Loss_bag : {:.4f}  Accuracy : {:.4f} %'.format(epoch,t_loss/n_batches,100*correct/n_batches))

        # Evaluate with BatchNorm in inference mode.
        model.eval()
        validation_loss, validation_accuracy = validation(model,valloader,criterion,device=device,typet=typet)

        # Keep the checkpoint with the best validation score so far.
        if validation_accuracy > best_validation_balanced_accuracy:
            best_validation_balanced_accuracy = validation_accuracy
            save_model(model,optimizer,opt=typet)

    return train_losses,train_acc


In [None]:
# Use the GPU when one is available instead of hard-coding 'cuda', which
# crashes on CPU-only runtimes.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
k = 8
# The two branches of Model 2 are trained independently.
model_mlp = MLP().to(device)
model_res = resnet().to(device)
learning_rate = 1e-4
w_decay = 5e-4
# Both branches output raw logits, hence the logits variant of the BCE loss.
criterion = nn.BCEWithLogitsLoss()


In [None]:
# Optimisation settings of the image branch: Adam with weight decay, 80 epochs.
learning_rate, w_decay, n_epochs = 1e-4, 5e-4, 80
optimizer_res = torch.optim.Adam(
    model_res.parameters(), lr=learning_rate, weight_decay=w_decay
)
# NOTE(review): this scheduler is created but not passed to train() below, so
# it is never stepped by this cell.
lr_scheduler_res = torch.optim.lr_scheduler.StepLR(optimizer_res, step_size=10, gamma=0.1)
train(
    model_res, trainloader, valloader, criterion, optimizer_res, n_epochs,
    device=device, typet='res',
)

In [None]:
# Optimisation settings of the clinical branch: Adam with weight decay, 60 epochs.
learning_rate, w_decay, n_epochs = 1e-4, 5e-4, 60
optimizer_mlp = torch.optim.Adam(
    model_mlp.parameters(), lr=learning_rate, weight_decay=w_decay
)
# NOTE(review): this scheduler is created but not passed to train() below, so
# it is never stepped by this cell.
lr_scheduler_mlp = torch.optim.lr_scheduler.StepLR(optimizer_mlp, step_size=100, gamma=0.1)
train(
    model_mlp, trainloader, valloader, criterion, optimizer_mlp, n_epochs,
    device=device, typet='mlp',
)

### Load and submission file

In [None]:
# Reload the best checkpoint of the clinical branch.
# NOTE(review): save_model() writes to the current directory; this only finds
# the file when the notebook runs from /kaggle/working.
print('Loading best model')
dir_out = '/kaggle/working/'
# os.path.join avoids the doubled '/' of plain concatenation; map_location
# lets a GPU-saved checkpoint load on the device currently in use.
checkpoint = torch.load(os.path.join(dir_out, 'best_model_mlp.pth'), map_location=device)
model_mlp.load_state_dict(checkpoint['model_state_dict'])

In [None]:
# Reload the best checkpoint of the image branch.
# NOTE(review): save_model() writes to the current directory; this only finds
# the file when the notebook runs from /kaggle/working.
print('Loading best model')
dir_out = '/kaggle/working/'
# os.path.join avoids the doubled '/' of plain concatenation; map_location
# lets a GPU-saved checkpoint load on the device currently in use.
checkpoint = torch.load(os.path.join(dir_out, 'best_model_res.pth'), map_location=device)
model_res.load_state_dict(checkpoint['model_state_dict'])

In [None]:
# Inference must run in eval mode so that BatchNorm uses its running
# statistics (train() leaves the models in training mode).
model_res.eval()
model_mlp.eval()

# Predicted class of every test patient, keyed by patient ID.
preds = {}
with torch.no_grad():
    for inputs, datas, _, patient in testloader:
        inputs, datas = inputs.to(device), datas.to(device)
        # Index 0 selects the single patient of the batch.
        outputs_ft = model_res(inputs[0])
        outputs_bag = model_mlp(datas[0])
        # Both branches output logits: average them, convert to a probability,
        # then threshold at 0.5 (the raw averaged logit used to be compared
        # with 0.5).
        probability = torch.sigmoid((outputs_bag + outputs_ft) / 2)
        preds[patient[0]] = int(probability.item() > 0.5)
preds

In [None]:
# Turn the {patient ID: prediction} mapping into the two-column submission file.
dataframe = pd.DataFrame(list(preds.items()), columns=['ID', 'Predicted'])
dataframe.to_csv('submission_only_mlp8.csv', index=False)
print(dataframe.head())