In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
"""
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
"""

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ae-mehdi/ae_Mehdi.pt
/kaggle/input/dlmi-challenge/clinical_annotation.csv
/kaggle/input/dlmi-challenge/trainset/trainset/trainset_true.csv
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000050.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000038.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000081.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000091.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000040.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000021.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000015.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000075.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000096.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000029.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000076.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000056.jpg
/kaggle/input/dlmi-challenge/trainset/trainset/P135/000077.jpg
/kaggle/input/dlmi-challenge

In [13]:
import os 
import numpy as np
import pandas as pd 
import warnings
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import balanced_accuracy_score
from torch.utils.data import DataLoader, random_split
from torch.utils.data import Dataset
from PIL import Image

In [14]:
# Suppress FutureWarnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# PATHS

In [4]:
cwd = os.getcwd()
trainset_path = "/kaggle/input/dlmi-challenge/trainset/trainset"
testset_path = "/kaggle/input/dlmi-challenge/testset/testset"
ae_Mehdi = "/kaggle/input/ae-mehdi/ae_Mehdi.pt"

ae_training = False
submission = False

# TOOLS - Custom classes and functions 

## Data loaders 

In [2]:
class PatientDataset(Dataset):
    def __init__(self, root_dir, annotation: pd.DataFrame, patients: list, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.patients = patients
        self.df = annotation

    def __len__(self):
        return len(self.patients)

    def __getitem__(self, idx):
        patient_folder = os.path.join(self.root_dir, self.patients[idx])
        images = []
        for filename in os.listdir(patient_folder):
            image_path = os.path.join(patient_folder, filename)
            image = Image.open(image_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            images.append(image)

        # get label 
        label = self.df[self.df.index == self.patients[idx]].LABEL
        label_tensor = torch.tensor([label], dtype=torch.float32)

        # get features 
        features = self.df[self.df.index == self.patients[idx]].drop("LABEL", axis=1).to_numpy()
        features_tensor = torch.tensor([features], dtype=torch.float32)
    
        return torch.stack(images), features_tensor, label_tensor, self.patients[idx]

In [3]:
class ImageLevelDataset(Dataset):
    def __init__(self, root_dir, list_images, labels, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.list_img = list_images
        self.labels = labels

    def __len__(self):
        return len(self.list_img)

    def __getitem__(self, idx):
        img_path, patient = self.list_img[idx]

        # get image and label
        label = self.labels[patient]
        image = Image.open(img_path).convert('RGB')
        
        if self.transform:  # transform image
                image = self.transform(image)

        # transfrom label to right format
        label_tensor = torch.tensor([label], dtype=torch.float32)
        return image, label_tensor

## Models

### Features extractor 

In [4]:
class Autoencoder(nn.Module):
    def __init__(self, latent_size=128):
        super(Autoencoder, self).__init__()

        # define latent size
        self.latent_size = latent_size

        # define encoder 
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # close to latent space
        self.fc_encoder = nn.Linear(256 * 28 * 28, latent_size)
        self.fc_decoder = nn.Linear(latent_size, 256 * 28 * 28)

        # decoder architecture
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1,output_padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(True),
            nn.ConvTranspose2d(64, 3, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)
        x = self.fc_encoder(x)
        x = self.fc_decoder(x)
        x = x.view(x.size(0), 256, 28, 28)
        x = self.decoder(x)
        return x

    def embedding(self, x):
        x = self.encoder(x)
        x = x.view(x.size(0), -1)
        x = self.fc_encoder(x)
        return x

### Classifier

In [5]:
class DeepSets(nn.Module):
    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        """
        Initialization of the deep set model. 

        :params input_dim: int
            Dimension of each element of the input. 
        :params hidden_dim: int 
            Dimension of hidden layers. 
        :parasm output_dim: int 
            Dimensionality of output
        """
        super(DeepSets, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        
        # Shared layers
        self.shared_layers = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),  
            nn.Sigmoid()
        )
        
        # Permutation-invariant layer
        self.invariant_layer = nn.Sequential(
            nn.Linear(hidden_dim+3, output_dim), 
            nn.Sigmoid()
        )

    def forward(self, x):
        """
        Foward function of deep set model that given images features and 
        tabular data return a prediction.

        :params x: tuple
            Tuples containing one 2d tensor and one 1d tensor.

        :return one value 
        """
        # seperate img features and tabular data
        img_features, tabular = x
        
        # Apply shared layers to each element of the set
        output = self.shared_layers(img_features)
        
        # Permutation-invariant aggregation
        v, _ = torch.max(output, dim=0)
        
        # concatenate tabular data and feature from bag of images
        conc = torch.concat([v, tabular], dim=-1)
        
        # Apply permutation-invariant layer
        output = self.invariant_layer(conc)
        
        return output

# Introduction 

In [9]:
annotation_file = os.path.join("/kaggle/input/dlmi-challenge/clinical_annotation.csv")
df_ann = pd.read_csv(annotation_file)
df_ann.drop("Unnamed: 0", axis=1, inplace=True)

### RAPID PREPROCESSING

# compute age 
def compute_age(x):
    year = int(x[-4:])
    return 2024 - year
    
df_ann["age"] = df_ann.DOB.apply(compute_age)
df_ann.drop("DOB", axis=1, inplace=True)

#encode gender
df_ann["GENDER"] = df_ann["GENDER"].replace('f', "F")
label_encoder = LabelEncoder()
df_ann['GENDER'] = label_encoder.fit_transform(df_ann['GENDER'])

df_ann.set_index("ID", inplace=True)

In [10]:
df_ann.head()

Unnamed: 0_level_0,LABEL,GENDER,LYMPH_COUNT,age
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
P26,1,1,11.2,91
P183,1,1,12.8,82
P89,1,1,9.6,89
P123,1,1,122.6,93
P61,1,0,11.6,93


In [11]:
sub_patients = list(df_ann.loc[df_ann.LABEL==-1].index)

## Train-test split 

Isolate as soon as possible our future test data. As the dataset is imbalanced (70% positive), we make a stratifiy train-test split: 
$ 14 = 20 \times 0.7 $ 

In [15]:
test_patients = list(df_ann[df_ann.LABEL==0].sample(14).index)
test_patients += list(df_ann[df_ann.LABEL==1].sample(20).index)
print("Test size: ", len(test_patients))
print("Test patients: ", test_patients)

NameError: name 'df_ann' is not defined

Define submission, train and test set 

In [13]:
sub_df = df_ann[df_ann.index.isin(sub_patients)]  # submission patients
test_df = df_ann[df_ann.index.isin(test_patients)]  # test patients

# train patients are the others
train_patients = list(set(df_ann.index) - set(test_patients) - set(sub_patients))
train_df = df_ann[df_ann.index.isin(train_patients)]

# Feature extraction with Auto-encoder

## Data loader 

In [14]:
# preparation of data loader
labels = train_df.LABEL.to_dict()
transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
    ])

list_images = []
for pat in train_patients:
    for img in os.listdir(os.path.join(trainset_path, pat)):
        list_images.append((os.path.join(trainset_path, pat, img), pat))
        
print(list_images[0])

('/kaggle/input/dlmi-challenge/trainset/trainset/P1/000050.jpg', 'P1')


In [15]:
dataset = ImageLevelDataset("./data/trainset/", list_images, labels, transform) 
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

## Training 

In [16]:
# Check if GPU is available and set device accordingly
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [17]:
if ae_training:

    # Initialize the model
    model = Autoencoder(latent_size=128).to(device)

    # Define loss function (here MSE)
    criterion = torch.nn.MSELoss()

    # Define optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Train the model
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for images, _ in train_loader:
            optimizer.zero_grad()
            images = images.to(device)
            outputs = model(images)
            loss = criterion(outputs, images)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

else:
    
    model = Autoencoder(latent_size=128)
    model.load_state_dict(torch.load(ae_Mehdi, map_location=torch.device(device)))

In [19]:
if ae_training: 
    torch.save(model.state_dict(), 'ae_Mehdi.pt')

## Test auto-encoder performance on test set

In [20]:
test_labels = test_df.LABEL.to_dict()
list_images_test = []
for pat in test_patients:
    for img in os.listdir(os.path.join(trainset_path, pat)):
        list_images_test.append((os.path.join(trainset_path, pat, img), pat))
        
print(list_images_test[:3])

[('/kaggle/input/dlmi-challenge/trainset/trainset/P64/000038.jpg', 'P64'), ('/kaggle/input/dlmi-challenge/trainset/trainset/P64/000040.jpg', 'P64'), ('/kaggle/input/dlmi-challenge/trainset/trainset/P64/000021.jpg', 'P64')]


In [21]:
test_ae_dataset = ImageLevelDataset("./data/trainset/", list_images_test, test_labels, transform) 
test_loader = DataLoader(test_ae_dataset, batch_size=32, shuffle=False)

mse_array = np.zeros(len(test_loader))
with torch.no_grad():
    i = 0
    nb_img = 0
    for images, labels in test_loader:
        nb_img += images.size(0)
        images = images.to(device)
        outputs = model(images)
        mse_ = (outputs - images) ** 2
        mse_array[i] = torch.sum(mse_)
        i += 1

total_nb_pixels = nb_img * 224 * 224
print(f"Test MSE: {np.sum(mse_array) / total_nb_pixels}%")

Test MSE: 8.338154958827155e-05%


# Classification using deep sets

In [22]:
dataset = PatientDataset(
    root_dir=trainset_path, 
    annotation=train_df,
    patients=train_patients, 
    transform=transform
    )
loader = DataLoader(dataset)

In [24]:
clf = DeepSets(
    input_dim=128,# size of embedding space
    hidden_dim=32, 
    output_dim=1
).to(device)
criterion = nn.BCELoss()  # Binary Cross Entropy Loss for binary classification
optimizer = torch.optim.Adam(clf.parameters(), lr=0.001)


num_epochs = 10
for epoch in range(num_epochs): 
    running_loss = 0.0
    
    for data in loader:
        img, tab, label, pat = data
        
        # process image
        img = torch.squeeze(img, dim=0).to(device)
        
        # rabular data
        tab = tab.squeeze().to(device)

        # label
        label = torch.squeeze(label, dim=(0, 1)).to(device)

        # patient
        patient = pat[0]
        
        # zero grad
        optimizer.zero_grad() 
        
        # input of deep sets
        features_img = model.embedding(img)

        clf_output = clf((features_img, tab))  
        loss = criterion(clf_output, label)
        loss.backward()  # Backward pass
        optimizer.step()  # Optimize
        
        # running loss update
        running_loss += loss.item()
        
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(test_loader)}")    
    

  label_tensor = torch.tensor([label], dtype=torch.float32)
  features_tensor = torch.tensor([features], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([lab

Epoch 1/10, Loss: 3.9102158897720494


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 2/10, Loss: 2.301025220253879


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 3/10, Loss: 1.2180079165081095


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 4/10, Loss: 0.8448945098423533


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 5/10, Loss: 0.7735480575356632


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 6/10, Loss: 0.7421655229700264


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 7/10, Loss: 0.717975722121108


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 8/10, Loss: 0.6944658673529245


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 9/10, Loss: 0.6699806796175185


  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

Epoch 10/10, Loss: 0.6439445176998999


In [25]:
torch.save(clf.state_dict(), 'deepsets_model_max.pt')

# Test the model 

In [None]:
dataset = PatientDataset(
    root_dir=trainset_path, 
    annotation=test_df,
    patients=test_patients, 
    transform=transform
    )
loader = DataLoader(dataset)

In [None]:
labels = np.zeros(len(loader))
pred = np.zeros(len(loader))

with torch.no_grad():
    i = 0
    for data in loader:
        img, tab, label, pat = data
        
        # process image
        img = torch.squeeze(img, dim=0).to(device)
        
        # rabular data
        tab = tab.squeeze().to(device)

        # label
        label = torch.squeeze(label, dim=(0, 1)).to(device)

        # patient
        patient = pat[0]
        
        # zero grad
        optimizer.zero_grad() 
        
        # input of deep sets
        features_img = model.embedding(img)

        clf_output = clf((features_img, tab))  

        labels[i] = label.detach().numpy()
        pred[i] = clf_output.detach().numpy()

Measure performance

In [11]:
# get actual pred 
actual_pred = np.where(pred >= 0.5, 1, 0)

# compute balance accuracy 
balanced_acc = balanced_accuracy_score(labels, actual_pred)

print("Balanced accuracy: ", balanced_acc)

Balanced accuracy:  0.8333333333333333


# For submission

In [27]:
if submission:
    # submission loader
    test_dataset = PatientDataset(
        root_dir=testset_path, 
        annotation=sub_df,
        patients=sub_patients, 
        transform=transform
        )
    sub_loader = DataLoader(test_dataset)

    # store results
    dico = {}
    with torch.no_grad():
        
        for data in sub_loader:
            img, tab, label, pat = data
            
            # process image
            img = torch.squeeze(img, dim=0).to(device)
            
            # rabular data
            tab = tab.squeeze().to(device)

            # label
            label = torch.squeeze(label, dim=(0, 1)).to(device)

            # patient
            patient = pat[0]
            
            # input of deep sets
            features_img = model.embedding(img)

            clf_output = clf((features_img, tab))  
            
            dico[patient] = clf_output.cpu().detach().numpy()
            
    print(dico)

  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], dtype=torch.float32)
  label_tensor = torch.tensor([label], d

{'P71': array([0.76742834], dtype=float32), 'P16': array([0.9295041], dtype=float32), 'P114': array([0.9183822], dtype=float32), 'P170': array([0.8825037], dtype=float32), 'P98': array([0.44708008], dtype=float32), 'P69': array([0.31011942], dtype=float32), 'P92': array([0.96759844], dtype=float32), 'P132': array([0.89355636], dtype=float32), 'P81': array([0.9552589], dtype=float32), 'P73': array([0.9454633], dtype=float32), 'P143': array([0.97953206], dtype=float32), 'P175': array([0.807896], dtype=float32), 'P56': array([0.42450717], dtype=float32), 'P139': array([0.70481956], dtype=float32), 'P152': array([0.8599884], dtype=float32), 'P203': array([0.24738945], dtype=float32), 'P75': array([0.98958963], dtype=float32), 'P9': array([0.9241715], dtype=float32), 'P24': array([0.5495216], dtype=float32), 'P4': array([0.31027108], dtype=float32), 'P32': array([0.99986863], dtype=float32), 'P120': array([0.9405227], dtype=float32), 'P138': array([0.9384365], dtype=float32), 'P172': array(

  label_tensor = torch.tensor([label], dtype=torch.float32)


In [28]:
if submission:
    final_df = pd.DataFrame()

    final_df["ID"] = dico.keys()
    final_df["clf_pred"] = dico.values()
    final_df["predicted"] = final_df.clf_pred.apply(lambda x: 1 if x >= 0.5 else 0)
    final_df.drop("clf_pred", axis=1, inplace=True)
    final_df.set_index("ID", inplace=True)

    final_df.head()

Unnamed: 0_level_0,predicted
ID,Unnamed: 1_level_1
P71,1
P16,1
P114,1
P170,1
P98,0


In [29]:
if submission:
    final_df.to_csv("deepsets_max.csv")