# Model Training
---

In [5]:
import pandas as pd
from PIL import Image
import torch
import torch.nn as nn
import torch.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

import sys
sys.path.append('../scripts')
from data_utils import labels

In [6]:
# Load the pre-split label tables. The first CSV column is used as the DataFrame index.
# The "*_tab" frames back the tabular MLP datasets; the "*_cnn" frames are presumably
# for the image model (confirm against the CNN cells).

train_labels_cnn, train_labels_tab = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/labels/train_labels_cnn.csv',
        '../data/labels/train_labels_tab.csv',
    )
)

val_labels_cnn, val_labels_tab = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/labels/val_labels_cnn.csv',
        '../data/labels/val_labels_tab.csv',
    )
)

test_labels_cnn, test_labels_tab = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../data/labels/test_labels_cnn.csv',
        '../data/labels/test_labels_tab.csv',
    )
)

# MLP Model

In [7]:
# Preview the first five rows of the tabular training split.
train_labels_tab.head(n=5)

Unnamed: 0,image_path,Atelectasis,Cardiomegaly,Consolidation,Edema,Effusion,Emphysema,Fibrosis,Hernia,Infiltration,Mass,No Finding,Nodule,Pleural_Thickening,Pneumonia,Pneumothorax,follow_up_number,patient_age,patient_gender,view_position
0,C:\Users\reala\.cache\kagglehub\datasets\nih-c...,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,-0.585474,0.663802,1,1
1,C:\Users\reala\.cache\kagglehub\datasets\nih-c...,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,-0.470147,0.663802,1,1
2,C:\Users\reala\.cache\kagglehub\datasets\nih-c...,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,-0.35482,0.663802,1,1
3,C:\Users\reala\.cache\kagglehub\datasets\nih-c...,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,-0.585474,2.046266,1,1
4,C:\Users\reala\.cache\kagglehub\datasets\nih-c...,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,-0.585474,2.106373,1,0


In [8]:
# Values at column positions 16-19 of the first row (the four tabular input features).
train_labels_tab.iloc[0].iloc[16:20]

follow_up_number   -0.585474
patient_age         0.663802
patient_gender             1
view_position              1
Name: 0, dtype: object

In [13]:
# Names of the columns at positions 16-19 (the tabular feature columns).
train_labels_tab.iloc[:, 16:20].columns

Index(['follow_up_number', 'patient_age', 'patient_gender', 'view_position'], dtype='object')

In [None]:
# convert datasets into PyTorch Tensors using Dataset and DataLoader

# feature columns = [16:20]
# label columns = [1:16]
class MLPDataset(Dataset):
    """Map-style dataset exposing tabular rows as ``(features, labels)`` tensor pairs.

    The selected columns are converted to ``float32`` tensors once, at
    construction time, so ``__getitem__`` is a cheap tensor lookup instead of
    building a pandas row object and re-converting it on every access.

    Args:
        data: DataFrame holding both the feature and the label columns.
        feature_cols: Column labels (list / Index) of the model inputs, or a
            ``slice`` of column positions.
        label_cols: Column labels (list / Index) of the targets, or a
            ``slice`` of column positions.

    Raises:
        KeyError: If a requested column label is missing from ``data``.
        ValueError / TypeError: If a selected column cannot be cast to float32.

    Note:
        The tensors are a snapshot; changes made to ``data`` after the dataset
        is created are not reflected in the returned samples.
    """

    def __init__(self, data, feature_cols, label_cols):
        self.data = data
        self.feature_cols = feature_cols
        self.label_cols = label_cols

        # One-off conversion; rows are addressed by position afterwards.
        self._features = self._columns_to_tensor(feature_cols)
        self._targets = self._columns_to_tensor(label_cols)

    def _columns_to_tensor(self, cols):
        """Return the selected columns of ``self.data`` as a 2-D float32 tensor."""
        if isinstance(cols, slice):
            # Slices are positional, matching how a slice behaves on a row Series.
            selected = self.data.iloc[:, cols]
        else:
            selected = self.data[list(cols)]
        # copy=True guarantees a writable array that does not alias the DataFrame.
        return torch.tensor(selected.to_numpy(dtype='float32', copy=True))

    def __len__(self):
        """Number of rows (samples) in the underlying DataFrame."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the ``(features, labels)`` float32 tensors for row position ``index``."""
        return self._features[index], self._targets[index]


# Feature and label column selections for the tabular MLP.
feature_cols = ['follow_up_number', 'patient_age', 'patient_gender', 'view_position']
label_cols = labels  # label columns as exported by data_utils

# One dataset per split, all sharing the same column selections.
MLP_train_dataset = MLPDataset(train_labels_tab, label_cols=label_cols, feature_cols=feature_cols)
MLP_test_dataset = MLPDataset(test_labels_tab, label_cols=label_cols, feature_cols=feature_cols)
MLP_val_dataset = MLPDataset(val_labels_tab, label_cols=label_cols, feature_cols=feature_cols)

# data loading parameters
batch_size = 4
shuffle = True  # used for the training loader only
# Worker processes are disabled: MLPDataset is defined inside the notebook, and on
# platforms that start workers with "spawn" (Windows, macOS) the workers cannot
# import a class that lives in the notebook's __main__ namespace.
# NOTE(review): raise this again once the dataset class lives in an importable module.
num_workers = 0

MLP_train_loaded = DataLoader(dataset=MLP_train_dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
# Evaluation splits keep a fixed order so predictions line up with the source rows
# and metrics are reproducible between runs.
MLP_test_loaded = DataLoader(dataset=MLP_test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
MLP_val_loaded = DataLoader(dataset=MLP_val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

In [20]:
# Multi-label classification multi-layer perceptron (MLP) for tabular data

class MLPModel(nn.Module):
    """Feed-forward network: two ReLU-activated hidden layers and a linear output layer.

    Args:
        n_features: Size of each input vector.
        hidden_dim1: Width of the first hidden layer.
        hidden_dim2: Width of the second hidden layer.
        n_outputs: Number of values produced per sample.

    No activation is applied after the last layer, so ``forward`` returns the
    raw output of ``fc3``.
    """

    def __init__(self, n_features, hidden_dim1, hidden_dim2, n_outputs):
        super().__init__()
        # Input -> first hidden layer.
        self.fc1 = nn.Linear(in_features=n_features, out_features=hidden_dim1)
        self.relu1 = nn.ReLU()
        # First hidden -> second hidden layer.
        self.fc2 = nn.Linear(in_features=hidden_dim1, out_features=hidden_dim2)
        self.relu2 = nn.ReLU()
        # Second hidden layer -> outputs.
        self.fc3 = nn.Linear(in_features=hidden_dim2, out_features=n_outputs)

    def forward(self, x):
        """Run ``x`` through fc1 -> ReLU -> fc2 -> ReLU -> fc3 and return the result."""
        hidden = self.fc1(x)
        hidden = self.relu1(hidden)
        hidden = self.fc2(hidden)
        hidden = self.relu2(hidden)
        return self.fc3(hidden)
    
# Smoke-test instantiation: 4 input features, hidden widths 64 and 132, 15 outputs.
# NOTE(review): 132 is an unusual width -- confirm it is not a typo for 128.
test1 = MLPModel(n_features=4, hidden_dim1=64, hidden_dim2=132, n_outputs=15)

# CNN Model

In [None]:
class CNNDataset(Dataset):
    """Map-style dataset yielding ``(image, label)`` pairs for the image model.

    Args:
        data: DataFrame with an ``"image_path"`` column plus the label columns.
        transform: Optional callable applied to the opened PIL image.
        label_cols: Column labels (list / Index) of the targets, or a ``slice``
            of column positions. Defaults to positions ``1:16``.

    Both the image path and the labels are looked up by row *position*, so the
    dataset works regardless of the DataFrame's index values.
    """

    def __init__(self, data, transform=None, label_cols=None):
        self.data = data
        self.transform = transform
        # Default keeps the original positional selection of columns 1..15.
        self.label_cols = slice(1, 16) if label_cols is None else label_cols

    def __len__(self):
        """Number of rows (samples) in the underlying DataFrame."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the (optionally transformed) image and its float32 label tensor."""
        # Fetch the row once, positionally; a label-based lookup on the
        # "image_path" column would break when the index is not 0..n-1.
        row = self.data.iloc[index]
        img = Image.open(row["image_path"])

        if isinstance(self.label_cols, slice):
            target = row.iloc[self.label_cols]
        else:
            target = row[list(self.label_cols)]
        # Tensor (not a pandas Series) so the default DataLoader collate can batch it.
        label = torch.tensor(target.to_numpy().astype('float32'))

        if self.transform:
            img = self.transform(img)

        return img, label

# Combined output