## Data

In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from PIL import Image

def crop(X):
    res = np.zeros((len(X), len(X[0]), 300, 300))
    for p in range(len(X)):
        for s in range(len(X[p])):
            for i in range(106, 406):
                res[p][s][i - 106] = X[p][s][i][106:406]
    return res


# MRIs
X = np.load('small_data.npy')
X = crop(X)


# labels
y = pd.read_csv('all_target.csv')
y.columns = ['y']

# devide into 2 classes: cut / no cut
y = np.where(y.y < 3, 1, 0)

## Leave 22% of entire patient MRI sets untouched for testing

18 of 81 patients (9 from each group) are removed in order to test the multi-modular model.

In [14]:
# Find all ill and healthy indeces
ill_inds = np.argwhere(y==0).flatten()
hea_inds = np.argwhere(y==1).flatten()

# Choose 9 from each group for further testing 
ill_test_inds = np.random.choice(ill_inds, 9, replace=False)
hea_test_inds = np.random.choice(hea_inds, 9, replace=False)

test_inds = [*ill_test_inds, *hea_test_inds]
train_inds = [i for i in range(len(y)) if i not in test_inds]

X_TEST = X[test_inds]
Y_TEST = y[test_inds]

# leaave the rest and use main data
X = X[train_inds]
y = y[train_inds]

print(f'X shape = {X.shape}')

X shape = (63, 10, 300, 300)


## Balance

In [3]:
np.unique(y, return_counts=True)

(array([0, 1]), array([13, 50]))

In [15]:
import imblearn
from imblearn.over_sampling import SMOTE

sm = SMOTE(random_state=42)

X_temp = X.reshape((630, -1))
y_temp = np.array([[i]*10 for i in y]).reshape((630, -1))

X_smote, y_smote = sm.fit_resample(X_temp, y_temp)

In [5]:
np.unique(y_smote, return_counts=True)

(array([0, 1]), array([500, 500]))

## Transformation

In [16]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split

# train-test split
X_train, X_test, y_train, y_test = train_test_split(X_smote, y_smote, test_size=0.33, random_state=42)

# count classes
print(f'Class counts in test data: {np.unique(y_test, return_counts=True)}')

# From numpy to torch
X_train = torch.from_numpy(X_train)
X_test = torch.from_numpy(X_test)
y_test = torch.from_numpy(y_test)

X_TEST = torch.from_numpy(X_TEST)
Y_TEST = torch.from_numpy(Y_TEST)
X_TEST = X_TEST.to(torch.float32)

# Reshaping
X_train = X_train.unsqueeze(1).reshape((670, 1, 300, 300))
X_test = X_test.unsqueeze(1).reshape((330, 1, 300, 300))

Class counts in test data: (array([0, 1]), array([158, 172]))


## Model Part 1: CNN

In [7]:
class deep_simple(nn.Module):
    def __init__(self, batch_size):
        super().__init__()
        
        self.batch_size = batch_size
        
        self.conv_layers = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            nn.ReLU(),
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),

        )
        
        self.class_layers = nn.Sequential(
            nn.Linear(341056, 1000),
            nn.ReLU(),
            nn.Linear(1000, 100),
            nn.Linear(100, 10),
            nn.Linear(10, 2)
        )
        
    def forward(self, x):
        x = self.conv_layers(x)
#         print(x.shape)
        x = self.class_layers(x.reshape((self.batch_size, -1, 341056)))
        
        return x

In [8]:
np.unique(y_train, return_counts=True)

(array([0, 1]), array([347, 323]))

## Training & Validation

In [18]:
# Batches
batch_size = 10

# X_train
X_train_loader = torch.utils.data.DataLoader(
    X_train,
    batch_size=batch_size
)

# y_train
y_train_loader = torch.utils.data.DataLoader(
    y_train,
    batch_size=batch_size
)

ds = deep_simple(batch_size)

criterion = nn.CrossEntropyLoss()
optim = torch.optim.Adam(ds.parameters(), lr=0.01)

n_epochs = 6
last_acc = 0.
mmds_cnt = 1
best_res = 0.778
for n in range(n_epochs):
    break
    # training
    y_train_iter = iter(y_train_loader)
    cnt_5 = 0
    loss_cnt = 0
    for sample in X_train_loader:
    
        labels = next(y_train_iter)
        preds = ds(sample.to(torch.float32)).view(batch_size, 2)
        
        loss = criterion(preds, labels)
        loss.backward()
        
        optim.step()
        optim.zero_grad()
        
        cnt_5 += 1
        if cnt_5 == 11:
            print(f'loss = {round(loss.item(), 4)}')
            cnt_5 = 0
        
        
        loss_cnt += int(loss < 0.01)
        
    # testing real patients
    res = MMDS(ds, X_TEST, Y_TEST)
    print(f'TEST ACCURACY: {res}')
    if res > best_res:
        torch.save(ds.state_dict(), f'MMDS_{mmds_cnt}_dict.pth')
        mmds_cnt += 1
        best_res = res
        print(f'BEST MMDS TEST ACCURACY: {res}')
    
    # validation
    with torch.no_grad():
        test_preds = ds(X_test.to(torch.float32)).reshape((330, -1))
        cnt = 0
        for i, pred in enumerate(test_preds):
            cnt += torch.argmax(pred) == y_test[i]

        acc = cnt / 390
        print(f'E{n+1}  Accuracy = {round(acc.item(), 5)}')
    
    if torch.abs(acc - last_acc) < 0.000005:
        break
    last_acc = acc
    
#     if loss_cnt > 20:
#         break

In [24]:
y_train_iter = iter(y_train_loader)
next(y_train_iter)

tensor([0, 0, 0, 1, 1, 0, 0, 1, 0, 1])

## Model Part 2: Multi-Modularity

In [9]:
# Multi-Modular Deep-Simple
def MMDS(model, X, y, batch_size=10, return_res=False):
    X = X.unsqueeze(2)
    print(X.shape)
    n = len(y)
    acc = 0
    res = []
    for i, x in enumerate(X):
        out = model(x).reshape(batch_size, 2)
        out = torch.argmax(out, dim=1).sum().item()
        pred = int(out > 5)
        res.append(pred)
        
        acc += (pred == y[i])
    
    acc = (acc / n).item()
    
    if return_res:
        return (res, acc)
    
    return acc

In [13]:
# MMDS(ds, X_TEST, Y_TEST)

In [14]:
# torch.save(ds.state_dict(), 'MMDS_0_dict.pth')

In [10]:
ds_m = deep_simple(batch_size)
ds_m.load_state_dict(torch.load('MMDS_0_dict.pth'))
ds_m.eval()
print()

NameError: name 'batch_size' is not defined

In [83]:
class deep_mod(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.class_layer = nn.Sequential(
            nn.Linear(10, 1000),
            nn.Linear(1000, 1000),
            nn.Linear(1000, 100),
            nn.Linear(100, 10),
            nn.Linear(10, 2)
        )
        
    def forward(self, x):
        x = self.class_layer(x)
        
        return x

In [84]:
X = torch.from_numpy(X).to(torch.float32)
y = torch.from_numpy(y)

In [111]:
ds = deep_simple(batch_size)
ds.load_state_dict(torch.load('full_MMDS_p1_dict.pth'))
ds.eval()

dm = deep_mod()
dm.load_state_dict(torch.load('full_MMDS_p2_dict.pth'))
dm.eval()

best_acc = 0.84
optim = torch.optim.Adam(dm.parameters(), lr=0.008)
mod_cnt = 2
for epoch in range(20):
    print(f'Epoch: {epoch+1}\t avg loss:', end = ' ')
    losses = []
    for i, x in enumerate(X.unsqueeze(2)):
        pat = ds(x)
        pat = pat.reshape(10, 2).argmax(axis=1)
        pat = pat.reshape(-1, 10).to(torch.float32)
        res = dm(pat)
        label = y[i].flatten()
        loss = criterion(res, label)
        loss.backward()

        optim.step()
        optim.zero_grad()

        losses.append(loss.item())
    print(round(np.mean(losses), 4), end = '\t')
    
    #TRAIN
    acc = 0
    c = 0
    for i, x in enumerate(X.unsqueeze(2)):
        pat = ds(x)
        pat = pat.reshape(10, 2).argmax(axis=1)
        pat = pat.reshape(-1, 10).to(torch.float32)
        res = dm(pat).flatten().argmax()
        label = y[i].flatten()
        acc += (res == label[0])
        c += 1
    acc = round(acc.item()/c, 4)
    print(f'train acc: {acc}', end = '\t')
        
    # TEST
    acc = 0
    for i, x in enumerate(X_TEST.unsqueeze(2).to(torch.float32)):
        pat = ds_m(x)
        pat = pat.reshape(10, 2).argmax(axis=1)
        pat = pat.reshape(-1, 10).to(torch.float32)
        res = dm(pat).flatten().argmax()
        label = Y_TEST[i].flatten()
        # print(res, label[0])
        acc += (res == label[0])
    acc = round(acc.item()/18, 4)
    print(f'test acc: {acc}')
    
    if acc > best_acc:
        print(f'\t \t MAX !!!')
        torch.save(dm.state_dict(), f'full_{mod_cnt}_MMDS_p2_dict.pth')
        mod_cnt += 1

Epoch: 1	 avg loss: 5.955	train acc: 0.9841	test acc: 0.7778
Epoch: 2	 avg loss: 2.3636	train acc: 0.9841	test acc: 0.7222
Epoch: 3	 avg loss: 102.4059	train acc: 0.873	test acc: 0.7222
Epoch: 4	 avg loss: 56.4246	train acc: 0.9524	test acc: 0.6667
Epoch: 5	 avg loss: 44.7792	train acc: 0.9841	test acc: 0.7778
Epoch: 6	 avg loss: 284.65	train acc: 0.9683	test acc: 0.7778
Epoch: 7	 avg loss: 241.5045	train acc: 0.9841	test acc: 0.7778
Epoch: 8	 avg loss: 7.2524	train acc: 1.0	test acc: 0.7222
Epoch: 9	 avg loss: 0.0	train acc: 1.0	test acc: 0.7222
Epoch: 10	 avg loss: 0.0	train acc: 1.0	test acc: 0.7222
Epoch: 11	 avg loss: 0.0	train acc: 1.0	test acc: 0.7222
Epoch: 12	 avg loss: 0.0	train acc: 1.0	test acc: 0.7222
Epoch: 13	 avg loss: 0.0	train acc: 1.0	test acc: 0.7222
Epoch: 14	 avg loss: 0.0	train acc: 1.0	test acc: 0.7222
Epoch: 15	 avg loss: 0.0	train acc: 1.0	test acc: 0.7222
Epoch: 16	 avg loss: 0.0	train acc: 1.0	test acc: 0.7222
Epoch: 17	 avg loss: 0.0	train acc: 1.0	test acc

In [64]:
# torch.save(ds_m.state_dict(), 'full_MMDS_p1_dict.pth')

In [65]:
# torch.save(dm.state_dict(), 'full_MMDS_p2_dict.pth')

In [95]:
ds = deep_simple(batch_size)
ds.load_state_dict(torch.load('full_MMDS_p1_dict.pth'))
ds.eval()

dm = deep_mod()
dm.load_state_dict(torch.load('full_MMDS_p2_dict.pth'))
dm.eval()

deep_mod(
  (class_layer): Sequential(
    (0): Linear(in_features=10, out_features=1000, bias=True)
    (1): Linear(in_features=1000, out_features=1000, bias=True)
    (2): Linear(in_features=1000, out_features=100, bias=True)
    (3): Linear(in_features=100, out_features=10, bias=True)
    (4): Linear(in_features=10, out_features=2, bias=True)
  )
)

In [96]:
acc = 0
for i, x in enumerate(X_TEST.unsqueeze(2).to(torch.float32)):
    pat = ds_m(x)
    pat = pat.reshape(10, 2).argmax(axis=1)
    pat = pat.reshape(-1, 10).to(torch.float32)
    res = dm(pat).flatten().argmax()
    label = Y_TEST[i].flatten()
    # print(res, label[0])
    acc += (res == label[0])
print(f'test acc: {acc/18}')

test acc: 0.8333333134651184


## Testing prelearnt model

In [12]:
ds0 = deep_simple(10)
ds0.load_state_dict(torch.load('deep_simple_dict.pth'))
ds0.eval()

MMDS(ds0, X_TEST, Y_TEST)

torch.Size([18, 10, 1, 300, 300])


1.0