In [47]:
from src import preprocess_util
import json
import math
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle as pkl
from PIL import Image
import scipy.misc
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
from torchvision import transforms

%load_ext autoreload 
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [52]:
data_dir = '/workdir/hassony/data'
train_dir = data_dir + '/Training'
save_dir = data_dir + '/storage'

train_df = pd.read_csv('/workdir/hassony/data/Training.csv')

In [67]:
def get_tumor_rec(df, patient_id):
    return int(df[df.Patient_ID==patient_id].Local_tumor_recurrence)

In [12]:
patch_size = 25 
patient_patches = preprocess_util.extract_all_patches(train_dir, patch_size)

In [123]:
# pkl.dump(patient_patches, open(save_dir + '/train_patient_patches.pkl', 'wb'))
patient_patches = pkl.load( open(save_dir + '/train_patient_patches.pkl', 'rb'))

In [195]:
def list_to_tensor(tensor_list):
    tensor_nb = len(tensor_list)
    tensor_shape = tensor_list[0].numpy().shape
    tensor = torch.Tensor(np.zeros([tensor_nb, 1] + list(tensor_shape)))
    print(tensor.numpy().shape)
    for idx in range(len(tensor_list)):
        tensor[idx, 0,] = tensor_list[idx]
    return tensor

def create_train_tensors(patient_patches, patient_df):
    all_patches = []
    all_targets = []
    for patient in patient_patches:
        patches = torch.Tensor(patient_patches[patient])
        tumor_rec = get_tumor_rec(patient_df, patient)
        if(len(patches)):
            nb_patch = patches.numpy().shape[0]
            for idx_patch in range(nb_patch):
                all_patches.append(patches[idx_patch,:,:])
                all_targets.append(torch.Tensor([tumor_rec, 1 - tumor_rec]))
    return list_to_tensor(all_patches), list_to_tensor(all_targets)
            

In [196]:
train_patches, train_target = create_train_tensors(patient_patches, train_df)

(801641, 1, 25, 25)
(801641, 1, 2)


In [81]:
len(train_target)


801641

In [53]:
train_df

Unnamed: 0,Patient_ID,Local_tumor_recurrence,Gender,Age_at_diagnosis,Race,Tumor_side,Tumor_subsite,T_category,N_category,AJCC_Stage,Pathological_grade,Smoking_status_at_diagnosis,Smoking_Pack-Years,Radiation_treatment_course_duration,Total_prescribed_Radiation_treatment_dose,#_Radiation_treatment_fractions,Induction_Chemotherapy,Concurrent_chemotherapy,KM_Overall_survival_censor
0,1,0,Male,58,White,L,Tonsil,2,0,II,III,Former,5.0,41,66,30,N,N,1
1,2,0,Female,78,White,R,BOT,3,0,III,II,Former,70.0,48,70,35,N,Y,0
2,3,0,Male,57,White,R,Tonsil,1,2b,IV,III,Current,30.0,42,66,30,N,Y,1
3,4,0,Female,56,White,R,BOT,2,2b,IV,III,Never,0.0,45,66,30,N,Y,1
4,5,0,Female,60,White,L,Tonsil,2,2b,IV,II,Never,0.0,54,70,33,N,Y,1
5,6,0,Male,66,White,R,BOT,1,1,III,III,Never,0.0,39,66,30,N,Y,1
6,7,0,Female,72,White,L,BOT,4,0,IV,II,Former,26.0,47,70,33,N,Y,1
7,8,0,Male,77,White,R,Tonsil,1,2a,IV,,Never,0.0,42,70,30,N,N,1
8,9,0,Female,71,White,L,Tonsil,1,2a,IV,I,Former,50.0,43,66,30,N,N,1
9,10,0,Male,50,White,R,BOT,3,3,IV,II,Never,0.0,43,72,40,Y,N,1


In [88]:
train_patches.size()

torch.Size([801641, 25, 25])

In [93]:
train_target = torch.Tensor(train_target)

In [205]:
train_dataset = torch.utils.data.TensorDataset(train_patches, train_target)
train_loader = torch.utils.data.DataLoader(train_dataset, shuffle=True, batch_size=2)
print(len(train_loader), 'training samples')

400821 training samples


In [182]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        # self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(64, 32)
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(F.max_pool2d(x , 2))
        x = F.relu(F.max_pool2d(self.conv2(x), 2))
        x = F.relu(F.max_pool2d(self.conv3(x), 2))
        x = x.view(-1, 64)
        x = F.relu(self.fc1(x))
        x = F.softmax(self.fc2(x))
        print(x.data.numpy().shape)
        return x

In [217]:
def train_net(dataloader, net, loss_criterion, epochNb, optimizer):
    running_loss = 0.0
    prediction = []
    target = []
    for i, data_item in enumerate(dataloader, 0):
        
        # get the inputs
        inputs, labels = data_item
        labels = labels.squeeze()

        # wrap them in Variable
        inputs, labels = Variable(inputs), Variable(labels)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        print('output sizes')
        print(outputs.data.numpy())
        print('label sizes')
        print(labels.data.numpy())
        loss = loss_criterion(outputs, labels)
        loss.backward()        
        optimizer.step()

        for output in outputs:
            if(outputs.data[0,0]>0.5):
                prediction.append(1)
            else:
                prediction.append(0)
        running_loss += loss.data[0]
    return prediction, running_loss


In [218]:
def simulation(dataloader, net, loss_criterion, epochNb, learning_rate):
    train_losses = np.zeros(epochNb)
    optimizer = torch.optim.Adam(net.parameters(), learning_rate)
    for epoch in range(epochNb): # loop over the dataset multiple times
        
        # train
        target, prediction, train_loss = train_net(dataloader, net, loss_criterion,
                                  epochNb, optimizer)
        train_losses[epoch] = train_loss
        
        # print epoch train results
        print('Training loss: {los}'.format(los=train_loss))

    return train_losses

In [None]:
model = Net()

bce_criterion = nn.BCELoss()
epoch = 15
learning_rate = 0.0001
momentum = 0.9
train_losses = simulation(train_loader, model, bce_criterion, epoch, learning_rate)

(2, 2)
output sizes
[[ 0.99727517  0.00272485]
 [ 0.99615985  0.00384015]]
label sizes
[[ 0.  1.]
 [ 0.  1.]]
(2, 2)
output sizes
[[ 0.70564735  0.29435265]
 [ 0.41668755  0.58331245]]
label sizes
[[ 1.  0.]
 [ 1.  0.]]
(2, 2)
output sizes
[[ 0.59953797  0.40046203]
 [ 0.30245471  0.69754529]]
label sizes
[[ 0.  1.]
 [ 0.  1.]]
(2, 2)
output sizes
[[ 0.09669517  0.90330482]
 [ 0.08864937  0.91135061]]
label sizes
[[ 0.  1.]
 [ 1.  0.]]
(2, 2)
output sizes
[[ 0.05752864  0.94247139]
 [ 0.00621462  0.99378538]]
label sizes
[[ 1.  0.]
 [ 0.  1.]]
(2, 2)
output sizes
[[ 0.08612441  0.91387558]
 [ 0.56864285  0.43135715]]
label sizes
[[ 0.  1.]
 [ 0.  1.]]
(2, 2)
output sizes
[[ 0.05257666  0.94742334]
 [ 0.08648298  0.913517  ]]
label sizes
[[ 0.  1.]
 [ 0.  1.]]
(2, 2)
output sizes
[[ 0.03540256  0.96459746]
 [ 0.11214513  0.88785487]]
label sizes
[[ 1.  0.]
 [ 0.  1.]]
(2, 2)
output sizes
[[ 0.04679533  0.95320469]
 [ 0.03935646  0.96064353]]
label sizes
[[ 1.  0.]
 [ 0.  1.]]
(2, 2)
out

In [None]:
nn.Conv2d(1, 32, kernel_size=3)(train_loader.)