In [None]:
import pydicom
import glob
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np

from torch.utils.data import Dataset

def window_image(img, window_center,window_width, intercept, slope):
    """Rescale raw pixel values and clip them to an intensity window.

    The raw values are first mapped with ``img * slope + intercept`` and then
    clipped to ``[window_center - window_width // 2,
    window_center + window_width // 2]``.

    Args:
        img: array-like of raw pixel values (any numeric dtype).
        window_center: centre of the intensity window, in rescaled units.
        window_width: full width of the intensity window, in rescaled units.
        intercept: additive term of the linear rescale.
        slope: multiplicative term of the linear rescale.

    Returns:
        A new float64 ``numpy.ndarray`` with the same shape as ``img``.
        The input array is never modified.
    """
    # Do the arithmetic in float64: in the raw integer dtype the product can
    # wrap around (int16) and a negative intercept cannot be combined with an
    # unsigned array on recent NumPy versions.
    rescaled = np.asarray(img, dtype=np.float64) * slope + intercept
    img_min = window_center - window_width // 2
    img_max = window_center + window_width // 2
    return np.clip(rescaled, img_min, img_max)

def dcm2np(dcm_file):
    """Read a DICOM file and return its windowed image scaled by its maximum.

    Args:
        dcm_file: path (or file-like object) accepted by ``pydicom.dcmread``.

    Returns:
        A float ``numpy.ndarray``: the pixel data after ``window_image`` has
        been applied with the windowing parameters stored in the file,
        divided by its own maximum. When that maximum is 0 the windowed image
        is returned unscaled (as float) instead of dividing by zero.
    """
    ds = pydicom.dcmread(dcm_file)
    image = ds.pixel_array
    window_center, window_width, intercept, slope = get_windowing(ds)
    # Windowing makes all the difference: it is what lets parenchymal detail show up.
    image_windowed = window_image(image, window_center, window_width, intercept, slope)
    peak = image_windowed.max()
    if peak == 0:
        # Dividing by zero would fill the image with NaN/inf and poison the loss.
        return image_windowed.astype(np.float64)
    return image_windowed / peak

def get_first_of_dicom_field_as_int(x):
    """Convert a DICOM field value to ``int``.

    Args:
        x: either a ``pydicom.multival.MultiValue`` (or a subclass of it), in
            which case only its first element is used, or a single value
            accepted by ``int()``.

    Returns:
        ``int(x[0])`` for multi-valued fields, ``int(x)`` otherwise.
    """
    # isinstance (rather than an exact type comparison) also accepts subclasses.
    if isinstance(x, pydicom.multival.MultiValue):
        return int(x[0])
    return int(x)

def get_windowing(data):
    """Return ``[window_center, window_width, intercept, slope]`` as ints.

    ``data`` is indexed with the four (group, element) tags below and the
    ``.value`` of each entry is converted with
    ``get_first_of_dicom_field_as_int``.
    """
    tags = (
        ('0028','1050'),  # window center
        ('0028','1051'),  # window width
        ('0028','1052'),  # intercept
        ('0028','1053'),  # slope
    )
    # Read every field first, convert afterwards.
    raw_values = [data[tag].value for tag in tags]
    return list(map(get_first_of_dicom_field_as_int, raw_values))


def next_batch(pf_loader_p, pf_loader_n, batch_size=100):
    """Assemble a random batch of 512x512 images with one-hot labels.

    Each sample is drawn from ``pf_loader_p`` (label 1) with probability 0.45
    and from ``pf_loader_n`` (label 0) otherwise. Images whose shape is not
    (512, 512) are rejected and a replacement is drawn, so the batch always
    contains exactly ``batch_size`` samples.

    Args:
        pf_loader_p: loader whose ``__getitem__(pos_neg=1)`` returns
            ``(image, label)`` for a positive sample.
        pf_loader_n: loader whose ``__getitem__(pos_neg=0)`` returns
            ``(image, label)`` for a negative sample.
        batch_size: number of samples in the returned batch (must be >= 1).

    Returns:
        ``(batch_x, batch_y)`` where ``batch_x`` has shape
        ``(batch_size, 1, 512, 512)`` and ``batch_y`` has shape
        ``(batch_size, 2)`` with rows ``[1, 0]`` (negative) or ``[0, 1]``
        (positive).

    Raises:
        ValueError: if ``batch_size`` is smaller than 1, or if not enough
            512x512 images were found within ``10 * batch_size`` draws.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got {}'.format(batch_size))

    expected_shape = (512, 512)
    # Upper bound on draws so a loader without any valid image cannot hang us.
    max_draws = 10 * batch_size
    batch = []
    draws = 0
    while len(batch) < batch_size:
        if draws >= max_draws:
            raise ValueError(
                'Only {} of {} requested samples had shape {} after {} draws'.format(
                    len(batch), batch_size, expected_shape, draws))
        draws += 1

        if np.random.rand()>0.55:
            x, y = pf_loader_p.__getitem__(pos_neg=1)
        else:
            x, y = pf_loader_n.__getitem__(pos_neg=0)

        if x.shape == expected_shape:
            batch.append((x, y))

    batch_x = np.array([sample[0] for sample in batch])
    # Insert the channel axis: (N, 512, 512) -> (N, 1, 512, 512)
    batch_x = batch_x[:, np.newaxis, :]
    batch_y = np.array([[0,1] if sample[1]==1 else [1,0] for sample in batch])

    return batch_x, batch_y

class PF_Loader(Dataset):
    """Random-sampling loader that reads DICOM images listed in a DataFrame.

    The frame must have a ``PatientID`` column; each value is turned into the
    file name ``ID_<PatientID>.dcm`` inside ``image_dir``.
    """

    # Directory used when no ``image_dir`` is passed to the constructor.
    DEFAULT_IMAGE_DIR = '../input/rsna-intracranial-hemorrhage-detection/stage_1_train_images'

    def __init__(self, df, image_dir=DEFAULT_IMAGE_DIR):
        """Constructor for Loader

        Args:
            df: DataFrame with a ``PatientID`` column.
            image_dir: directory that contains the ``ID_<PatientID>.dcm`` files.
        """
        self.df = df
        self.image_dir = image_dir

    def __len__(self):
        """Number of rows in the underlying DataFrame."""
        return len(self.df)

    def __getitem__(self, pos_neg=0):
        """Itemgetter for Loader

        Note that the argument is NOT an index: one row is drawn at random on
        every call and ``pos_neg`` is simply returned as the label.

        Returns:
            ``(np_image, label)`` where ``np_image`` is the output of
            ``dcm2np`` for the sampled file and ``label`` is ``pos_neg``.
        """
        data = self.df.sample(1)
        img_name = data.iloc[0].PatientID
        file_name = os.path.join(self.image_dir, 'ID_' + img_name + '.dcm')
        np_image = dcm2np(file_name)
        label = pos_neg
        return np_image, label


In [None]:
import torch
import torch.nn as nn

class BleedNet(nn.Module):
    """Small CNN producing two logits per single-channel input image.

    Two convolutional stages are followed by an adaptive max-pool to 60x60,
    so the flattened feature size is always 32 * 60 * 60 regardless of the
    input's spatial size, and then by a two-layer fully connected head.
    """

    def __init__(self, activation='relu'):
        """Build the network.

        Args:
            activation: ``'relu'`` or ``'elu'``; the non-linearity used after
                every layer except the last.

        Raises:
            ValueError: if ``activation`` is not one of the supported names.
        """
        activations = {'relu': nn.ReLU, 'elu': nn.ELU}
        # Validate up front so a typo fails with a clear message instead of an
        # AttributeError on ``self.activation`` further down.
        if activation not in activations:
            raise ValueError(
                "Unsupported activation {!r}; expected one of {}".format(
                    activation, sorted(activations)))

        super().__init__()
        self.activation = activations[activation]()

        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=6, stride=2, padding=0),
            self.activation,
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=4, stride=1, padding=1),
            self.activation,
            nn.AdaptiveMaxPool2d(60),
        )

        self.wrap_up = nn.Sequential(
            nn.Linear(32 * 60 * 60, 512),
            self.activation,
            nn.Linear(512, 2),
        )

    def forward(self, x):
        """Map a ``(N, 1, H, W)`` float tensor to ``(N, 2)`` logits."""
        out = self.layer1(x)
        out = self.layer2(out)
        # Flatten everything except the batch dimension for the linear head.
        out = out.reshape(out.size(0), -1)
        out = self.wrap_up(out)
        return out

# Optional smoke test: push a random batch through a fresh BleedNet.
# Set check_net = True to run it.
check_net = False
if check_net:
    # Use the GPU only when one is actually available.
    check_device = 'cuda' if torch.cuda.is_available() else 'cpu'
    net = BleedNet()
    net.to(check_device)
    fake_input = np.random.rand(4,1,512,512)
    fake_input = torch.from_numpy(fake_input)
    fake_input = fake_input.float().to(check_device)
    # Call the module itself (not .forward) so hooks run; no gradients needed.
    with torch.no_grad():
        net(fake_input)

In [None]:
import torch
# Cell output: True when PyTorch can see a CUDA device, False otherwise.
torch.cuda.is_available()

In [None]:
import os
import pickle
import random
import glob
from copy import deepcopy
import numpy as np
import torch
from sklearn.model_selection import KFold
from torch.optim.lr_scheduler import ReduceLROnPlateau, ExponentialLR
from torch.utils.data import DataLoader
from torch.utils.data.dataloader import default_collate
from tqdm import tqdm
import torch.nn as nn
import torch.optim as optim
import psutil
import pickle
import os
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.utils import shuffle

def df2pf_loader(df):
    """Build positive and negative ``PF_Loader`` objects from a label frame.

    The ``ID`` column is split on ``"_"``; its second part becomes
    ``PatientID`` and its third part ``Sub_type``. Only rows whose sub-type is
    ``'any'`` are kept, and these are divided by ``Label`` (1 vs 0).

    Args:
        df: DataFrame with an ``ID`` column of the form ``<x>_<id>_<subtype>``
            and a ``Label`` column. The frame is not modified.

    Returns:
        ``(pf_loader_pos, pf_loader_neg)``: loaders over the rows with
        ``Label == 1`` and ``Label == 0`` respectively.
    """
    # Split once and attach the parts to a copy, so the caller's frame (which
    # may be a slice of a larger frame) is left untouched.
    id_parts = df['ID'].str.split("_", n = 3, expand = True)
    labelled = df.assign(Sub_type=id_parts[2], PatientID=id_parts[1])
    bleed_subtype_df = labelled.loc[labelled['Sub_type'] == 'any']

    df_subtype_pos = bleed_subtype_df.loc[bleed_subtype_df['Label'] == 1]
    df_subtype_neg = bleed_subtype_df.loc[bleed_subtype_df['Label'] == 0]

    pf_loader_pos = PF_Loader(df_subtype_pos)
    pf_loader_neg = PF_Loader(df_subtype_neg)
    return pf_loader_pos, pf_loader_neg


# Toggle for (re)building the train/val/test split in this cell.
# NOTE(review): when this is False, `df`, `val` and `test` must already exist
# in the kernel, otherwise the loader lines at the bottom raise NameError.
make_split = True

if make_split:
    df = pd.read_csv('../input/rsna-intracranial-hemorrhage-detection/stage_1_train.csv')
    df = shuffle(df)

    # Random row mask: each row lands in the training split with probability 0.8.
    # NOTE(review): no random seed is set anywhere in the visible notebook, so
    # the split changes from run to run.
    msk = np.random.rand(len(df)) < 0.8 #80% for training
    train = df[msk]
    val_test = df[~msk]

    # The remaining ~20% of rows is halved at random into validation and test.
    msk_val_test = np.random.rand(len(val_test)) < 0.5
    val = val_test[msk_val_test] #10% val
    test = val_test[~msk_val_test] #10% test

    print('Train size:', len(train))
    print('Val size:', len(val))
    print('Test size:', len(test))

    # From here on `df` refers to the training split only.
    df = train

#Load data
# Only a random subset of 10000 training rows is handed to the loaders.
train_pf_loader_pos, train_pf_loader_neg = df2pf_loader(df.sample(10000)) 

val_pf_loader_pos, val_pf_loader_neg = df2pf_loader(val) 
test_pf_loader_pos, test_pf_loader_neg = df2pf_loader(test) 

# --- Learning and net parameters -------------------------------------------
n_batches = 4000   # number of training iterations
batch_size = 100   # value passed to next_batch for training
lr = 0.01          # SGD learning rate
stepsize = 1000    # StepLR: number of scheduler steps between decays
USE_GPU = True

# Fall back to the CPU when CUDA is not available.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Using device: {}'.format(device))

# --- Model, loss, optimiser and learning-rate schedule ---------------------
loss_fn = nn.CrossEntropyLoss()
bleed_net = BleedNet()
bleed_net.to(device)
optimizer = optim.SGD(bleed_net.parameters(), lr=lr) #momentum?
from torch.optim.lr_scheduler import StepLR
scheduler = StepLR(optimizer, step_size=stepsize, gamma=0.99)

# --- Logs: each gets (loss, accuracy) tuples appended ----------------------
train_loss_log, val_loss_log, test_loss_log = [], [], []


#TRAIN THE MODEL
# One optimisation step per iteration; every 100 iterations a small
# validation batch is evaluated (best effort: a failed batch is reported
# and skipped instead of aborting training).
for i in range(n_batches):
    bleed_net.train()
    x, y = next_batch(train_pf_loader_pos,train_pf_loader_neg,batch_size=batch_size)
    x_train_tensor = torch.from_numpy(x).float().to(device)
    # Labels arrive one-hot; CrossEntropyLoss expects class indices.
    y_train_tensor = torch.from_numpy(y).long().to(device)
    y_train_tensor = y_train_tensor.argmax(dim=1)

    optimizer.zero_grad()
    yhat = bleed_net(x_train_tensor)
    loss = loss_fn(yhat, y_train_tensor)
    loss.backward()
    optimizer.step()
    scheduler.step()

    yhat_choice = yhat.argmax(dim=1)
    # Store plain Python floats so the logs do not keep device tensors alive.
    acc = (y_train_tensor == yhat_choice).float().mean().item()
    print('Loss: {} | Acc: {} | Batch {}/{}'.format(loss.item(),acc,i,n_batches))
    train_loss_log.append((loss.item(),acc))

    if i % 100 == 0:
        bleed_net.eval()
        try:
            x, y = next_batch(val_pf_loader_pos, val_pf_loader_neg, batch_size=20)
        except Exception as err:
            # Keep training, but make the failure visible instead of hiding it.
            print('Skipping validation at batch {}: {!r}'.format(i, err))
            continue
        # No gradients are needed for evaluation.
        with torch.no_grad():
            x_val_tensor = torch.from_numpy(x).float().to(device)
            y_val_tensor = torch.from_numpy(y).long().to(device)
            y_val_tensor = y_val_tensor.argmax(dim=1)
            yhat = bleed_net(x_val_tensor)
            yhat_choice = yhat.argmax(dim=1)
            acc = (y_val_tensor == yhat_choice).float().mean().item()
            loss = loss_fn(yhat, y_val_tensor)
        print('\n\n\nVALIDATION Loss: {} | Acc: {} | Batch {}/{}\n\n\n'.format(loss.item(),acc,i,n_batches))
        val_loss_log.append((loss.item(),acc))


#FINALLY, TEST IT
# Evaluate on a fixed number of random test batches. After the loop, `acc`
# and `loss` hold the values of the LAST test batch only.
print('Evaluating net performance on test split...')
n_test_batches = 10
bleed_net.eval()
for test_idx in range(n_test_batches):
    x, y = next_batch(test_pf_loader_pos, test_pf_loader_neg, batch_size=25)
    # No gradients are needed for evaluation.
    with torch.no_grad():
        x_test_tensor = torch.from_numpy(x).float().to(device)
        # Labels arrive one-hot; CrossEntropyLoss expects class indices.
        y_test_tensor = torch.from_numpy(y).long().to(device)
        y_test_tensor = y_test_tensor.argmax(dim=1)
        yhat = bleed_net(x_test_tensor)
        yhat_choice = yhat.argmax(dim=1)
        # Plain Python float, so it can be logged, formatted and plotted.
        acc = (y_test_tensor == yhat_choice).float().mean().item()
        loss = loss_fn(yhat, y_test_tensor)
    # Report the test-batch counter, not the stale training-loop counter.
    print('\n\n\nTEST Loss: {} | Acc: {} | Batch {}/{}\n\n\n'.format(loss.item(),acc,test_idx,n_test_batches))
    test_loss_log.append((loss.item(),acc))


In [None]:
# Debug cell: display the class-index labels of the last training batch.
y_train_tensor

In [None]:
!mkdir -p models

In [None]:
print('Saving the model...')
import os
import time
# Make sure the target directory exists, even on a fresh kernel.
os.makedirs('models', exist_ok=True)
timestr = time.strftime("%Y%m%d-%H%M%S")
# `acc` may be a 0-dim tensor or a float; format it as a short number so the
# file name does not contain a tensor repr.
model_path = 'models/bleednet_testacc_{:.4f}_{}.torch'.format(float(acc), timestr)
torch.save(bleed_net.state_dict(), model_path)
print('Model saved in:', model_path)

# Log entries are (loss, accuracy) tuples. float() also moves any tensor
# entries to the host, which matplotlib needs in order to plot them.
plt.plot([float(entry[0]) for entry in train_loss_log], label='train loss')
plt.plot([float(entry[1]) for entry in val_loss_log], label='validation accuracy')
plt.plot([float(entry[1]) for entry in test_loss_log], label='test accuracy')
plt.xlabel('log entry')
plt.legend()
plt.show()
print('Exiting...')