In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# List the entries of the ../input/ directory so the available input files show up in the cell output.
print(os.listdir("../input/"))
# Any results you write to the current directory are saved as output.

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader, sampler
from torchvision import transforms, utils
from PIL import Image
from glob import glob
#create dataset
class HeadCtDS(Dataset):
    """Dataset pairing the image files of a directory with labels read from a CSV file.

    Image paths are collected by globbing ``root_dir`` and sorting the result;
    the label of sample ``idx`` is the value in the last column of CSV row
    ``idx``.

    NOTE(review): this relies on the sorted file order matching the CSV row
    order, and on the directory holding at least as many files as the CSV has
    rows -- confirm for the data in use.

    Args:
        csv_file: Path of the CSV file with one row per sample.
        root_dir: Directory containing the image files.
        transform: Optional callable applied to the loaded RGB PIL image.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.images_path = sorted(glob(os.path.join(root_dir, '*')))

    def __len__(self):
        """Return the number of samples, i.e. the number of CSV rows."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``.

        The image is opened from disk, converted to RGB and passed through
        ``transform`` when one was given. The label is returned as a 0-d
        NumPy array.
        """
        image_path = self.images_path[idx]
        image = Image.open(image_path).convert('RGB')
        # Select the last column purely by position. `iloc[idx][-1]` would
        # index the row Series with an integer *key*, which pandas only treats
        # as a position through a deprecated fallback.
        label = np.array(self.df.iloc[idx, -1])
        if self.transform:
            image = self.transform(image)
        return image, label

In [None]:
# Report whether CUDA is usable and, if so, which device will be used.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    # The device queries raise when no CUDA device is present, so they are
    # only issued on GPU machines; printing makes their results visible
    # (a bare expression in the middle of a cell is not displayed).
    print(torch.cuda.current_device(), torch.cuda.get_device_name(0))
    # Release cached, currently unused GPU memory held by PyTorch's allocator.
    torch.cuda.empty_cache()

In [None]:
#create dataset and load data
# Locations of the CT images and of the CSV file that holds their labels.
img_dir = "../input/head_ct/head_ct/"
csv_file = "../input/labels.csv"

# Per-image pipeline: resize to 224x224, random grayscale / horizontal flip /
# rotation (range parameter 10), conversion to a tensor, then per-channel
# normalisation with the given means and standard deviations.
dataset = HeadCtDS(
    csv_file=csv_file,
    root_dir=img_dir,
    transform=transforms.Compose(
        [
            transforms.Resize((224, 224)),
            transforms.RandomGrayscale(),
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(10),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    ),
)

In [None]:
# Split the samples into 80% train / 20% validation and expose them through
# index samplers, so both loaders address the same underlying files and labels.
dataset_size = len(dataset)
indices = list(range(dataset_size))
val_split = 0.2
split = int(val_split*dataset_size)
shuffle_dataset = True
random_seed = 1337
if shuffle_dataset:
    # Fixed seed so the same samples land in each split on every run.
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, val_indices = indices[split:], indices[:split]
print(len(train_indices), len(val_indices))

train_sampler = sampler.SubsetRandomSampler(train_indices)
valid_sampler = sampler.SubsetRandomSampler(val_indices)

# Validation must be measured on un-augmented images: `dataset` applies random
# grayscale/flip/rotation, which would make validation metrics vary from epoch
# to epoch for reasons unrelated to the model. Build a second view of the same
# files that keeps only the deterministic steps (resize, tensor, normalise).
val_dataset = HeadCtDS(csv_file, img_dir, transform=transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]))

trainloader = DataLoader(dataset, batch_size=10, sampler=train_sampler)
validloader = DataLoader(val_dataset, batch_size=10, sampler=valid_sampler)

In [None]:
# Pull one batch from the training loader and display up to four of its images.
trainiter = iter(trainloader)
# Use the built-in next(): DataLoader iterators do not provide a .next() method
# in current PyTorch releases.
images, label = next(trainiter)
print(images.shape)

# Undo the per-channel normalisation so pixel values return to [0, 1] for
# display; these must match the values passed to transforms.Normalize.
norm_mean = np.array([0.485, 0.456, 0.406])
norm_std = np.array([0.229, 0.224, 0.225])

plt.figure(figsize=(16, 4))
for i in range(min(4, len(images))):
    plt.subplot(1, 4, i+1)
    # Tensors are channel-first (C, H, W); imshow expects channel-last (H, W, C).
    channel_last = images[i].numpy().transpose((1, 2, 0))
    plt.imshow(np.clip(channel_last * norm_std + norm_mean, 0, 1))
print(label)

In [None]:
#creating a model
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN classifier: three conv/ReLU/max-pool stages and two linear layers.

    The first linear layer expects 64 x 26 x 26 features per sample, which is
    what the three convolutional stages produce for 3 x 224 x 224 inputs.
    ``forward`` returns log-probabilities over 2 classes (log-softmax), so it
    pairs with ``nn.NLLLoss``.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(nn.Conv2d(3, 32, 3),
                                   nn.ReLU(),
                                   nn.MaxPool2d(2, 2))
        self.layer2 = nn.Sequential(nn.Conv2d(32, 32, 3),
                                   nn.ReLU(),
                                   nn.MaxPool2d(2, 2),
                                   nn.BatchNorm2d(32))
        self.layer3 = nn.Sequential(nn.Conv2d(32, 64, 3),
                                   nn.ReLU(),
                                   nn.MaxPool2d(2, 2))
        self.fc1 = nn.Linear(64*26*26, 64)
        self.dropout = nn.Dropout(0.5)
        self.bn = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 2)
        self.prob = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to log-probabilities ``(N, 2)``.

        Raises:
            RuntimeError: if the input size does not yield 64*26*26 features
                per sample (raised by the first linear layer).
        """
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        # Flatten each sample separately. Pinning the batch dimension (rather
        # than view(-1, 64*26*26)) means a wrong input size fails loudly at fc1
        # instead of silently regrouping values from different samples.
        out = out.view(out.size(0), -1)
        out = F.relu(self.fc1(out))
        out = self.bn(out)
        out = self.dropout(out)
        out = self.fc2(out)
        out = self.prob(out)
        return out
    
# Place the network on the GPU when one is available; otherwise keep it on the
# CPU instead of failing on a machine without CUDA.
model = Net().to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

In [None]:
!pip install torchsummary

In [None]:
from torchsummary import summary
# Print torchsummary's report for the model, given a single 3 x 224 x 224 input.
summary(model, (3, 224, 224))

In [None]:
#loss and optimizer
import torch.optim as optim

# Adam over every parameter of the model, learning rate 0.001.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
# Negative log-likelihood loss.
criterion = nn.NLLLoss()

In [None]:
#train and validate the model
num_epochs = 100
# Per-epoch history, consumed by the plotting cells.
train_loss_list = []
val_loss_list = []
train_acc_list = []
val_acc_list = []

# Run on whichever device the model already lives on, so batches and weights
# always agree without hard-coding CUDA.
device = next(model.parameters()).device


def _run_epoch(loader, training):
    """Run one full pass over ``loader``.

    Args:
        loader: DataLoader yielding ``(inputs, labels)`` batches.
        training: When True the model is put in train mode and the optimizer
            is stepped on every batch; when False the model is put in eval
            mode and no gradients are tracked.

    Returns:
        ``(mean_batch_loss, accuracy_percent)`` for the pass.
    """
    model.train(training)  # train(False) is the same as eval()
    loss_sum = 0.0
    seen = 0
    hits = 0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        if training:
            optimizer.zero_grad()
        with torch.set_grad_enabled(training):
            outputs = model(inputs)
            loss = criterion(outputs, labels)
        if training:
            loss.backward()
            optimizer.step()
        loss_sum += loss.item()

        # Predicted class = index of the largest log-probability.
        predicted = outputs.argmax(dim=1)
        seen += labels.size(0)
        hits += (predicted == labels).sum().item()
    return loss_sum / len(loader), 100 * hits / seen


for epoch in range(num_epochs):
    train_loss, train_acc = _run_epoch(trainloader, training=True)
    train_acc_list.append(train_acc)
    train_loss_list.append(train_loss)

    val_loss, val_acc = _run_epoch(validloader, training=False)
    val_acc_list.append(val_acc)
    val_loss_list.append(val_loss)
    print('Epoch: {} - train_loss: {} - train_acc: {} - val_loss: {} - val_acc: {} '.format(epoch, train_loss, train_acc, val_loss, val_acc))

In [None]:
# Plot training and validation loss per epoch.
# Derive the x axis from the recorded history rather than a hard-coded count,
# so the plot still works if the number of epochs changes.
epochs = list(range(len(train_loss_list)))
plt.plot(epochs, train_loss_list, 'r', label='Train')
plt.plot(epochs, val_loss_list, 'b', label='Valid')
plt.title('Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
# The legend has to be created after the lines exist, otherwise it is empty.
plt.legend(loc = 'upper left')
plt.show()


In [None]:
# Plot training and validation accuracy (in percent) per epoch.
# The x axis is derived from the recorded history itself, so this cell does
# not rely on an `epochs` list of matching length from another cell.
acc_epochs = range(len(train_acc_list))
plt.plot(acc_epochs, train_acc_list, 'r', label='Train')
plt.plot(acc_epochs, val_acc_list, 'b', label='Valid')
plt.title('Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy (%)')
plt.legend(loc='upper left')
plt.show()