In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))
        
#%matplotlib inline
#%config InlineBackend.figure_format = 'retina'

from pylab import *
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import *
from torch.utils.data import SubsetRandomSampler
import matplotlib.pyplot as plt

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
PATH = '../input/blood-cells/dataset2-master/dataset2-master/images'

# Preprocessing shared by both splits: resize to 120x120, convert to a tensor,
# then normalise every channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((120, 120)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# One ImageFolder per split directory under PATH.
train_data = datasets.ImageFolder(root=PATH + '/TRAIN', transform=transform)
test_data = datasets.ImageFolder(root=PATH + '/TEST', transform=transform)

In [None]:
#fancy data splitting

def get_subset(indices, start, end):
    """Return the slice of ``indices`` from position ``start`` up to, but not including, ``end``.

    Args:
        indices: Any sliceable sequence (list, tensor, ...).
        start: Index of the first element to keep.
        end: Index one past the last element to keep. Values beyond the
            sequence length are clipped, as with ordinary slicing.

    Returns:
        ``indices[start:end]``, of the same type as ``indices``.
    """
    # `end` is an end index, not a length: slicing to `start + end` silently
    # over-selects whenever the caller does not rely on clipping.
    return indices[start:end]

#Define training/validation split for data
TRAIN_PCT, VALIDATION_PCT = 0.8, 0.2
SPLIT_SEED = 42   # fixed so the same train/validation split is drawn on every run
BATCH_SIZE = 128

# The validation subset below is "everything after the training slice", so the
# two fractions are expected to cover the whole dataset.
assert abs(TRAIN_PCT + VALIDATION_PCT - 1.0) < 1e-9, "split fractions must sum to 1"

train_count = int(len(train_data) * TRAIN_PCT)
# Count the validation samples as the remainder: int(len * VALIDATION_PCT) can
# come out one short of the real subset size because both products are truncated.
validation_count = len(train_data) - train_count

# A dedicated generator keeps the shuffle reproducible without touching the
# global RNG state.
indices = torch.randperm(
    len(train_data), generator=torch.Generator().manual_seed(SPLIT_SEED)
)

train_indices = get_subset(indices, 0, train_count)
validation_indices = get_subset(indices, train_count, len(train_data))

# Every sample must land in exactly one of the two subsets.
assert len(train_indices) == train_count
assert len(validation_indices) == validation_count

# Both loaders read from train_data; the samplers restrict each one to its own
# disjoint set of indices.
dataloaders = {
    "train": torch.utils.data.DataLoader(
        train_data, sampler=SubsetRandomSampler(train_indices), batch_size=BATCH_SIZE
    ),
    "validation": torch.utils.data.DataLoader(
        train_data, sampler=SubsetRandomSampler(validation_indices), batch_size=BATCH_SIZE
    ),
}

print(len(train_indices))
print(len(validation_indices))

In [None]:
#Convolutional Neural Network
class Net(nn.Module):
    """CNN classifier: four 3x3 convolutions, three linear layers, 4 log-probability outputs.

    ``fc1`` expects 46656 flattened features, i.e. 64 channels x 27 x 27. That is
    what the unpadded convolutions and the two 2x2 poolings leave of a 120x120
    input (120 -> 118 -> 116 -> 58 -> 56 -> 54 -> 27).
    """

    def __init__(self):
        super().__init__()
        # Convolutional stack: 3x3 kernels, stride 1, no padding.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=16, kernel_size=3, stride=1)
        self.conv3 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=46656, out_features=4666)
        self.fc2 = nn.Linear(in_features=4666, out_features=128)
        self.fc3 = nn.Linear(in_features=128, out_features=4)
        # A single dropout module (default rate) reused after fc1 and fc2.
        self.dropout = nn.Dropout()

    def forward(self, x):
        """Return per-class log-probabilities (log_softmax over dim 1) for a batch of images."""
        # Two conv+ReLU pairs, each pair followed by a 2x2 max-pool.
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(F.relu(self.conv4(x)), 2)

        # Collapse everything except the batch dimension.
        x = torch.flatten(x, 1)

        # Fully connected head with dropout after each hidden layer.
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.dropout(F.relu(self.fc2(x)))
        return F.log_softmax(self.fc3(x), dim=1)

# Build the network and move its parameters to the selected device.
net = Net().to(device)

Parameters to tweak:
- convolution (number of layers, number of filters, etc.)
- dropout (rate, position, number of layers)

In [None]:
# define loss function and optimizer
import torch.optim as optim

# Net.forward already ends in log_softmax, so use NLLLoss, which expects
# log-probabilities. CrossEntropyLoss would apply log_softmax a second time;
# the loss value is the same, but the pairing is redundant and misleading.
criterion = nn.NLLLoss()
optimizer = optim.Adam(net.parameters(), lr=0.0001)
#optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
epochs = 30
loss_values = []        # mean training loss per epoch
valid_loss_values = []  # mean validation loss per epoch
train_acc = []          # training accuracy per epoch, whole percent
val_acc = []            # validation accuracy per epoch, whole percent


def _run_epoch(loader, training):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy_percent)``.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        training: When true, the network is put in train mode and the optimizer
            steps after every batch. When false, the network is put in eval
            mode (so dropout is disabled) and no gradients are tracked.

    Returns:
        The per-sample mean loss and the accuracy as a floored whole percentage,
        both computed over the samples actually seen in this pass.
    """
    net.train(training)
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = net(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # The criterion is built with its default reduction, so weight the
            # batch value by the batch size to accumulate a per-sample sum.
            loss_sum += loss.item() * inputs.size(0)
            n_correct += (outputs.argmax(dim=1) == labels).sum().item()
            n_seen += labels.size(0)

    # Guard against an empty loader instead of dividing by zero.
    n_seen = max(n_seen, 1)
    return loss_sum / n_seen, 100 * n_correct // n_seen


for epoch in range(epochs):  # loop over the dataset multiple times
    train_loss, train_accuracy = _run_epoch(dataloaders['train'], training=True)
    loss_values.append(train_loss)
    train_acc.append(train_accuracy)

    # Report the epoch mean rather than whatever the last batch happened to be.
    print(f'TRAINING LOSS: {train_loss:.4f}')
    print(f'TRAINING ACCURACY: {train_accuracy} %')

    valid_loss, valid_accuracy = _run_epoch(dataloaders['validation'], training=False)
    valid_loss_values.append(valid_loss)
    val_acc.append(valid_accuracy)

    print(f'VAL LOSS: {valid_loss:.4f}')
    print(f'VAL ACCURACY: {valid_accuracy} %')
    print('------------------------------------------------' + str(epoch))

# Save only the learned weights (state_dict) to the Kaggle working directory.
torch.save(net.state_dict(), '/kaggle/working/WBC_model.pt')


def _plot_history(series, title, ylabel):
    """Draw one labelled line per ``(label, values)`` pair in ``series`` against the epoch index."""
    fig, ax = plt.subplots()
    for label, values in series:
        ax.plot(values, label=label)
    ax.set(title=title, xlabel='epoch', ylabel=ylabel)
    ax.legend()
    plt.show()


_plot_history(
    [('train loss', loss_values), ('valid loss', valid_loss_values)],
    title='Loss per epoch',
    ylabel='loss',
)

_plot_history(
    [('train acc', train_acc), ('valid acc', val_acc)],
    title='Accuracy per epoch',
    ylabel='accuracy (%)',
)