In [None]:
import numpy as np 
import pandas as pd
import random

from PIL import Image

import torch
import torch.nn as nn
import torch.utils.data as D
import torch.nn.functional as F
import torch.optim as optim

import torchvision
from torchvision import transforms as T

import torchvision.models as models
import torchvision.transforms as transforms
from torchvision import datasets

from torch.utils.data import Dataset

import tqdm

import warnings
warnings.filterwarnings('ignore')

import os

# Print the entries found directly under ../input.
print(os.listdir("../input"))

# One seed value shared by every random number generator used in the notebook.
manualSeed = 123

# Seed NumPy, Python's `random`, PyTorch, and then the CUDA generators
# (the last two only matter if you are using a GPU).
for _seed_fn in (
    np.random.seed,
    random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(manualSeed)

# cuDNN is switched off entirely; autotuning is disabled and deterministic
# mode is requested as well.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.enabled = False

In [None]:
# Directory that holds train.csv / test.csv and the per-mode image folders.
path_data = '../input/recursion-cellular-image-classification/'
# Prefer the GPU, but fall back to the CPU so `.to(device)` does not fail on a
# runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE(review): the DataLoaders in this notebook are built with a literal
# batch_size=8 rather than this constant -- confirm which value is intended.
batch_size = 16

In [None]:
class ImagesDS(D.Dataset):
    """Dataset of multi-channel images listed in a CSV file.

    Each CSV row must provide ``experiment``, ``plate`` and ``well`` columns,
    which are used to build the image paths, plus ``sirna`` when
    ``mode == 'train'`` or ``id_code`` for any other mode. An item is built by
    loading one PNG per requested channel and concatenating the resulting
    tensors along dim 0.

    Args:
        csv_file: Path of the CSV file listing the samples.
        img_dir: Root directory containing ``<mode>/<experiment>/Plate<plate>/``.
        mode: Sub-directory to read images from. ``'train'`` makes
            ``__getitem__`` return the ``sirna`` value; any other mode returns
            ``id_code`` instead.
        site: Site number used in the image file name (``_s<site>_``).
        channels: Channel numbers to load, in concatenation order.
    """

    def __init__(self, csv_file, img_dir, mode='train', site=1, channels=(1, 2, 3, 4, 5, 6)):
        df = pd.read_csv(csv_file)
        self.records = df.to_records(index=False)
        # Copy into a fresh list so instances never share the default value
        # nor alias a list that the caller may mutate later.
        self.channels = list(channels)
        self.site = site
        self.mode = mode
        self.img_dir = img_dir
        self.len = df.shape[0]

    @staticmethod
    def _load_img_as_tensor(file_name):
        """Open ``file_name`` and return it converted with ``T.ToTensor()``."""
        # The context manager closes the file handle once conversion is done.
        with Image.open(file_name) as img:
            return T.ToTensor()(img)

    def _get_img_path(self, index, channel):
        """Return the image path for row ``index`` and channel ``channel``."""
        record = self.records[index]
        file_name = f'{record.well}_s{self.site}_w{channel}.png'
        return '/'.join([self.img_dir, self.mode, record.experiment, f'Plate{record.plate}', file_name])

    def __getitem__(self, index):
        """Return ``(image, sirna)`` in train mode, ``(image, id_code)`` otherwise."""
        paths = [self._get_img_path(index, ch) for ch in self.channels]
        img = torch.cat([self._load_img_as_tensor(img_path) for img_path in paths])

        if self.mode == 'train':
            return img, self.records[index].sirna
        else:
            return img, self.records[index].id_code

    def __len__(self):
        """
        Total number of samples in the dataset
        """
        return self.len

In [None]:
# Training dataset (default mode='train') and test dataset, both rooted at path_data.
ds = ImagesDS(csv_file=f'{path_data}/train.csv', img_dir=path_data)
ds_test = ImagesDS(csv_file=f'{path_data}/test.csv', img_dir=path_data, mode='test')

In [None]:
# Start from a pretrained ResNet-101.
model_ft = models.resnet101(pretrained=True)

# Replace the first convolution with a freshly initialised one that accepts
# 6 input channels (the dataset concatenates 6 channel images by default).
model_ft.conv1 = nn.Conv2d(in_channels=6, out_channels=64, kernel_size=7, stride=2, padding=3)

# Replace the classification head with a 1108-way linear layer.
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=1108)

model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

# NOTE(review): with mode='max' and the default relative threshold mode, a
# metric only counts as an improvement when it exceeds best * (1 + threshold),
# i.e. a 90 % relative gain here -- TODO confirm that threshold=0.9 is intended.
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    patience=3,
    threshold=0.9,
)

In [None]:
# Derive the 80/20 split sizes from the dataset itself instead of a hard-coded
# row count, so random_split keeps working if train.csv changes size.
train_len = int(0.8 * len(ds))
valid_len = len(ds) - train_len

train_dataset, valid_dataset = D.random_split(ds, lengths=[train_len, valid_len])

trainloader = D.DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
validloader = D.DataLoader(valid_dataset, batch_size=8, shuffle=True, num_workers=2)

# The test loader must keep the CSV order: predictions are later paired with
# the id_code column of test.csv by position.
testloader = D.DataLoader(ds_test, batch_size=8, shuffle=False, num_workers=2)



## Train model

In [None]:

def train_model(model, criterion, optimizer, scheduler, n_epochs = 10):
    """Train ``model`` and validate it after every epoch.

    Relies on the module-level ``trainloader``, ``device``, ``evaluate_model``
    and ``get_predict``, and on the ``time`` module being imported before the
    function is called.

    Args:
        model: Network to train; it is called on batches moved to ``device``.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        scheduler: Scheduler stepped once per epoch with the validation
            accuracy.
        n_epochs: Number of passes over ``trainloader``.

    Returns:
        Tuple ``(model, losses, accuracies, test_accuracies)`` where the three
        lists hold one value per epoch: mean batch loss, training accuracy in
        percent, and the accuracy returned by ``evaluate_model``.

    Side effects:
        Saves the whole model to ``../working/model.pth`` whenever the
        validation accuracy improves, and calls ``get_predict`` once at the end
        with the final-epoch weights (not the best checkpoint).
    """
    losses = []
    accuracies = []
    test_accuracies = []
    best_acc = 0
    # set the model to train mode initially
    model.train()
    for epoch in range(n_epochs):
        since = time.time()
        running_loss = 0.0
        running_correct = 0
        running_total = 0
        for inputs, labels in trainloader:
            # move the batch to the training device
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate loss and hit counts for the epoch summary
            _, predicted = torch.max(outputs.detach(), 1)
            running_loss += loss.item()
            running_correct += (labels == predicted).sum().item()
            running_total += labels.size(0)

        epoch_duration = time.time() - since
        # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
        epoch_loss = running_loss / max(len(trainloader), 1)
        # Accuracy is computed over the samples actually seen, so it does not
        # depend on an assumed batch size or on a short last batch.
        epoch_acc = 100.0 * running_correct / max(running_total, 1)
        print("Epoch %s, duration: %d s, loss: %.4f, acc: %.4f" % (epoch+1, epoch_duration, epoch_loss, epoch_acc))

        losses.append(epoch_loss)
        accuracies.append(epoch_acc)

        # switch the model to eval mode to evaluate on the validation data
        model.eval()
        test_acc = evaluate_model(model)
        test_accuracies.append(test_acc)

        # re-set the model to train mode after validating
        model.train()
        if test_acc > best_acc:
            # keep a checkpoint of the best model seen so far
            torch.save(model, '../working/model.pth')
            best_acc = test_acc
        scheduler.step(test_acc)

    # Predict on the test set with the final-epoch weights.
    model.eval()
    get_predict(model)
    print('Finished Training')
    model.train()
    return model, losses, accuracies, test_accuracies

In [None]:
def evaluate_model(model):
    """Return the accuracy (in percent) of ``model`` on ``validloader``.

    Reads the module-level ``validloader`` and ``device``. The model is put in
    eval mode for the duration of the call and its previous train/eval mode is
    restored afterwards. Gradients are disabled while evaluating.

    Args:
        model: Network to evaluate; it is called on batches moved to ``device``.

    Returns:
        Accuracy in percent as a float, or ``0.0`` when the loader yields no
        samples. The value is also printed.
    """
    correct = 0
    total = 0

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in validloader:
                images = images.to(device)
                labels = labels.to(device)

                # Evaluate the model that was passed in, not a global one.
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    # Guard against an empty validation loader.
    test_acc = 100.0 * correct / total if total else 0.0
    print('Accuracy of the network on the validation images: %.2f %%' % (
        test_acc))
    return test_acc

# Module-level accumulators. `preds` is filled by get_predict(); the other
# lists are not referenced anywhere else in the visible notebook.
result, actual, result2, fname, preds = [], [], [], [], []

def get_predict(model):
    """Predict a class index for every sample in ``testloader``.

    Reads the module-level ``testloader`` and ``device`` and stores the result
    in the module-level ``preds`` list, one NumPy array of predicted indices
    per batch. ``preds`` is emptied first, so calling the function again
    replaces the previous predictions instead of appending duplicates.

    The model is put in eval mode for the duration of the call and its previous
    train/eval mode is restored afterwards.

    Args:
        model: Network used for inference; it is called on batches moved to
            ``device``.

    Returns:
        The module-level ``preds`` list.
    """
    was_training = model.training
    model.eval()
    # Start from a clean slate so a repeated call cannot double the output.
    preds.clear()
    try:
        with torch.no_grad():
            for images, _ in testloader:
                images = images.to(device)

                # Use the model that was passed in, not a global one.
                outputs = model(images)
                idx = outputs.max(dim=-1)[1].cpu().numpy()
                preds.append(idx)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)
    return preds
    

In [None]:
import time

# NOTE: torch.load unpickles the file, which can execute arbitrary code --
# only load checkpoints from a trusted source.
model_ft = torch.load('../input/change-to-resnext-pretrained-model/model2.pth')
model_ft = model_ft.to(device)

# The optimizer and scheduler created earlier are bound to the parameters of
# the previous `model_ft` object. Rebuild them for the loaded model with the
# same hyper-parameters; otherwise optimizer.step() would update the discarded
# network and the loaded one would never train.
optimizer = optim.SGD(
    model_ft.parameters(),
    lr=optimizer.defaults['lr'],
    momentum=optimizer.defaults['momentum'],
)
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode=lrscheduler.mode,
    patience=lrscheduler.patience,
    threshold=lrscheduler.threshold,
)

model_ft, training_losses, training_accs, test_accs = train_model(model_ft, criterion, optimizer, lrscheduler, n_epochs=8)

torch.save(model_ft, '../working/model2.pth')

## Predictions for the test set

In [None]:
# Flatten the per-batch prediction arrays collected in `preds` into one list.
final = [item for sublist in preds for item in sublist]

# test.csv supplies the id_code column; predictions are paired with it by position.
submission2 = pd.read_csv(path_data + '/test.csv')

submission = pd.DataFrame({
    'id_code': submission2['id_code'],
    'sirna': final,
})

submission.to_csv('result.csv', index=False)
