In [19]:
import os
from torchvision import models, transforms
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import pandas as pd
import pathlib, torch, tqdm
from sklearn.metrics import f1_score

In [2]:
# '__file__' is a plain string here (notebooks define no __file__), so the
# parent of its absolute path is the directory the kernel is running in.
rootdir = pathlib.Path(os.path.abspath('__file__')).parent

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [12]:
def prepare_images_labels(path: str, subset: str, root=None):
    """Read a label file and return the image paths and labels it lists.

    Every non-blank line is expected to hold an image file name followed by
    an integer label, separated by whitespace. The label is taken from the
    last whitespace-separated field, so file names containing spaces work.

    Args:
        path: Location of the label file (anything ``open`` accepts).
        subset: Name of the image sub-directory, e.g. ``train``, ``test`` or
            ``hidden_test``.
        root: Directory that contains ``subset``. Defaults to the
            module-level ``rootdir`` when omitted.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays in file order:
        ``images`` holds ``root / subset / <file name>`` paths and ``labels``
        the matching integer labels.

    Raises:
        ValueError: If a non-blank line has no label field or the label is
            not an integer.
    """
    base_dir = (rootdir if root is None else pathlib.Path(root)) / subset
    images, labels = [], []
    with open(path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank lines, e.g. a trailing newline at end of file.
                continue
            img_name, label = line.rsplit(maxsplit=1)
            images.append(base_dir / img_name)
            labels.append(int(label))
    return np.array(images), np.array(labels)


# Next, build a dataset and dataloader for the ResNet18 model.
# The transform below applies the input preprocessing that the model expects.
def _load_rgb(img_path):
    """Open ``img_path`` with PIL and return it as a 3-channel RGB image."""
    # The context manager releases the file handle; convert() loads the pixel
    # data first and guarantees three channels, which Normalize below needs
    # (RGBA, grayscale or palette PNGs would otherwise fail there).
    with Image.open(img_path) as img:
        return img.convert('RGB')


# Preprocessing pipeline: image path -> normalised 3 x 256 x 256 float tensor.
t = transforms.Compose([
        _load_rgb,
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        # Per-channel (R, G, B) mean and std used for normalisation.
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
])

class vgg_dataset(torch.utils.data.Dataset):
    """In-memory dataset of preprocessed images and their labels.

    All inputs are transformed once at construction time and stacked into a
    single tensor, so indexing afterwards is a cheap tensor lookup.

    Args:
        x: Iterable of inputs accepted by ``transform`` (image paths when the
            default transform is used).
        y: Labels, one per element of ``x``; any indexable sequence with a
            length (NumPy array, list, tensor).
        transform: Callable mapping one element of ``x`` to a tensor.
            Defaults to the module-level transform ``t``.

    Raises:
        AssertionError: If ``x`` and ``y`` do not have the same length.
    """

    def __init__(self, x, y, transform=None):
        super().__init__()
        if transform is None:
            # Fall back to the notebook-wide preprocessing pipeline.
            transform = t
        self.x = torch.stack([transform(img_path) for img_path in x])
        self.y = y
        # len() rather than .shape so plain Python lists are accepted as labels.
        assert self.x.shape[0] == len(self.y), "the input x and y have different size!"

    def __getitem__(self, index):
        """Return the ``(image_tensor, label)`` pair stored at ``index``."""
        return self.x[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return self.x.shape[0]

# Then wrap the dataset in a DataLoader so it can be iterated over in batches.
class vgg_dataloader(torch.utils.data.DataLoader):
    """DataLoader that builds its own ``vgg_dataset`` from ``x`` and ``y``.

    Args:
        x: Inputs forwarded to ``vgg_dataset``.
        y: Labels forwarded to ``vgg_dataset``.
        batch_size: Number of samples per batch.
        shuffle: Whether the loader shuffles the samples.
    """

    def __init__(self, x, y, batch_size=256, shuffle=False):
        wrapped = vgg_dataset(x, y)
        super().__init__(dataset=wrapped, batch_size=batch_size, shuffle=shuffle)


# Helper that collects the model's predictions and the true labels (keys),
# so they can then be compared with whichever metric is needed.
def compare(model, dataloader, device):
    """Run ``model`` over ``dataloader`` and collect predictions and labels.

    The function does not change the model's train/eval mode; callers should
    put the model in eval mode themselves before calling it.

    Args:
        model: Callable mapping a batch of images to per-class scores of
            shape ``(batch, classes)``.
        dataloader: Iterable yielding ``(image_batch, key_batch)`` tensor pairs.
        device: Device the image batches are moved to before the forward pass.

    Returns:
        A tuple ``(predictions, keys)`` of 1-D NumPy arrays: the predicted
        class index (argmax over dim 1) and the true label for every sample,
        in iteration order. Both are empty when the loader yields nothing.
    """
    torch.cuda.empty_cache()
    # Zero-length integer seeds keep the result dtype promotion of a plain
    # integer start array and make an empty loader return empty arrays.
    pred_chunks = [np.empty(0, dtype=int)]
    key_chunks = [np.empty(0, dtype=int)]
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        for image, key in tqdm.tqdm(dataloader):
            scores = model(image.to(device))
            _, prediction = torch.max(scores, dim=1)
            pred_chunks.append(prediction.cpu().numpy())
            # Labels are only collected, so they never need to visit the device.
            key_chunks.append(key.cpu().numpy())
    # One concatenation at the end instead of re-copying the arrays per batch.
    return np.concatenate(pred_chunks), np.concatenate(key_chunks)

# Load the trained model

In [5]:
# Build the architecture only. load_state_dict below is strict by default, so
# the checkpoint must supply every parameter and buffer; downloading the
# ImageNet weights first would be wasted work and would need network access.
model = models.resnet18(pretrained=False)
# Replace the classifier head with a bias-free 2-class linear layer + softmax.
model.fc = torch.nn.Sequential(torch.nn.Linear(in_features=512, out_features=2, bias=False),
                               torch.nn.Softmax(dim=1)
                              )
model.to(device)

# NOTE(review): torch.load unpickles the file - only load checkpoints from a
# trusted source.
model.load_state_dict(torch.load('resnet_best.pt', map_location=device))
# Inference mode for layers such as BatchNorm.
model.eval()
print(model)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

# Test set performance

In [10]:
# Locate the test label file and read the (image path, label) pairs from it.
test_path = rootdir.joinpath('labels', 'test_labels.txt')
test_images, test_labels = prepare_images_labels(test_path, 'test')
print(f"{len(test_images)} {len(test_labels)}")

# Wrap the test images and labels in a batched loader for evaluation.
test_loader = vgg_dataloader(test_images, test_labels)

2756 2756


In [13]:
# Collect the predicted class and the true label for every test image.
predictions, targets = compare(model=model, dataloader=test_loader, device=device)

100%|██████████| 11/11 [04:15<00:00, 23.19s/it]


In [18]:
# F1 score of the test-set predictions against the true labels.
f1 = f1_score(y_true=targets, y_pred=predictions)
print(f1)

0.9250089702188732


# Hidden test set predictions

In [23]:
# The sample submission lists the hidden-test image names with placeholder labels.
test_path = rootdir.joinpath('labels', 'sample_submission.csv')
df = pd.read_csv(test_path)
df.head()

Unnamed: 0,img_name,label
0,C164P125ThinF_IMG_20151116_113954_cell_58.png,0
1,C82P43ThinF_IMG_20150817_123824_cell_201.png,0
2,C133P94ThinF_IMG_20151004_155144_cell_125.png,0
3,C152P113ThinF_IMG_20151115_125038_cell_192.png,0
4,C59P20thinF_IMG_20150803_113051_cell_153.png,0


In [26]:
# Predict the hidden-test images one at a time; the image file name is read
# from the first column of df.
predictions = []
with torch.no_grad():  # inference only: no autograd bookkeeping needed
    for img_basename in tqdm.tqdm(df.iloc[:, 0]):
        img_path = rootdir/'hidden_test'/img_basename
        # unsqueeze(0) adds the batch dimension the model expects.
        x = t(img_path).to(device).unsqueeze(0)
        # Stored as a one-element tensor; converted to a Python int later.
        predictions.append(torch.argmax(model(x)))

100%|██████████| 2756/2756 [04:29<00:00, 10.24it/s]


In [28]:
# int() accepts one-element tensors as well as plain ints, so this cell can be
# re-run safely after the predictions have already been converted.
predictions = [int(val) for val in predictions]
df['label'] = predictions
df.head()

Unnamed: 0,img_name,label
0,C164P125ThinF_IMG_20151116_113954_cell_58.png,0
1,C82P43ThinF_IMG_20150817_123824_cell_201.png,1
2,C133P94ThinF_IMG_20151004_155144_cell_125.png,1
3,C152P113ThinF_IMG_20151115_125038_cell_192.png,1
4,C59P20thinF_IMG_20150803_113051_cell_153.png,1


In [30]:
# Write the submission file; index=False leaves the row index out of the CSV.
df.to_csv('submission_resnet1.csv', index=False)