In [1]:
import os
import random
import math
import numpy as np
import pandas as pd
from PIL import Image, ImageDraw
import cv2

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms.functional as F
from torchvision import datasets, models, transforms
from torchvision.models.detection.retinanet import RetinaNet
from torchvision.models.detection.faster_rcnn import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.retinanet import RetinaNetHead

from sklearn.metrics import roc_auc_score, mean_squared_error
from IPython.display import clear_output
from tqdm import tqdm
import glob
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
# Read the three competition tables in one pass: the labelled training table,
# the test table, and the submission template.
train_csv, test_csv, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/petfinder-pawpularity-score/train.csv',
        '../input/petfinder-pawpularity-score/test.csv',
        '../input/petfinder-pawpularity-score/sample_submission.csv',
    )
)

In [8]:
# Hold out 10% of the labelled rows for validation; the seed is fixed so the
# split (and therefore the bin edges below) is reproducible.
data_to_train, data_to_test = train_test_split(train_csv, test_size=0.1, random_state=42)
print(data_to_train.shape[0])

# Equal-frequency bin edges over the training scores: lower edge i is the score
# found at rank i * n // n_bins of the sorted scores, so every bin receives
# roughly the same number of training samples.
# The ranks are derived from the actual training-set size instead of a
# hard-coded stride, so exactly `n_bins` lower edges are produced for any
# dataset size (for n = 8920 this is identical to taking every 892nd score).
n_bins = 10
paws = sorted(np.array(data_to_train['Pawpularity']))
equal_bins = np.array([paws[i * len(paws) // n_bins] for i in range(n_bins)])
# Close the last bin with 100 -- presumably the maximum possible score; confirm
# against the competition description.
equal_bins = np.append(equal_bins, 100)
equal_bins

In [None]:
def get_transform(train, target_size, norm=True):
    """Assemble the image preprocessing pipeline.

    Args:
        train: when truthy, a random horizontal flip (p=0.5) is appended as
            the final step.
        target_size: size passed to ``transforms.Resize``.
        norm: when truthy, a per-channel ``transforms.Normalize`` with fixed
            mean/std constants is included.

    Returns:
        A ``transforms.Compose`` that applies, in order: ``ToTensor``,
        ``Resize``, the optional ``Normalize`` and the optional
        ``RandomHorizontalFlip``.
    """
    base_steps = [transforms.ToTensor(), transforms.Resize(target_size)]
    norm_steps = (
        [transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]
        if norm
        else []
    )
    flip_steps = [transforms.RandomHorizontalFlip(p=0.5)] if train else []
    return transforms.Compose(base_steps + norm_steps + flip_steps)

In [None]:
class TrainData(Dataset):
    """Dataset yielding a preprocessed image, its bin label and its raw score.

    Rows of ``data`` are read positionally: column 0 holds the image id (the
    file name without extension) and the last column holds the raw score.

    Args:
        data: table of samples; indexed with ``.iloc`` in ``__getitem__``.
        train: forwarded to ``get_transform`` (enables the random flip).
        target_size: forwarded to ``get_transform`` as the resize target.
        bins: optional sorted sequence of bin edges (first and last entries
            are the outer bounds). When ``None``, the module-level
            ``equal_bins`` is looked up at item-access time.
        image_dir: directory that contains ``<id>.jpg`` for every row.
    """

    def __init__(self, data, train=True, target_size=(520, 520), bins=None,
                 image_dir='../input/petfinder-pawpularity-score/train/'):
        self.data = data
        self.train = train
        self.target_size = target_size
        self.bins = bins
        self.image_dir = image_dir

    def __len__(self) -> int:
        return len(self.data)

    def _bin_label(self, score) -> int:
        """Return the index of the bin that ``score`` falls into.

        Only the inner edges decide the label: a score gets label ``k`` when
        exactly ``k`` inner edges are <= score. Scores below the first edge map
        to bin 0 and scores at/above the last inner edge map to the last bin.
        This works for any number of edges, not just eleven.
        """
        edges = equal_bins if self.bins is None else self.bins
        inner_edges = np.asarray(edges)[1:-1]
        return int(np.searchsorted(inner_edges, score, side='right'))

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            dict with keys ``'image'`` (transformed image tensor), ``'label'``
            (bin index as a Python int) and ``'true_labels'`` (raw score as a
            0-dim tensor).
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.image_dir, self.data.iloc[idx, 0] + '.jpg')
        image = Image.open(img_name).convert('RGB')

        transform = get_transform(self.train, self.target_size)
        image = transform(image)

        raw_score = self.data.iloc[idx, -1]
        labels = self._bin_label(raw_score)
        true_labels = torch.tensor(raw_score)

        sample = {'image': image, 'label': labels, 'true_labels': true_labels}

        return sample

In [None]:
batch_size = 24

# Datasets: augmentation is enabled for the training split only.
train_dataset = TrainData(data_to_train)
test_dataset = TrainData(data_to_test, train=False)

# Both loaders shuffle, so each fresh iterator over the held-out loader starts
# with a different random batch.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [None]:
def plot_progress(train_losses, train_rmse, test_loss, test_rmse):
    """Redraw the two-panel training dashboard (loss on the left, RMSE on the right).

    The current cell output is cleared first, so calling this repeatedly
    replaces the previous figure instead of stacking new ones.

    Args:
        train_losses: sequence of training loss values, one per recorded step.
        train_rmse: sequence of training RMSE values.
        test_loss: sequence of held-out loss values.
        test_rmse: sequence of held-out RMSE values.
    """
    clear_output(True)

    fig, (loss_ax, rmse_ax) = plt.subplots(nrows=1, ncols=2)
    fig.set_size_inches(12, 6)

    # Left panel: losses plus a dashed zero baseline.
    for series, name in ((train_losses, 'train loss'), (test_loss, 'test loss')):
        loss_ax.plot(series, label=name)
    loss_ax.plot(np.zeros_like(train_losses), '--', label='zero')
    loss_ax.set_title('Loss')
    loss_ax.set_ylabel('Loss')
    loss_ax.set_xlabel('Batch number')
    loss_ax.legend()

    # Right panel: RMSE curves plus dashed reference lines at 0 and 0.16.
    for series, name in ((train_rmse, 'train rmse'), (test_rmse, 'test rmse')):
        rmse_ax.plot(series, label=name)
    rmse_ax.plot(np.zeros_like(train_rmse), '--', label='RMSE=0')
    rmse_ax.plot(np.ones_like(train_rmse) * 0.16, '--', label='RMSE=0.16')
    rmse_ax.set_title('RMSE')
    rmse_ax.set_ylabel('RMSE')
    rmse_ax.set_xlabel('Batch number')
    rmse_ax.legend()

    plt.show()

In [None]:
# Run on the first CUDA GPU when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
# Start from a pretrained GoogLeNet and replace its classifier head with a
# 10-way linear layer (one logit per score bin).
model = models.googlenet(pretrained=True)
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

# Freeze the parameters of the first three child modules.
# Autograd reads the attribute `requires_grad`; assigning to a misspelled
# `require_grad` merely creates an unused attribute and freezes nothing.
ct = 0
for child in model.children():
    ct += 1
    if ct < 4:
        for param in child.parameters():
            param.requires_grad = False

model = model.to(device)

criterion = nn.CrossEntropyLoss()
#criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr = 0.001)
# optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9, weight_decay=0.05)

# Multiply the learning rate by 0.1 after every 60 scheduler steps.
exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=60, gamma=0.1)

In [None]:
epochs = 1

# Per-batch history, consumed by plot_progress.
train_rmse = []
test_rmse = []

train_losses = []
test_losses = []


def _bin_midpoints(bin_idx):
    """Convert a tensor of predicted bin indices into bin-midpoint scores."""
    # Index the numpy edge array with a numpy integer array, not a torch tensor.
    bin_idx = bin_idx.detach().cpu().numpy()
    return (equal_bins[bin_idx] + equal_bins[bin_idx + 1]) / 2


def _rmse(y_true, y_pred):
    """Root-mean-squared error between two equally sized arrays."""
    # Taking the square root explicitly avoids the `squared=` keyword, which
    # newer scikit-learn releases deprecate.
    return float(np.sqrt(mean_squared_error(y_true, y_pred)))


for epoch in range(epochs):
    for train in train_dataloader:

        # --- one optimisation step on the training batch ---
        model.train()
        data_train = train['image'].to(device)#.permute(0, 3, 1, 2)
        # CrossEntropyLoss with class-index targets needs an integer (long)
        # tensor of shape (batch,); float targets are rejected or misread.
        target_train = train['label'].to(device).long()

        optimizer.zero_grad()
        out_train = model(data_train)
        preds_train = out_train.argmax(dim=1)
        train_loss = criterion(out_train, target_train)

        train_loss.backward()
#         torch.nn.utils.clip_grad_norm_(model.parameters(), 2)
        optimizer.step()

        # --- evaluate on one batch drawn from a fresh held-out iterator ---
        test = next(iter(test_dataloader))
        model.eval()
        with torch.no_grad():
            data_test = test['image'].to(device)#.permute(0, 3, 1, 2)
            target_test = test['label'].to(device).long()
            out_test = model(data_test)
            preds_test = out_test.argmax(dim=1)
            test_loss = criterion(out_test, target_test)

        # RMSE is measured on the raw scores; the dataset stores them under
        # the key 'true_labels'.
        y_true = train['true_labels'].cpu().numpy()
        train_rmse.append(_rmse(y_true, _bin_midpoints(preds_train)))

        y_true_test = test['true_labels'].cpu().numpy()
        test_rmse.append(_rmse(y_true_test, _bin_midpoints(preds_test)))

        train_losses.append(train_loss.item())
        test_losses.append(test_loss.item())

        plot_progress(train_losses, train_rmse, test_losses, test_rmse)

        # NOTE(review): the scheduler is stepped once per batch, so its
        # step_size counts batches, not epochs -- confirm this is intended.
        exp_lr_scheduler.step()

In [None]:
# Predict a score for every image listed in the submission template.
model.eval()
preprocess = get_transform(False, (520, 520))

predictions = []
for img_name in tqdm(sample_submission['Id']):
    image = Image.open('../input/petfinder-pawpularity-score/test/' + img_name + '.jpg').convert('RGB')
    # Add a leading batch dimension of size 1 before moving to the device.
    batch = preprocess(image).unsqueeze(0).to(device)
    with torch.no_grad():
        out_test = model(batch)
    # Use the index of the highest logit (the predicted bin), not the logits
    # themselves: the bin edges can only be indexed by an integer.
    bin_idx = int(out_test.argmax(dim=1).item())
    # The submitted score is the midpoint of the predicted bin.
    predictions.append((equal_bins[bin_idx] + equal_bins[bin_idx + 1]) / 2)

# Assign the whole column at once; midpoints are floats, so this also avoids
# writing floats element-by-element into an integer column.
sample_submission['Pawpularity'] = predictions
        

In [None]:
# Write the predictions to disk without the DataFrame index column.
sample_submission.to_csv(path_or_buf='submission.csv', index=False)

In [None]:
# Show the submission table as the cell output for a final visual check.
sample_submission