# PetFinder.my - Pawpularity Contest - A perfect starter 🤠
[Amit Nikhade](http://amitnikhade.com)

## Flow:
1) Loading data 

2) Pre-processing data

3) Modelling

4) Training

5) Postprocessing

6) Predictions

7) Submission

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# !pip install timm
import os
import cv2
import torch
import random
from torch.utils.data import Dataset, DataLoader
# import timm
from torchvision import models

In [None]:


def seed_everything(seed=2021):
    """Seed every random number generator used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable, seeds NumPy, the
    ``random`` module and PyTorch (CPU and every CUDA device), and configures
    cuDNN for deterministic execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    # NOTE: changing PYTHONHASHSEED here cannot alter hashing in the already
    # running interpreter; it is only inherited by processes started later.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    # Silently ignored when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode lets cuDNN auto-tune and pick different kernels from run
    # to run, which defeats the deterministic flag above, so it must be off.
    torch.backends.cudnn.benchmark = False


seed_everything()

In [None]:
# Read the training and test tables in one pass; `data` holds train.csv and
# `test` holds test.csv.
data, test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/petfinder-pawpularity-score/train.csv',
        '../input/petfinder-pawpularity-score/test.csv',
    )
)

In [None]:
# Display the training table loaded from train.csv.
data

In [None]:
# Display the test table loaded from test.csv.
test

In [None]:
# Print a summary of the training table (columns, dtypes, non-null counts).
data.info()

Visualizing a sample image

In [None]:
# Sample image: read one training JPEG from disk and render it inline.
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# mpimg.imread returns the decoded picture as an array that imshow can draw.
img = mpimg.imread('../input/petfinder-pawpularity-score/train/0007de18844b0dbbb5e1f607da0606e0.jpg')
imgplot = plt.imshow(img)
plt.show()

## Augmentation

In [None]:
import torchvision.transforms as transforms

# Training pipeline. Input is converted to a PIL image, resized to 224x224 and
# turned into a tensor; the random affine and colour jitter then operate on
# that tensor before channel-wise normalisation.
# NOTE(review): the mean/std values are presumably the ImageNet statistics.
_train_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.RandomAffine(15, translate=(0.1, 0.1), scale=(0.9, 1.1)),
    transforms.ColorJitter(contrast=0.8, saturation=0.5),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
]
transform_train = transforms.Compose(_train_steps)

# Validation / inference pipeline: same resize and normalisation, no random
# augmentation.
_valid_steps = [
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
]
transform_valid = transforms.Compose(_valid_steps)

## Model

In [None]:
import torch.nn.functional as F
import torch.nn as nn
import sys
sys.path.append('../input/pytorch-image-models/pytorch-image-models-master')
import timm 
import torch.optim as optim



class Pet_Model(nn.Module):
    """Regression network that fuses an image backbone with tabular features.

    The backbone is the timm ``twins_pcpvt_base`` model created without
    pretrained weights. Its output is projected to 128 values, concatenated
    with the ``dense`` features, and passed through two linear layers that
    produce a single score per sample.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are state_dict keys; creation order is unchanged so
        # parameter initialisation consumes the RNG exactly as before.
        self.eff = timm.create_model('twins_pcpvt_base', pretrained=False, in_chans=3)
        # Expects 1000 values per image from the backbone.
        self.rlogit = nn.Linear(1000, 128)
        # 140 inputs = 128 projected image values + 12 tabular features.
        self.fc1 = nn.Linear(140, 64)
        self.fc2 = nn.Linear(64, 1)

    def forward(self, image, dense):
        """Return one score per sample.

        Args:
            image: Batch of images fed to the backbone.
            dense: Batch of tabular features, concatenated along ``dim=1``
                with the projected backbone output (so it must supply the
                remaining 12 of ``fc1``'s 140 inputs).

        Returns:
            Output of ``fc2`` for the batch.
        """
        backbone_out = self.eff(image)
        embedding = self.rlogit(backbone_out)
        fused = torch.cat([embedding, dense], dim=1)
        # ReLU is applied to the fused vector (tabular part included); there
        # is no activation between fc1 and fc2.
        hidden = self.fc1(F.relu(fused))
        return self.fc2(hidden)
    
# Build the network once at module level; later cells train this instance.
model = Pet_Model()

# print(model)
# List the architecture names known to the installed timm package.
print(timm.list_models())

## Data Generator

In [None]:
class Petfinder_Data(torch.utils.data.Dataset):
    """Dataset yielding ``(image, features, label)`` for one table row.

    Args:
        data: DataFrame whose first column is the image id, columns 1..12 are
            the numeric metadata features and last column is the target.
        mode: ``'train'`` applies ``transform_train``; any other value
            (including ``None``) applies ``transform_valid``.
        image_dir: Directory containing ``<id>.jpg`` files. Defaults to the
            competition's training image folder.
    """

    def __init__(self, data, mode=None,
                 image_dir='../input/petfinder-pawpularity-score/train/'):
        self.data = data
        self.mode = mode
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of rows in the underlying table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, transform and return the sample at positional index ``idx``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        row = self.data.iloc[idx]
        # Explicit positional access: integer keys on a label-indexed Series
        # are deprecated in recent pandas releases.
        image_path = os.path.join(self.image_dir, row.iloc[0] + '.jpg')
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # OpenCV decodes to BGR; the transforms are applied to RGB input.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        transform = transform_train if self.mode == 'train' else transform_valid
        image = transform(image)

        features = torch.tensor(np.array(row.iloc[1:13], dtype=np.float32))
        label = torch.tensor(np.array(row.iloc[-1:], dtype=np.float32))
        return image, features, label


In [None]:
# Columns removed to obtain the target frame `y`: the id plus the twelve
# metadata flags.
_non_target_columns = [
    'Id', 'Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
    'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur',
]
# `x` keeps everything except the target (so it still contains 'Id').
x = data.drop(columns=['Pawpularity'])
y = data.drop(columns=_non_target_columns)

## Preprocessing / training

In [None]:
# `Variable` is no longer needed by this cell; the import is kept so any other
# cell that references it keeps working.
from torch.autograd import Variable
from torch.optim import lr_scheduler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold
import gc


def _batch_rmse(targets, predictions):
    """Return the root-mean-squared error between two tensors of one batch."""
    # sqrt(MSE) replaces `squared=False`, which newer scikit-learn releases
    # deprecate and then remove; for a single output column it is identical.
    mse = mean_squared_error(targets.detach().cpu().numpy(),
                             predictions.detach().cpu().numpy())
    return np.sqrt(mse)


torch.cuda.empty_cache()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
optimizer = optim.Adamax(model.parameters(), lr=1e-3, betas=(0.9, 0.999), eps=1e-3, weight_decay=1e-4)
scheduler = lr_scheduler.CosineAnnealingLR(optimizer, T_max=20, eta_min=1e-6)
criterion = nn.MSELoss()
model.to(device)

# Single-element "best so far" trackers used only to decide when to print.
e_ = []
rmse_ = []
valid_rmse_ = []

kfold = StratifiedKFold(n_splits=3)
# NOTE(review): the same model, optimizer and scheduler objects are reused for
# every fold, so later folds validate on rows already seen during training.
for train_indicies, valid_indicies in kfold.split(X=x, y=y):

    # split() yields positional indices, hence iloc rather than loc.
    train_x, valid_x = data.iloc[train_indicies], data.iloc[valid_indicies]
    train = Petfinder_Data(train_x, mode='train')
    valid = Petfinder_Data(valid_x, mode='valid')
    # Training batches are reshuffled every epoch; validation order is fixed.
    train_loader = torch.utils.data.DataLoader(train, batch_size=64, shuffle=True, num_workers=0)
    valid_loader = torch.utils.data.DataLoader(valid, batch_size=64, shuffle=False, num_workers=0)
    for epoch in range(2):
        running_loss = 0.0
        error = 0.0
        dataset_size = 0
        model.train()
        for i, (image, features, label) in enumerate(train_loader):
            # Move to the selected device instead of calling .cuda(), so the
            # CPU fallback chosen above actually works.
            features = features.to(device, dtype=torch.float32)
            image = image.to(device, dtype=torch.float32)
            label = label.to(device, dtype=torch.float32)

            batch_size = image.size(0)

            outputs = model(image, features)
            outputs = outputs.to(torch.float32)

            loss = criterion(outputs, label)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            dataset_size += batch_size

            # Computed once per batch and reused below.
            batch_rmse = _batch_rmse(label, outputs)
            # NOTE(review): running_loss is reset after every batch while
            # dataset_size keeps accumulating, so this is the batch loss
            # divided by the cumulative sample count, not a mean loss. Kept
            # unchanged to preserve the logged values; confirm the intent.
            scaled_loss = running_loss / dataset_size

            if i == 0:
                rmse_.append(batch_rmse)
                e_.append(scaled_loss)
                print('Epoch:', epoch, 'loss: %.3f RMSE: %.3f' % (scaled_loss, batch_rmse))
            elif scaled_loss < e_[0] and batch_rmse < rmse_[0]:
                # Both metrics improved on the stored best: replace and log.
                e_.clear()
                e_.append(scaled_loss)
                rmse_.clear()
                rmse_.append(batch_rmse)
                print('Epoch:', epoch, 'loss: %.3f RMSE: %.3f' % (scaled_loss, batch_rmse))
            running_loss = 0.0

        scheduler.step()
        model.eval()
        with torch.no_grad():
            for i, (image, features, label) in enumerate(valid_loader):
                features = features.to(device, dtype=torch.float32)
                image = image.to(device, dtype=torch.float32)
                label = label.to(device, dtype=torch.float32)

                outputs = model(image, features)
                outputs = outputs.to(torch.float32)
                error = _batch_rmse(label, outputs)

                if i == 0:
                    valid_rmse_.append(error)
                    print('valid_RMSE:', error)
                elif error < valid_rmse_[0]:
                    valid_rmse_.clear()
                    valid_rmse_.append(error)
                    print('valid_RMSE:', error)

# Persist the weights, then release the model and cached GPU memory.
torch.save(model.state_dict(), './pet_model.pth')
del model
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Rebuild the network and restore the weights saved after training.
model = Pet_Model()
# map_location lets a checkpoint written from a GPU run load on whatever
# device this session selected (including CPU-only machines).
model.load_state_dict(torch.load('./pet_model.pth', map_location=device))
# Switch to inference behaviour before the model is used for predictions.
model.eval()

## Post Processing / Predictions

In [None]:
class Petfinder_Data(torch.utils.data.Dataset):
    """Inference dataset yielding ``(image, features)`` for one table row.

    Args:
        data: DataFrame whose first column is the image id and whose columns
            1..12 are the numeric metadata features.
        image_dir: Directory containing ``<id>.jpg`` files. Defaults to the
            competition's test image folder.
    """

    def __init__(self, data,
                 image_dir='../input/petfinder-pawpularity-score/test/'):
        self.data = data
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of rows in the underlying table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load, transform and return the sample at positional index ``idx``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        row = self.data.iloc[idx]
        # Explicit positional access: integer keys on a label-indexed Series
        # are deprecated in recent pandas releases.
        image_path = os.path.join(self.image_dir, row.iloc[0] + '.jpg')
        image = cv2.imread(image_path)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')
        # OpenCV decodes to BGR; the transform is applied to RGB input.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = transform_valid(image)
        features = torch.tensor(np.array(row.iloc[1:13], dtype=np.float32))
        return image, features
# Keep an untouched copy of the table for the submission before `test` is
# rebound to the dataset wrapper below.
test_df = test.copy()
test = Petfinder_Data(test)
# Default DataLoader settings: one sample per batch, original row order.
test_loader = torch.utils.data.DataLoader(test)
op = []
model.to(device)
# Inference mode: disable training-only layer behaviour and skip autograd
# bookkeeping, which is not needed for predictions.
model.eval()
with torch.no_grad():
    for image, features in test_loader:
        # Use the selected device instead of .cuda() so CPU-only runs work.
        features = features.to(device, dtype=torch.float32)
        image = image.to(device, dtype=torch.float32)

        outputs = model(image, features)
        outputs = outputs.to(torch.float32)

        op.append(outputs.tolist())
        

In [None]:
# Collapse the nested per-batch lists in `op` by removing every length-1 axis.
predictions = np.asarray(op).squeeze()

In [None]:
# Attach the model outputs to the test table as the 'Pawpularity' column.
test_df['Pawpularity'] = predictions

In [None]:
# Remove the twelve metadata columns; presumably only the id and the predicted
# score remain afterwards (depends on test.csv's schema).
test_df = test_df.drop(columns=['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
       'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur'])

In [None]:
# Display the table that will be written to the submission file.
test_df

## Submission

In [None]:
# Write the submission file; index=False keeps the row index out of the CSV.
test_df.to_csv('submission.csv', index=False)