In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

from typing import Tuple
from pathlib import Path

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from skimage.io import imread

import albumentations as A

import torch
import torch.nn as nn

from torch.utils.data import Dataset, DataLoader

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory|

In [2]:
LOAD_PATH = Path('data')

In [10]:
train_df = pd.read_csv(Path(LOAD_PATH, 'train.csv'), index_col='Id')
test_df = pd.read_csv(Path(LOAD_PATH, 'test.csv'), index_col='Id')

In [11]:
train_image_paths = list(Path(LOAD_PATH, 'train').glob('*'))
test_image_paths = list(Path(LOAD_PATH, 'test').glob('*'))

In [12]:
def conv2d_size_out(size, kernel_size=5, stride=1):
    """
    Determine the output size after applying convolution operation.
    """
    return (size - (kernel_size - 1) - 1) // stride + 1

In [13]:
img_size = 128

conv_filters = [3, 8, 16, 32]
conv_kernels = [5, 3, 3]

class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.fc_input_size_h = 128
        self.fc_input_size_w = 128
        
        # Conv layers
        self.conv_layers = []
        for i in range(len(conv_filters) - 1):
            self.conv_layers.append(nn.Conv2d(in_channels=conv_filters[i],
                                              out_channels=conv_filters[i + 1],
                                              kernel_size=conv_kernels[i],
                                              stride=2))
            self.conv_layers.append(nn.BatchNorm2d(conv_filters[i + 1]))
            self.conv_layers.append(nn.ReLU())
            self.fc_input_size_h = conv2d_size_out(self.fc_input_size_h, conv_kernels[i], 2)
            self.fc_input_size_w = conv2d_size_out(self.fc_input_size_w, conv_kernels[i], 2)

        self.conv_layers = nn.Sequential(*self.conv_layers)

        # Fully connected layers
        fc_layers = [12 + self.fc_input_size_h * self.fc_input_size_w * conv_filters[-1], 64, 1]
        self.fc_layers = []
        
        for i in range(len(fc_layers) - 1):
            self.fc_layers.append(nn.Linear(fc_layers[i], fc_layers[i + 1]))
            
        self.fc_layers = nn.Sequential(*self.fc_layers)
        
    def forward(self, x1, x2):
        x = self.conv_layers(x1)
        x = x.view(len(x), -1)
        x2 = torch.repeat_interleave(x2, len(x), dim=0)
        x = torch.cat((x, x2), dim=1)
        x = self.fc_layers(x)
        return x

In [14]:
train_transforms = A.Compose([
    A.Resize(img_size, img_size)
])

val_transforms = A.Compose([
    A.Resize(img_size, img_size)
])

In [33]:
class PetDataset(Dataset):
    def __init__(self, image_paths, df, transforms) -> None:
        self.path_names = image_paths
#         self.images = [imread(path) for path in image_paths]
        self.targets = None
        if 'Pawpularity' in df.columns:
            self.targets = df[df.index.isin([p.stem for p in self.path_names])]['Pawpularity'].values
        self.transforms = transforms
        
    def __len__(self) -> int:
        return len(self.path_names)
    
    def __getitem__(self, index: int) -> Tuple:
        image = imread(self.path_names[index])
        image = self.transforms(image=image)['image']
        if self.targets is None:
            return image, self.path_names[index]

        return image, self.path_names[index], self.targets[index]

In [34]:
train_dataset = PetDataset(train_image_paths, train_df, train_transforms)
test_dataset = PetDataset(test_image_paths, test_df, val_transforms)

val_size = int(len(train_dataset) * 0.8)
train_set, val_set = torch.utils.data.random_split(train_dataset, lengths=[len(train_dataset) - val_size, val_size])

In [35]:
train_loader = DataLoader(train_set, batch_size=128, shuffle=True, num_workers=0)
val_loader = DataLoader(val_set, batch_size=128, shuffle=True, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=0)

In [36]:
device = 'cuda'
model = CNN().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss(reduction='sum').to(device)

In [37]:
epochs = 50
for epoch in range(epochs):
        # Train
        model.train()
        training_loss = 0.0
        for images, features, target in train_loader:
            optimizer.zero_grad()
            images = images.to(device).permute(0, 3, 1, 2).float()
            target = target.to(device).float()
            output = model(images, features)
            loss = criterion(output, target)
            training_loss += loss.item()
            loss.backward()
            optimizer.step()
            
        # Validate
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for images, features, target in val_loader:
                images = images.to(device).permute(0, 3, 1, 2).float()
                target = target.to(device).float()
                output = model(images, features)
                loss = criterion(output, target)
                val_loss += loss.item()
                
        training_loss /= len(train_loader)
        val_loss /= len(val_loader)
        print(f'Epoch: {epoch:02d}, train loss: {training_loss:.04f}, val loss: {val_loss:.04f}')

TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'pathlib.WindowsPath'>