In [1]:
import os
import random
import math

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import glob
import PIL
from PIL import Image

import timm

import warnings
warnings.filterwarnings('ignore')

In [2]:
!pip install torch torchvision --upgrade --index-url https://download.pytorch.org/whl/cpu

import torch
import torch.nn as nn
from torchvision import transforms
from torchvision import datasets, models
from torch.utils.data import random_split, DataLoader

Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting torch
  Downloading https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting torchvision
  Downloading https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (6.1 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading https://download.pytorch.org/whl/sympy-1.13.3-py3-none-any.whl.metadata (12 kB)
Downloading https://download.pytorch.org/whl/cpu/torch-2.8.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl (184.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.1/184.1 MB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading https://download.pytorch.org/whl/cpu/torchvision-0.23.0%2Bcpu-cp311-cp311-manylinux_2_28_x86_64.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m60.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading https:

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
use_cuda = True if str(device) == "cuda" else "cpu"

In [4]:
trainroot = '/kaggle/input/dog-breed-identification/train'
testroot = '/kaggle/input/dog-breed-identification/test'
sample = '/kaggle/input/dog-breed-identification/sample_submission.csv'
labelsroot = '/kaggle/input/dog-breed-identification/labels.csv'

In [5]:
class_names = pd.read_csv(sample).keys()[1:]
breed2idx = { cls: i for i, cls in enumerate(class_names) }

labels = pd.read_csv(labelsroot)
labels = labels.set_index('id')

In [6]:
df = labels.copy()
df["filepath"] = df.index.map(lambda x: os.path.join(trainroot, x + ".jpg"))
df.head(5)

Unnamed: 0_level_0,breed,filepath
id,Unnamed: 1_level_1,Unnamed: 2_level_1
000bec180eb18c7604dcecc8fe0dba07,boston_bull,/kaggle/input/dog-breed-identification/train/0...
001513dfcb2ffafc82cccf4d8bbaba97,dingo,/kaggle/input/dog-breed-identification/train/0...
001cdf01b096e06d78e9e5112d419397,pekinese,/kaggle/input/dog-breed-identification/train/0...
00214f311d5d2247d5dfe4fe24b2303d,bluetick,/kaggle/input/dog-breed-identification/train/0...
0021f9ceb3235effd7fcde7f7538ed62,golden_retriever,/kaggle/input/dog-breed-identification/train/0...


In [7]:
img_transform = {
    'valid': transforms.Compose([
        transforms.Resize(size = 256),
        transforms.CenterCrop(size = 224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size = 256),
        transforms.RandomRotation(degrees = 30),
        transforms.ColorJitter(),
        transforms.RandomHorizontalFlip(),
        transforms.CenterCrop(size = 224),  
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])  
    ]),
    'test': transforms.Compose([
        transforms.Resize(size = 256),
        transforms.CenterCrop(size = 224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]),
}

In [8]:
class Configure(torch.utils.data.Dataset):
    def __init__(self, df, tran = None):
        self.df = df
        self.tran = tran

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        image = Image.open(row['filepath'])
        if self.tran:
            image = self.tran(image)
        label = breed2idx[row["breed"]]
        label = torch.tensor(label, dtype = torch.long)
        return [image, label]

In [9]:
train_dataset = Configure(df = df, tran = img_transform['train'])
val_dataset = Configure(df = df, tran = img_transform['valid'])

In [10]:
batch_size = 20
num_workers = 0

train_loader = DataLoader(
    train_dataset,
    batch_size = batch_size,
    shuffle = True,
    num_workers = num_workers
)

val_loader = DataLoader(
    val_dataset,
    batch_size = batch_size,
    shuffle = False, 
    num_workers = num_workers
)

In [11]:
dataloaders = {
    'train': train_loader,
    'valid': val_loader
}

In [12]:
use_cuda = torch.cuda.is_available()

model_resnet = models.resnet50(pretrained = True)
for param in model_resnet.parameters():
    param.requires_grad = False
    
in_features = model_resnet.fc.in_features
model_resnet.fc = nn.Linear(in_features, 120)

if use_cuda:
    model_resnet = model_resnet.cuda()

print(model_resnet)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 217MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [13]:
model_xception = timm.create_model('xception', pretrained = True)

for param in model_xception.parameters():
    param.requires_grad = False

model_xception.fc = nn.Linear(model_xception.fc.in_features, 120)

if use_cuda:
    model_xception = model_xception.cuda()

print(model_xception)

Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-cadene/xception-43020ad28.pth" to /root/.cache/torch/hub/checkpoints/xception-43020ad28.pth


Xception(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act2): ReLU(inplace=True)
  (block1): Block(
    (skip): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
    (skipbn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (rep): Sequential(
      (0): SeparableConv2d(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
        (pointwise): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
      (3): SeparableConv2d(
        (conv1): Conv

In [14]:
criterion_resnet = nn.CrossEntropyLoss()
grad_parameters_resnet = filter(lambda p: p.requires_grad, model_resnet.parameters())
optimizer_resnet = torch.optim.SGD(grad_parameters_resnet, lr = 1e-2)

In [15]:
criterion_xception = nn.CrossEntropyLoss()
grad_parameters_xception = filter(lambda p: p.requires_grad, model_xception.parameters())
optimizer_xception = torch.optim.SGD(grad_parameters_xception, lr = 1e-2)

In [16]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    valid_loss_min = np.Inf 
    for epoch in range(1, n_epochs + 1):
        train_loss = 0.0
        valid_loss = 0.0
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            
            loss.backward()
            optimizer.step()
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            if batch_idx % 100 == 0:
                print('Epoch: %d \tBatch: %d \tTraining Loss: %.6f' %(epoch, batch_idx + 1, train_loss))

        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            
            output = model(data)
            loss = criterion(output, target)
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
            
        print('Epoch: {} \tTraining Loss: {:.4f} \tValidation Loss: {:.4f}'.format(epoch, train_loss, valid_loss))
        if valid_loss < valid_loss_min:
            torch.save(model.state_dict(), save_path)
            print('BOOM! Validation loss decreased ({:.4f} --> {:.4f}).  Saving model...'.format(valid_loss_min,valid_loss))
            valid_loss_min = valid_loss    

    return model

In [17]:
n_epochs = 50
model_resnet =  train(n_epochs, dataloaders, model_resnet, optimizer_resnet, criterion_resnet, use_cuda, 'model_resnet.pt')
model_xception =  train(n_epochs, dataloaders, model_xception, optimizer_xception, criterion_xception, use_cuda, 'model_xception.pt')

Epoch: 1 	Batch: 1 	Training Loss: 4.914670
Epoch: 1 	Batch: 101 	Training Loss: 4.674582
Epoch: 1 	Batch: 201 	Training Loss: 4.498480
Epoch: 1 	Batch: 301 	Training Loss: 4.330291
Epoch: 1 	Batch: 401 	Training Loss: 4.166200
Epoch: 1 	Batch: 501 	Training Loss: 4.023600
Epoch: 1 	Training Loss: 4.0124 	Validation Loss: 2.6546
BOOM! Validation loss decreased (inf --> 2.6546).  Saving model...
Epoch: 2 	Batch: 1 	Training Loss: 3.208431
Epoch: 2 	Batch: 101 	Training Loss: 3.184795
Epoch: 2 	Batch: 201 	Training Loss: 3.092531
Epoch: 2 	Batch: 301 	Training Loss: 3.006237
Epoch: 2 	Batch: 401 	Training Loss: 2.932922
Epoch: 2 	Batch: 501 	Training Loss: 2.857128
Epoch: 2 	Training Loss: 2.8506 	Validation Loss: 1.6608
BOOM! Validation loss decreased (2.6546 --> 1.6608).  Saving model...
Epoch: 3 	Batch: 1 	Training Loss: 2.350254
Epoch: 3 	Batch: 101 	Training Loss: 2.461419
Epoch: 3 	Batch: 201 	Training Loss: 2.391841
Epoch: 3 	Batch: 301 	Training Loss: 2.357469
Epoch: 3 	Batch: 40

In [18]:
sample_sub = pd.read_csv(sample)
ids = sample_sub["id"]

In [19]:
prediction = []

model_resnet.eval()
model_xception.eval()

for uid in ids:
    img_path = testroot + '/' + uid + '.jpg'
    img = Image.open(img_path)
    img = img_transform['test'](img)
    img = img.unsqueeze(0).cuda()
    with torch.no_grad():
        out = model_resnet(img)
        out2 = model_xception(img)
    
    out = out.view(-1)
    out2 = out2.view(-1)
    out = torch.softmax(out, dim = 0)
    out2 = torch.softmax(out2, dim = 0)
    avg = (out + out2) / 2
    prediction.append(avg)

In [20]:
print(prediction[0])

tensor([9.7837e-07, 3.3371e-06, 1.7735e-07, 1.6235e-07, 5.1930e-08, 2.8693e-07,
        1.1859e-07, 1.5180e-07, 1.5272e-07, 4.4851e-07, 9.2909e-08, 1.9589e-06,
        3.5342e-07, 6.8743e-04, 1.6465e-07, 4.5137e-07, 1.0123e-06, 4.4958e-08,
        2.4580e-06, 3.6612e-06, 1.3580e-07, 8.3934e-07, 1.4134e-05, 1.6506e-07,
        7.3553e-06, 1.1689e-07, 8.8144e-08, 1.2192e-07, 1.5238e-07, 2.3569e-06,
        4.0852e-07, 2.0610e-06, 1.1903e-06, 1.2577e-06, 1.1899e-07, 5.9255e-08,
        3.4688e-07, 8.2416e-08, 1.4930e-07, 1.6363e-07, 8.5405e-07, 1.2273e-06,
        5.7050e-07, 1.2056e-07, 2.1426e-07, 6.9117e-07, 3.2904e-08, 2.7815e-07,
        8.2899e-08, 3.2971e-07, 3.0321e-07, 2.7778e-07, 2.3298e-06, 2.8824e-07,
        6.9755e-08, 1.2984e-07, 3.5818e-07, 7.3712e-08, 1.7274e-07, 4.3432e-08,
        1.2502e-07, 9.7666e-01, 3.9428e-07, 9.4870e-08, 3.5448e-07, 1.4550e-07,
        1.3153e-07, 7.9408e-08, 2.6143e-08, 2.2895e-07, 5.3785e-05, 1.1422e-07,
        4.1125e-08, 6.4368e-06, 1.1892e-

In [21]:
sample_sub.head(5)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333


In [22]:
new_sub = sample_sub.copy()

In [23]:
prediction = [p.detach().cpu().numpy() for p in prediction]
prediction = np.vstack(prediction)  

In [24]:
new_sub.iloc[:, 1:] = prediction

In [25]:
new_sub.head(5)

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,9.783715e-07,3e-06,1.773483e-07,1.623504e-07,5.192971e-08,2.869335e-07,1.185932e-07,1.517987e-07,1.527176e-07,...,3.058285e-07,6.14977e-07,2.05257e-07,1.845535e-07,2.013001e-07,3.922409e-07,1.005067e-07,2.207841e-07,1.674841e-07,6.018194e-07
1,00102ee9d8eb90812350685311fe5890,3.904235e-06,3e-06,1.349749e-06,3.222111e-06,2.799774e-06,3.205525e-06,2.429732e-06,2.776531e-06,1.035491e-06,...,1.178894e-05,9.557832e-07,2.35404e-06,1.257419e-06,1.265381e-06,8.060605e-07,5.633652e-05,1.393395e-06,8.35505e-07,2.548201e-06
2,0012a730dfa437f5f3613fb75efcd4ce,1.803103e-06,0.000169,2.591239e-06,6.03202e-06,2.033924e-06,2.935269e-05,2.417689e-06,2.832639e-07,5.678557e-05,...,6.179981e-07,4.658894e-06,4.792734e-06,0.0001388795,6.67589e-05,0.00898347,3.536553e-06,2.779912e-06,3.517055e-06,1.412737e-05
3,001510bc8570bbeee98c8d80c8a95ec1,0.003248847,3.3e-05,1.016343e-05,1.129691e-05,0.001574612,0.0002237491,3.482581e-06,8.029431e-05,5.339699e-05,...,0.0001736516,6.907142e-05,3.758727e-05,1.086593e-05,8.272128e-05,1.902035e-05,5.60041e-06,0.0001311913,9.828466e-07,5.627621e-05
4,001a5f3114548acdefa3d4da05474c2e,0.001722875,0.000998,5.912071e-06,6.228637e-06,1.577607e-05,1.095253e-05,3.482179e-05,1.270667e-05,2.673777e-05,...,0.0002001421,1.157864e-05,4.384114e-06,9.607608e-06,5.193697e-06,1.057694e-05,0.0001512679,2.817611e-05,4.176721e-05,0.000279009


In [26]:
new_sub.to_csv('submission.csv', index = False)

In [27]:
print(len(new_sub))

10357
