# Preprocess Data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms , datasets
from torch.utils.data import DataLoader
from torchvision import models as pretrained_models
from torchvision.transforms.autoaugment import InterpolationMode
from torch.optim import lr_scheduler

In [None]:
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix , accuracy_score , classification_report

In [None]:
from pathlib import Path
import os
from PIL import Image

In [None]:
from tqdm import tqdm

In [None]:
import time

In [None]:
# Root of the project on the mounted Drive; both image folders hang off it.
project_dir = Path('/content/drive/MyDrive/Butterfly_project')
train_dir = project_dir / 'train' / 'train'
test_dir = project_dir / 'test'

In [None]:
# Training labels: one row per image, with its `filename` and species `label`.
train_df = pd.read_csv('/content/drive/MyDrive/Butterfly_project/Training_set.csv')

In [None]:
# The sample submission lists the test image IDs; keep the file name and
# derive the full path of every test image from it.
test = pd.read_csv('/content/drive/MyDrive/Butterfly_project/sample_submission.csv')
test_filenames = test['ID']
test_paths = str(test_dir) + '/' + test_filenames
test_df = pd.DataFrame({'filename': test_filenames, 'path': test_paths})

In [None]:
train_df.head()

Unnamed: 0,filename,label
0,Image_1.jpg,SOUTHERN DOGFACE
1,Image_2.jpg,ADONIS
2,Image_3.jpg,BROWN SIPROETA
3,Image_4.jpg,MONARCH
4,Image_5.jpg,GREEN CELLED CATTLEHEART


In [None]:
# Full path of every training image: <train_dir>/<filename>.
train_df['path'] = str(train_dir)+'/' + train_df['filename']

In [None]:
test_df.head()

Unnamed: 0,filename,path
0,Image_1.jpg,/content/drive/MyDrive/Butterfly_project/test/...
1,Image_2.jpg,/content/drive/MyDrive/Butterfly_project/test/...
2,Image_3.jpg,/content/drive/MyDrive/Butterfly_project/test/...
3,Image_4.jpg,/content/drive/MyDrive/Butterfly_project/test/...
4,Image_5.jpg,/content/drive/MyDrive/Butterfly_project/test/...


In [None]:
# Distinct labels in order of first appearance in the CSV. The position of a
# name in this list is its class id, and the prediction cell indexes this list
# with the model output, so it must be built the same way (same CSV, before
# shuffling) whenever a saved checkpoint is reused.
class_names = list(train_df.label.unique())

In [None]:
class_names[:5]

['SOUTHERN DOGFACE',
 'ADONIS',
 'BROWN SIPROETA',
 'MONARCH',
 'GREEN CELLED CATTLEHEART']

In [None]:
# Map each class name to its position in `class_names` (the integer class id).
label_dict = dict(zip(class_names, range(len(class_names))))

In [None]:
# Integer target for every row, looked up from its label name.
train_df['class_id'] = [label_dict[name] for name in train_df['label']]

In [None]:
train_df.head()

Unnamed: 0,filename,label,path,class_id
0,Image_1.jpg,SOUTHERN DOGFACE,/content/drive/MyDrive/Butterfly_project/train...,0
1,Image_2.jpg,ADONIS,/content/drive/MyDrive/Butterfly_project/train...,1
2,Image_3.jpg,BROWN SIPROETA,/content/drive/MyDrive/Butterfly_project/train...,2
3,Image_4.jpg,MONARCH,/content/drive/MyDrive/Butterfly_project/train...,3
4,Image_5.jpg,GREEN CELLED CATTLEHEART,/content/drive/MyDrive/Butterfly_project/train...,4


In [None]:
# Shuffle with a fixed seed so the train/validation split below is identical
# in every session. Without it, resuming from a checkpoint after a restart
# re-draws the split and images the model already trained on end up in the
# validation set, inflating the validation metrics.
train_df = shuffle(train_df, random_state=42)

In [None]:
len(train_df)

5000

In [None]:
# Hold out the first 1000 shuffled rows for validation; train on the rest.
# NOTE: this rebinds `train_df`, so run the cell only once per session.
val_size = 1000
val_df, train_df = train_df.iloc[:val_size], train_df.iloc[val_size:]

In [None]:
len(train_df)

4000

In [None]:
len(val_df)

1000

# LOADING DATA AND PRETRAINED MODEL

In [None]:
# Training-time preprocessing: resize to 224x224, then light augmentation
# (random horizontal flip with p=0.5, random rotation within +/-45 degrees
# using nearest-neighbour interpolation), then conversion to a tensor.
# NOTE(review): no transforms.Normalize step is applied although the backbone
# is loaded with pretrained weights — confirm this is intentional. The
# inference cell uses the same un-normalised input, so any change here must be
# mirrored there and the model retrained.
train_transforms = transforms.Compose([
    transforms.Resize((224 , 224)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomRotation((-45,45) , InterpolationMode.NEAREST),
    transforms.ToTensor()
])

In [None]:
# Evaluation-time preprocessing: deterministic resize to 224x224 and tensor
# conversion only — no augmentation.
test_transforms = transforms.Compose([
    transforms.Resize((224 , 224)),
    transforms.ToTensor()
])

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Image dataset backed by a DataFrame with `path` and `class_id` columns."""

    def __init__(self , df , transforms):
        """Store the frame describing the samples and the transform to apply.

        Args:
            df: DataFrame whose rows provide a `path` to an image file and its
                `class_id`.
            transforms: Callable applied to the RGB PIL image of each sample.
        """
        self.df = df
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows (samples) in the backing frame."""
        return len(self.df)

    def __getitem__(self,index):
        """Return `(transformed_image, class_id)` for the row at position `index`."""
        row = self.df.iloc[index]
        # Force three channels so grayscale / RGBA files give a consistent shape.
        rgb_image = Image.open(row['path']).convert("RGB")
        return self.transforms(rgb_image) , row['class_id']

In [None]:
# Training samples get the augmenting pipeline; validation samples get the
# deterministic one.
train_ds = CustomDataset(train_df,train_transforms)
val_ds = CustomDataset(val_df, test_transforms)

In [None]:
# Number of images per batch for both the training and validation loaders.
BATCH_SIZE = 42

In [None]:
# Only the training batches are shuffled. Validation is evaluated in a fixed
# order so its batch-averaged metrics are reproducible from run to run.
train_dataloader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
def train_one_epoch(dataloader , model , loss_fn , optimizer , lr_scheduler):
    """Run one optimisation pass over `dataloader`, then step the LR scheduler once.

    Args:
        dataloader: Sized iterable yielding `(inputs, targets)` batches.
        model: `nn.Module` to train; it is switched to training mode.
        loss_fn: Callable `loss_fn(logits, targets)` returning a scalar loss tensor.
        optimizer: Optimizer updated after every batch.
        lr_scheduler: Scheduler stepped once, after the last batch.

    Returns:
        `(accuracy, loss)`. `accuracy` is a 0-dim tensor holding the fraction of
        correctly classified samples over the whole epoch; `loss` is a float,
        the mean of the per-batch loss values.

    Raises:
        ValueError: If `dataloader` yields no samples.
    """
    # Follow the model's own device instead of relying on a notebook-level
    # global, so the function also works before/without that global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.train()
    loss_sum = 0.0
    correct = 0
    seen = 0
    for x, y in dataloader:
        x, y = x.to(target_device), y.to(target_device)
        preds = model(x)
        loss = loss_fn(preds, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_sum += loss.item()
        # argmax of the logits equals argmax of their softmax, so softmax is skipped.
        correct = correct + (preds.detach().argmax(dim=1) == y).sum()
        seen += len(y)

    if seen == 0:
        raise ValueError("dataloader yielded no samples")

    lr_scheduler.step()
    # Count correct samples over the epoch rather than averaging per-batch
    # accuracies, which would over-weight a smaller final batch.
    return correct / seen, loss_sum / len(dataloader)

In [None]:
def validate(dataloader , model , loss_fn):
    """Evaluate `model` on `dataloader` without computing gradients.

    Args:
        dataloader: Sized iterable yielding `(inputs, targets)` batches.
        model: `nn.Module` to evaluate; it is switched to evaluation mode.
        loss_fn: Callable `loss_fn(logits, targets)` returning a scalar loss tensor.

    Returns:
        `(accuracy, loss)`. `accuracy` is a 0-dim tensor holding the fraction of
        correctly classified samples; `loss` is a float, the mean of the
        per-batch loss values.

    Raises:
        ValueError: If `dataloader` yields no samples.
    """
    # Follow the model's own device instead of relying on a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.inference_mode():
        for x, y in dataloader:
            x, y = x.to(target_device), y.to(target_device)
            preds = model(x)
            loss_sum += loss_fn(preds, y).item()
            # argmax of the logits equals argmax of their softmax.
            correct = correct + (preds.argmax(dim=1) == y).sum()
            seen += len(y)

    if seen == 0:
        raise ValueError("dataloader yielded no samples")

    # Count correct samples over the whole set rather than averaging per-batch
    # accuracies, which would over-weight a smaller final batch.
    return correct / seen , loss_sum / len(dataloader)

In [None]:
# Device the model and every batch are moved to; everything runs on the CPU.
device = 'cpu'

In [None]:
# ResNet-18 backbone initialised with torchvision's default pretrained weights.
resnet18_model = pretrained_models.resnet18(weights='DEFAULT' )

In [None]:
# Swap the classification head for one with an output per butterfly class,
# keeping the input width of the original head.
head_in_features = resnet18_model.fc.in_features
resnet18_model.fc = nn.Linear(head_in_features, len(class_names))

In [None]:
# Move the model to the selected device (the returned module is displayed).
resnet18_model.to(device)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# --- Run configuration ------------------------------------------------------
device = 'cpu'
BATCH_SIZE_resnet = 42
epochs_resnet = 300          # upper bound; early stopping usually ends sooner

# --- Optimisation -----------------------------------------------------------
lr_rate_resnet = 0.001
lr_milstones_resnet = [7 , 14 , 21 , 28 , 35]   # epochs at which the LR is multiplied by gamma
criterion_resnet = nn.CrossEntropyLoss()
optimizer_resnet = torch.optim.Adam(resnet18_model.parameters(), lr=lr_rate_resnet)
multi_step_lr_scheduler_resnet = lr_scheduler.MultiStepLR(
    optimizer_resnet,
    milestones=lr_milstones_resnet,
    gamma=0.1,
)

# --- Bookkeeping ------------------------------------------------------------
# Per-epoch history of the four tracked metrics.
logs_resnet = {metric: [] for metric in ('train_acc', 'train_loss', 'val_acc', 'val_loss')}

# Early stopping: give up after `patience` consecutive epochs without a
# sufficient drop in validation loss.
patience = 14
counter = 0
best_loss = 100_000_000

In [None]:
# Train until `epochs_resnet` is reached or the validation loss has not
# improved by more than `min_delta` for `patience` consecutive epochs.
# A checkpoint is written every time the validation loss improves.
checkpoint_dir = Path('/content/drive/MyDrive/Butterfly_project/check_points_resnet')
min_delta = 0.005  # smallest drop in val_loss that counts as an improvement

for epoch in tqdm(range(epochs_resnet)):
    train_acc , train_loss = train_one_epoch(train_dataloader ,
                                            resnet18_model,
                                            criterion_resnet,
                                            optimizer_resnet,
                                            multi_step_lr_scheduler_resnet)
    val_acc , val_loss = validate(val_dataloader , resnet18_model , criterion_resnet)

    logs_resnet['train_acc'].append(train_acc.cpu())
    logs_resnet['train_loss'].append(train_loss)
    logs_resnet['val_acc'].append(val_acc.cpu())
    logs_resnet['val_loss'].append(val_loss)

    print(f"epoch:{epoch}")
    print(f"train_acc: {train_acc:.4f} , train_loss: {train_loss:.4f} / val_acc:{val_acc:.4f} , val_loss:{val_loss:.4f}")

    if best_loss - val_loss > min_delta:
        counter = 0
        best_loss = val_loss
        checkpoint_dir.mkdir(parents=True, exist_ok=True)
        # Join with `/` so the file really lands inside `checkpoint_dir`; a
        # literal backslash is just part of the file name on Linux/Colab.
        torch.save({
                      'epoch': epoch,
                      'model_state_dict': resnet18_model.state_dict(),
                      'optimizer_state_dict': optimizer_resnet.state_dict(),
                      'loss': best_loss,
                  }, checkpoint_dir / f'custom_model_1_epoch_{epoch}_acc_{int(val_acc*100)}.pth')
    else:
        counter += 1
        if counter >= patience:
            print('Early stop !!!')
            break

  0%|          | 0/300 [00:00<?, ?it/s]

epoch:0
train_acc: 0.5728 , train_loss: 1.7266 / val_acc:0.4725 , val_loss:2.0219


  0%|          | 1/300 [59:44<297:42:25, 3584.43s/it]

epoch:1
train_acc: 0.7694 , train_loss: 0.7968 / val_acc:0.6381 , val_loss:1.2900


  1%|          | 2/300 [1:20:14<182:03:43, 2199.41s/it]

epoch:2
train_acc: 0.8300 , train_loss: 0.5754 / val_acc:0.7129 , val_loss:1.0506


  1%|▏         | 4/300 [2:02:12<129:23:58, 1573.78s/it]

epoch:3
train_acc: 0.8657 , train_loss: 0.4612 / val_acc:0.6116 , val_loss:1.7078
epoch:4
train_acc: 0.8715 , train_loss: 0.4097 / val_acc:0.7945 , val_loss:0.8877


  2%|▏         | 5/300 [2:22:29<118:25:41, 1445.22s/it]

epoch:5
train_acc: 0.9009 , train_loss: 0.3231 / val_acc:0.7994 , val_loss:0.8325


  2%|▏         | 6/300 [2:42:52<111:51:34, 1369.71s/it]

epoch:6
train_acc: 0.9144 , train_loss: 0.2971 / val_acc:0.8242 , val_loss:0.7391


  2%|▏         | 7/300 [3:03:17<107:38:12, 1322.50s/it]

epoch:7
train_acc: 0.9576 , train_loss: 0.1433 / val_acc:0.8954 , val_loss:0.4308


  3%|▎         | 8/300 [3:23:42<104:44:03, 1291.24s/it]

epoch:8
train_acc: 0.9771 , train_loss: 0.0865 / val_acc:0.9060 , val_loss:0.4180


  3%|▎         | 9/300 [3:44:17<102:58:14, 1273.87s/it]

epoch:9
train_acc: 0.9828 , train_loss: 0.0709 / val_acc:0.9112 , val_loss:0.4066


  4%|▎         | 11/300 [4:25:07<100:07:16, 1247.18s/it]

epoch:10
train_acc: 0.9826 , train_loss: 0.0624 / val_acc:0.9115 , val_loss:0.4107


  4%|▍         | 12/300 [4:45:15<98:50:06, 1235.44s/it] 

epoch:11
train_acc: 0.9883 , train_loss: 0.0503 / val_acc:0.9112 , val_loss:0.4038
epoch:12
train_acc: 0.9875 , train_loss: 0.0507 / val_acc:0.9110 , val_loss:0.3954


  5%|▍         | 14/300 [5:26:22<98:02:57, 1234.19s/it]

epoch:13
train_acc: 0.9911 , train_loss: 0.0416 / val_acc:0.9122 , val_loss:0.4099


  5%|▌         | 15/300 [5:46:49<97:32:35, 1232.12s/it]

epoch:14
train_acc: 0.9897 , train_loss: 0.0406 / val_acc:0.9108 , val_loss:0.4049


# LOAD CHECKPOINT AND CONTINUE TO TRAIN

In [None]:
# Restore model and optimizer from a saved checkpoint and resume training.
checkpoint_path = '/content/drive/MyDrive/Butterfly_project/check_points_resnet_custom_model_1_epoch_13_acc_99.pth'
# map_location keeps the load working when the checkpoint was written on a
# different device than the one used now.
checkpoint = torch.load(checkpoint_path, map_location=device)
resnet18_model.load_state_dict(checkpoint['model_state_dict'])
optimizer_resnet.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch']
best_loss = checkpoint['loss']

# The checkpoint is written after the scheduler step of `start_epoch`, i.e.
# when the scheduler has counted `start_epoch + 1` steps. Put the scheduler
# back at that position so the remaining milestones fire at the right epochs;
# the learning rate itself comes back with the optimizer state.
multi_step_lr_scheduler_resnet.last_epoch = start_epoch + 1

# Early-stopping bookkeeping starts fresh for the resumed run.
counter = 0
checkpoint_dir = Path('/content/drive/MyDrive/Butterfly_project/check_points_resnet')
min_delta = 0.02  # smallest drop in val_loss that counts as an improvement

# Continue training from the epoch after the checkpointed one
for epoch in tqdm(range(start_epoch + 1, epochs_resnet)):
    train_acc , train_loss = train_one_epoch(train_dataloader ,
                                            resnet18_model,
                                            criterion_resnet,
                                            optimizer_resnet,
                                            multi_step_lr_scheduler_resnet)
    val_acc , val_loss = validate(val_dataloader , resnet18_model , criterion_resnet)

    logs_resnet['train_acc'].append(train_acc.cpu())
    logs_resnet['train_loss'].append(train_loss)
    logs_resnet['val_acc'].append(val_acc.cpu())
    logs_resnet['val_loss'].append(val_loss)

    print(f"epoch:{epoch}")
    print(f"train_acc: {train_acc:.4f} , train_loss: {train_loss:.4f} / val_acc:{val_acc:.4f} , val_loss:{val_loss:.4f}")

    if best_loss - val_loss > min_delta:
        counter = 0
        best_loss = val_loss
        checkpoint_dir.mkdir(parents=True, exist_ok=True)
        # Join with `/` so the file really lands inside `checkpoint_dir`; a
        # literal backslash is just part of the file name on Linux/Colab.
        torch.save({
                      'epoch': epoch,
                      'model_state_dict': resnet18_model.state_dict(),
                      'optimizer_state_dict': optimizer_resnet.state_dict(),
                      'loss': best_loss,
                  }, checkpoint_dir / f'custom_model_1_epoch_{epoch}_acc_{int(val_acc*100)}.pth')
    else:
        counter += 1
        if counter >= patience:
            print('Early stop !!!')
            break

  0%|          | 1/300 [53:48<268:07:49, 3228.33s/it]

epoch:0
train_acc: 0.9702 , train_loss: 0.1055 / val_acc:0.9762 , val_loss:0.0896


  1%|          | 2/300 [1:18:59<183:38:19, 2218.45s/it]

epoch:1
train_acc: 0.9745 , train_loss: 0.0886 / val_acc:0.9787 , val_loss:0.0909


  1%|          | 3/300 [1:44:16<156:34:51, 1897.95s/it]

epoch:2
train_acc: 0.9798 , train_loss: 0.0727 / val_acc:0.9777 , val_loss:0.0915


  1%|▏         | 4/300 [2:09:33<143:41:33, 1747.61s/it]

epoch:3
train_acc: 0.9814 , train_loss: 0.0643 / val_acc:0.9750 , val_loss:0.0982


  2%|▏         | 5/300 [2:34:45<136:15:04, 1662.73s/it]

epoch:4
train_acc: 0.9836 , train_loss: 0.0598 / val_acc:0.9769 , val_loss:0.1004


  2%|▏         | 6/300 [2:59:54<131:30:21, 1610.28s/it]

epoch:5
train_acc: 0.9865 , train_loss: 0.0490 / val_acc:0.9747 , val_loss:0.1005


  2%|▏         | 7/300 [3:25:04<128:23:54, 1577.59s/it]

epoch:6
train_acc: 0.9884 , train_loss: 0.0449 / val_acc:0.9769 , val_loss:0.1001


  3%|▎         | 8/300 [3:50:16<126:15:44, 1556.66s/it]

epoch:7
train_acc: 0.9913 , train_loss: 0.0378 / val_acc:0.9755 , val_loss:0.1017


  3%|▎         | 9/300 [4:15:30<124:45:56, 1543.49s/it]

epoch:8
train_acc: 0.9918 , train_loss: 0.0383 / val_acc:0.9772 , val_loss:0.0989


  3%|▎         | 10/300 [4:40:34<123:21:04, 1531.26s/it]

epoch:9
train_acc: 0.9925 , train_loss: 0.0366 / val_acc:0.9752 , val_loss:0.1013


  4%|▎         | 11/300 [5:05:43<122:22:55, 1524.48s/it]

epoch:10
train_acc: 0.9922 , train_loss: 0.0355 / val_acc:0.9750 , val_loss:0.1016


  4%|▍         | 12/300 [5:30:51<121:33:28, 1519.47s/it]

epoch:11
train_acc: 0.9913 , train_loss: 0.0359 / val_acc:0.9767 , val_loss:0.1037


  4%|▍         | 12/300 [5:39:01<135:36:35, 1695.12s/it]


KeyboardInterrupt: 

# LOAD CHECKPOINT AND PREDICT IMAGE

In [None]:
from torchvision import models
import torch

# Rebuild the bare architecture; the trained weights come from the checkpoint,
# so no pretrained weights are requested. `weights=None` is the supported
# spelling — a boolean is only accepted through the deprecated legacy path.
resnet18_model = models.resnet18(weights=None)
# Must equal the number of classes the checkpoint was trained with, otherwise
# loading its state dict fails on the `fc` layer.
num_classes = 75
resnet18_model.fc = torch.nn.Linear(resnet18_model.fc.in_features, num_classes)



In [None]:
# Load the trained weights into the freshly built model. The model lives on
# the CPU here, so map the stored tensors to the CPU regardless of the device
# the checkpoint was saved from.
checkpoint_path = '/content/drive/MyDrive/Butterfly_project/check_points_resnetcustom_model_1_epoch_0_acc_97.pth'
checkpoint = torch.load(checkpoint_path, map_location='cpu')
resnet18_model.load_state_dict(checkpoint['model_state_dict'])


<All keys matched successfully>

In [None]:
# Switch to evaluation mode before predicting (the returned module is displayed).
resnet18_model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:
# Predicted class names, one per test image, in `test_df` row order.
list_result = []

In [None]:
# Same deterministic preprocessing as used for validation.
preprocess = transforms.Compose([
    transforms.Resize((224 , 224)),
    transforms.ToTensor()
])

# Start from an empty list so re-running this cell does not append a second
# copy of the predictions.
list_result = []
resnet18_model.eval()

with torch.no_grad():
    # A progress bar replaces one printed line per image.
    for image_path in tqdm(test_df['path'], desc='predicting'):
        # Close the file promptly and force three channels, matching how the
        # training images were loaded (grayscale / RGBA files would otherwise
        # produce a tensor the model cannot consume).
        with Image.open(image_path) as input_image:
            input_tensor = preprocess(input_image.convert("RGB"))
        input_tensor = input_tensor.unsqueeze(0)  # Create a mini-batch as expected by the model

        # Make prediction
        output = resnet18_model(input_tensor)
        _, predicted = torch.max(output, 1)
        list_result.append(class_names[predicted.item()])

Predicted class: SCARCE SWALLOW
Predicted class: STRAITED QUEEN
Predicted class: ZEBRA LONG WING
Predicted class: RED SPOTTED PURPLE
Predicted class: RED SPOTTED PURPLE
Predicted class: GREAT JAY
Predicted class: IPHICLUS SISTER
Predicted class: CAIRNS BIRDWING
Predicted class: STRAITED QUEEN
Predicted class: STRAITED QUEEN
Predicted class: MALACHITE
Predicted class: PURPLISH COPPER
Predicted class: EASTERN COMA
Predicted class: PAPER KITE
Predicted class: AN 88
Predicted class: RED POSTMAN
Predicted class: EASTERN COMA
Predicted class: ZEBRA LONG WING
Predicted class: MILBERTS TORTOISESHELL
Predicted class: CLOUDED SULPHUR
Predicted class: GREY HAIRSTREAK
Predicted class: AMERICAN SNOOT
Predicted class: POPINJAY
Predicted class: RED SPOTTED PURPLE
Predicted class: CRIMSON PATCH
Predicted class: POPINJAY
Predicted class: PAINTED LADY
Predicted class: MALACHITE
Predicted class: LARGE MARBLE
Predicted class: AN 88
Predicted class: APPOLLO
Predicted class: RED ADMIRAL
Predicted class: GRE

In [None]:
len(list_result)

1499

In [None]:
# Submission table: the test file name as `ID` next to its predicted `label`.
predicted_result = (
    test_df[['filename']]
    .rename(columns={'filename': 'ID'})
    .assign(label=list_result)
)

In [None]:
predicted_result.head()

Unnamed: 0,ID,label
0,Image_1.jpg,SCARCE SWALLOW
1,Image_2.jpg,STRAITED QUEEN
2,Image_3.jpg,ZEBRA LONG WING
3,Image_4.jpg,RED SPOTTED PURPLE
4,Image_5.jpg,RED SPOTTED PURPLE


In [None]:
# Write the submission file to Drive without the DataFrame index column.
predicted_result.to_csv('/content/drive/MyDrive/Butterfly_project/submission3.csv',index=False)
