<div style='text-align:center;
            font-size:180%;'>
    <h1 style='display:inline-block;'>Introduction</h1>
    <p style='display:inline-block;text-align:center;'>
        In this notebook I fine-tuned a ResNet-50 model
        <br>
        In my previous <a href='https://www.kaggle.com/code/a3amat02/gym-equipment-image-classifier-from-scratch'>work</a> I built a simple Image Classification model based on CNN using BatchNormalization, MaxPooling and ReLU layers
        <br>
        But the results were bad, as expected, since the model was trained from scratch on a small number of instances
        <br>
        Hence I suggested using pre-trained models for Image Classification
        <br>
        Thus, this is a Transfer Learning task
    </p>
</div>

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import time
from copy import deepcopy
from PIL import Image
import matplotlib.pyplot as plt

import torchvision.transforms as transforms
from torchvision.models import resnet50
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

In [None]:
# (sub-folder name on disk, class label stored in the DataFrame), in output order.
_CLASS_FOLDERS = (
    ('Dumbells', 'dumbell'),
    ('Elliptical Machine', 'elliptical machine'),
    ('Home Machine', 'home machine'),
    ('Recumbent Bike', 'recumbent bike'),
)


def _list_class_files(folder):
    """Return the sorted full paths of the non-directory entries directly inside `folder`."""
    # os.scandir raises FileNotFoundError itself when `folder` does not exist.
    with os.scandir(folder) as entries:
        names = [entry.name for entry in entries if not entry.is_dir()]
    # Directory listing order is arbitrary; sort so that the row order (and
    # therefore any seeded split of the frame) is the same on every machine.
    return [os.path.join(folder, name) for name in sorted(names)]


def create_dataset(path):
    """Build a DataFrame listing every image file and its class label.

    Parameters
    ----------
    path : str
        Folder containing the sub-folders 'Dumbells', 'Elliptical Machine',
        'Home Machine' and 'Recumbent Bike'.

    Returns
    -------
    pandas.DataFrame
        Two columns: 'images' (full file path) and 'labels' (class name as a
        string). Rows are grouped by class in the order listed above and
        sorted by file name within each class.

    Raises
    ------
    FileNotFoundError
        If one of the four class sub-folders does not exist.
    """
    images = []
    labels = []
    for folder_name, label in _CLASS_FOLDERS:
        files = _list_class_files(os.path.join(path, folder_name))
        images += files
        labels += [label] * len(files)

    return pd.DataFrame({'images': images, 'labels': labels})

In [None]:
# Root folder of the dataset; create_dataset looks for one sub-folder per class inside it.
path = "/kaggle/input/4-gym-equipment-types-classification-dataset/Gym Data"
# One row per image file: 'images' holds the file path, 'labels' the class name.
df = create_dataset(path)

In [None]:
# Class name -> integer id used as the training target.
label_index = {'dumbell': 0,
              'elliptical machine': 1,
              'home machine': 2,
              'recumbent bike': 3}

# Inverse lookup (integer id -> class name). It is derived from the ids stored
# in label_index rather than from key position, so the two mappings stay
# consistent even if the ids are reordered or made non-contiguous.
index_label = {index: name for name, index in label_index.items()}

print(label_index)
print(index_label)

In [None]:
# Encode the class names as integer ids.
# Series.map turns every value that is not a key of the dict into NaN, so
# re-running this cell on already-encoded labels would wipe the column.
# Only map while the column still holds the (non-numeric) class names.
if not pd.api.types.is_numeric_dtype(df['labels']):
    df['labels'] = df['labels'].map(label_index)

In [None]:
class CreatePipeline(Dataset):
    """Dataset that loads one image from disk per row of a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Its rows must unpack into ``(image path, label)``; only ``data.values``
        is kept.
    transform : callable or None
        Applied to the RGB numpy array of each image. When ``None`` the raw
        array is returned.
    """

    def __init__(self, data, transform):
        super().__init__()
        self.data = data.values
        self.transform = transform

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return len(self.data)

    def __getitem__(self, x):
        """Return ``(image, label)`` for row ``x``."""
        image, label = self.data[x]
        # Image.open keeps the file handle open until the image object is
        # released; close it explicitly once the pixels have been copied out.
        with Image.open(image) as img:
            im = np.asarray(img.convert('RGB'))
        if self.transform is not None:
            im = self.transform(im)

        return im, label

In [None]:
# Training configuration.
# BATCH: batch_size handed to both DataLoaders.
BATCH = 24
# EPOCHS: number of passes the training loop makes over train_dl.
EPOCHS = 15
# LR: learning rate given to the SGD optimizer.
LR = 0.1
# size: images are resized to (size, size) by the transform pipeline.
size = 224

In [None]:
# Preprocessing applied to every image (the dataset passes in an RGB numpy array):
# array -> PIL image -> tensor -> resize to (size, size) -> per-channel normalisation.
# NOTE(review): the mean/std values look like the usual ImageNet statistics that
# match the pretrained weights — confirm against the torchvision model docs.
transform = transforms.Compose([transforms.ToPILImage(),
                               transforms.ToTensor(),
                               transforms.Resize((size, size)),
                               transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])

In [None]:
# Hold out 20% of the rows; `test` is used as the validation set below.
# NOTE(review): the split is not stratified by label — consider stratify=df['labels'].
train, test = train_test_split(df, random_state=42, test_size=0.2)

In [None]:
# Wrap both splits in the image-loading dataset; the same transform is used for
# training and validation (no augmentation).
train_ds = CreatePipeline(train, transform)
val_ds = CreatePipeline(test, transform)

In [None]:
# Batch the datasets; only the training data is shuffled.
train_dl = DataLoader(train_ds, batch_size=BATCH, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=BATCH, shuffle=False)

In [None]:
# Load ResNet-50 with the 'IMAGENET1K_V1' pretrained weights and replace its
# final fully connected layer with a fresh one sized for this task.
model = resnet50(weights='IMAGENET1K_V1')
num_ftrs = model.fc.in_features
# One output per class; derived from label_index so the head cannot drift out
# of sync with the label encoding.
model.fc = torch.nn.Linear(num_ftrs, len(label_index))

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# Plain SGD over all model parameters, i.e. the whole network is fine-tuned,
# not only the replaced fc layer.
optimizer = torch.optim.SGD(model.parameters(), lr=LR)
# CrossEntropyLoss with default settings.
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Fine-tune for EPOCHS epochs. After every epoch the model is evaluated on
# val_dl; a copy of the weights with the best validation accuracy so far is
# kept in best_model, and per-epoch metrics are appended to the *_history lists.
best_model = deepcopy(model)
al_start = time.time()
best_acc = 0
train_history = []
val_history = []
acc_history = []

for i in range(1, EPOCHS+1):
    start = time.time()
    model.train()

    train_loss = 0
    train_total = 0
    for image, label in train_dl:
        # The model was moved to `device`; the batch has to live there too,
        # otherwise the forward pass fails as soon as a GPU is used.
        image = image.to(device)
        label = label.to(device)

        optimizer.zero_grad()
        out = model(image)
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()

        # criterion returns the mean over the batch. Weight it by the batch
        # size so that dividing by the sample count below yields the true
        # per-sample mean (also correct for a smaller last batch).
        batch_size = out.size(0)
        train_loss += loss.item() * batch_size
        train_total += batch_size

    train_res = train_loss/train_total

    model.eval()
    val_loss = 0
    val_total = 0
    val_acc = 0
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)

            out = model(image)
            loss = criterion(out, label)

            batch_size = out.size(0)
            val_loss += loss.item() * batch_size
            val_total += batch_size
            val_acc += (label == out.argmax(1)).sum().item()
    val_res = val_loss/val_total
    acc_val = val_acc/val_total

    # Snapshot the weights whenever validation accuracy improves.
    if acc_val > best_acc:
        best_acc = acc_val
        best_model = deepcopy(model)

    end = time.time()

    train_history += [train_res]
    val_history += [val_res]
    acc_history += [acc_val]

    print("Epoch {} || train loss: {} || val loss: {} || val acc: {} || time: {}".format(i,
                                                                                        train_res,
                                                                                        val_res,
                                                                                        acc_val,
                                                                                        end-start))


al_end = time.time()
print("Total time {}".format(al_end - al_start))

<div style='font-size:150%;'>
    <h1 style='text-align:center;'>🚀 Training results 📉📈</h1>
    <p style='text-align:center;font-size:130%'>The Best accuracy score is 88.75%
        <br>
        I posted a Notebook where I implemented my own Image Classifier using a simple CNN-based structure
        <br>
        The best accuracy I could get was 36%, due to two facts:
    </p>
    <div style='text-align:center;'>
        <ul style='display:inline-block;text-align:left;'>
            <li>Model is not pre-trained, it is being trained from total scratch</li>
            <li>The small number of instances, combined with the previous point, leads to poor training and low accuracy 📉</li>
        </ul>
    </div>
    <p>Then it was suggested to use pre-trained models such as ResNet or VGGNet<br>
        I prefer ResNet over VGGNet, because
    </p>
    <div style='text-align:center;'>
        <ul style='display:inline-block;'>
            <li>ResNet is faster than VGG</li>
            <li>ResNet is deeper and, thanks to global average pooling, lighter (102MB for ResNet50)</li>
        </ul>
    </div>
</div>

In [None]:
# Plot the per-epoch metrics recorded by the training loop.
# The x-axis is derived from the recorded history rather than from EPOCHS, so
# the cell still works after an interrupted run or after EPOCHS was edited.
epochs = list(range(1, len(train_history) + 1))
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(15, 8))

# Left panel: training vs. validation loss.
axes[0].plot(epochs, train_history)
axes[0].plot(epochs, val_history)
axes[0].legend(['Train Loss', 'Validation Loss'])
axes[0].set_xlabel("Epoch")
axes[0].set_ylabel("Loss")
axes[0].set_title("Train and Validation loss progression")

# Right panel: validation accuracy.
axes[1].plot(epochs, acc_history)
axes[1].set_xlabel("Epoch")
axes[1].set_ylabel("Validation accuracy")
axes[1].set_title("Accuracy progression")

plt.suptitle("Results", size=30)
plt.show()

<div style='font-size:180%;
            text-align:center;'>
    <h1>Model tested ✅</h1>
</div>

In [None]:
def predict(path):
    """Classify the image stored at `path` and return its class name.

    The image is loaded as RGB, run through the module-level `transform`, and
    fed to the module-level `model` as a batch of one. The arg-max class id is
    translated back to a name through `index_label`.

    Side effect: `model` is switched to eval mode.

    NOTE(review): this uses `model` (the weights after the last epoch), not
    `best_model` from the training cell — confirm which one is intended.
    """
    # Close the file handle as soon as the pixels have been copied out.
    with Image.open(path) as img:
        im = np.asarray(img.convert("RGB"))
    im = transform(im)

    # Run the input on whatever device the model's weights live on; a CPU
    # tensor fed to a CUDA model raises a device-mismatch error.
    model_device = next(model.parameters()).device
    model.eval()
    with torch.no_grad():
        # unsqueeze(0) adds the batch dimension without hard-coding the image size.
        out = model(im.unsqueeze(0).to(model_device))

    # .item() yields a plain Python int and works for CPU and CUDA tensors alike.
    return index_label[out.argmax(1).item()]

In [None]:
# Visual sanity check: display two images from df side by side, each titled
# with the model's prediction and the expected class name.
show1, e1 = df.iloc[0, 0], df.iloc[0, 1]
show2, e2 = df.iloc[240, 0], df.iloc[240, 1]

fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(20, 10))
image1 = np.asarray(Image.open(show1).convert("RGB"))
image2 = np.asarray(Image.open(show2).convert("RGB"))

samples = ((image1, show1, e1), (image2, show2, e2))
for ax, (pixels, image_path, expected) in zip(axes, samples):
    ax.imshow(pixels)
    title = "Predicted value: {}\nExpected: {}".format(predict(image_path), index_label[expected])
    ax.set_title(title)

plt.tight_layout()
plt.show()