In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
import time
from copy import deepcopy
from PIL import Image

import torchvision.transforms as transforms
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

<div style='background-color:#F5FAF5;
            color:#22584A;
            padding:20px;
            padding-left:5%;
            font-size:140%;'>
    <h1 style='text-align:center;'><b>Creating pandas DataFrame for training</b></h1>
</div>

In [None]:
def create_dataset(path):
    """Build a DataFrame listing every image file together with its class name.

    ``path`` is expected to contain the four class folders ``Dumbells``,
    ``Elliptical Machine``, ``Home Machine`` and ``Recumbent Bike``. Only files
    located directly inside a class folder are collected; anything in nested
    sub-folders is ignored.

    Parameters
    ----------
    path : str
        Root directory of the dataset.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``images`` (full file path) and ``labels`` (class name as
        a string). Rows are grouped by class in the order listed above and
        sorted by file name inside each class.

    Raises
    ------
    FileNotFoundError
        If one of the class folders does not exist under ``path``.
    """
    # (folder name on disk, label written to the frame); the tuple order is
    # the row order of the resulting frame.
    classes = (('Dumbells', 'dumbell'),
               ('Elliptical Machine', 'elliptical machine'),
               ('Home Machine', 'home machine'),
               ('Recumbent Bike', 'recumbent bike'))

    images = []
    labels = []
    for folder_name, label in classes:
        folder = os.path.join(path, folder_name)
        if not os.path.isdir(folder):
            raise FileNotFoundError("Class folder not found: {}".format(folder))

        # The first os.walk entry describes the folder itself; its third item
        # holds the names of the files directly inside it.
        _, _, file_names = next(os.walk(folder), (folder, [], []))

        # Directory listing order is arbitrary, so sort to keep the row order
        # (and therefore any seeded split of the frame) reproducible.
        class_images = [os.path.join(folder, name) for name in sorted(file_names)]
        images.extend(class_images)
        labels.extend([label] * len(class_images))

    return pd.DataFrame({'images': images,
                         'labels': labels})

In [None]:
# Root folder of the dataset on Kaggle; create_dataset looks for one
# sub-folder per equipment class underneath it.
path = "/kaggle/input/4-gym-equipment-types-classification-dataset/Gym Data"
# One row per image file: its full path and its class name (still a string here).
df = create_dataset(path)

<div style='background-color:#F5FAF5;
            color:#22584A;
            padding:20px;
            padding-left:5%;
            font-size:140%;'>
    <h1 style='text-align:center;'><b>Mapping</b></h1>
    <p style='text-align:center;font-size:120%'>
        Label to index<br>Index to label
    </p>
</div>

In [None]:
# Class name -> integer id used as the training target.
label_index = {'dumbell': 0,
              'elliptical machine': 1,
              'home machine': 2,
              'recumbent bike': 3}

# Integer id -> class name. Built by inverting label_index itself (instead of
# counting positions) so the two mappings stay consistent even if the ids
# above are edited or reordered.
index_label = {index: name for name, index in label_index.items()}

print(label_index)
print(index_label)

In [None]:
# Encode class names as integer ids. Series.map with a plain dict turns every
# value that is not a key into NaN, so running this cell a second time would
# wipe all labels. Looking the value up with a fallback leaves anything that is
# not a class name (e.g. ids written by an earlier run) untouched, which makes
# the cell safe to re-run.
df['labels'] = df['labels'].map(lambda value: label_index.get(value, value))

<div style='background-color:#F5FAF5;
            color:#22584A;
            padding:20px;
            padding-left:5%;
            font-size:140%;'>
    <h1 style='text-align:center;'><b>Dataset pipeline for training</b></h1>
</div>

In [None]:
class CreatePipeline(Dataset):
    """Map-style dataset that yields ``(image, label)`` pairs.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose rows unpack as ``(image_path, label)``.
    transform : callable or None
        Applied to the decoded RGB array of each image; skipped when ``None``.
    """

    def __init__(self, data, transform):
        super().__init__()
        # Keep the rows as a plain array so an integer index returns one
        # (image_path, label) pair.
        self.data = data.values
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the source frame)."""
        return len(self.data)

    def __getitem__(self, x):
        """Load sample ``x`` and return ``(image, label)``.

        The image is decoded as RGB into a NumPy array and then passed through
        ``self.transform`` when one was supplied.
        """
        image, label = self.data[x]
        # The context manager releases the file handle as soon as the pixels
        # are decoded, instead of leaving that to garbage collection.
        with Image.open(image) as handle:
            im = np.asarray(handle.convert('RGB'))
        if self.transform is not None:
            im = self.transform(im)

        return im, label

In [None]:
# Training hyperparameters shared by the cells below.
BATCH = 24   # mini-batch size handed to both DataLoaders
EPOCHS = 15  # number of passes over the training split
# NOTE(review): this value is passed to torch.optim.Adam, whose documented
# default learning rate is 1e-3; 0.1 looks very high for Adam and may be
# hurting convergence -- confirm it is intentional.
LR = 0.1
size = 224   # side length (in pixels) every image is resized to

In [None]:
# Preprocessing applied to every image, in this order:
#   1. ToPILImage - the dataset hands over a NumPy array, so wrap it as a PIL image
#   2. ToTensor   - convert to a tensor (torchvision documents a CHW float tensor scaled to [0, 1])
#   3. Resize     - bring every image to size x size
#   4. Normalize  - per-channel mean/std; the numbers match the commonly
#                   published ImageNet statistics
transform = transforms.Compose([transforms.ToPILImage(),
                               transforms.ToTensor(),
                               transforms.Resize((size, size)),
                               transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225))])

In [None]:
# Hold out 20% of the rows; `test` is used as the validation split below.
# The fixed random_state makes the split repeatable for a given row order.
train, test = train_test_split(df, random_state=42, test_size=0.2)

In [None]:
# Both splits go through the same transform (it contains no random augmentation).
train_ds = CreatePipeline(train, transform)
val_ds = CreatePipeline(test, transform)

In [None]:
# Training batches are shuffled; validation batches keep a fixed order.
train_dl = DataLoader(train_ds, batch_size=BATCH, shuffle=True)
val_dl = DataLoader(val_ds, batch_size=BATCH, shuffle=False)

<div style='background-color:#F5FAF5;
            color:#22584A;
            padding:20px;
            padding-left:5%;
            font-size:140%;'>
    <h1 style='text-align:center;'><b>Model</b> 🚀</h1>
    <p style='text-align:center;
              font-size:120%;'>
        My model is a simple CNN-based image classification model<br>It consists of 3 blocks</p>
    <h2 style='text-align:center;'>Blocks</h2>
    <p style='text-align:center;'><b>Each block consists of</b></p>
    <div style='text-align:center;'>
        <ul style='display: inline-block;
                   text-align: left;'>
            <li> Convolution layer</li>
            <li>Batch Normalization layer</li>
            <li>Max Pooling layer</li>
            <li>ReLU activation layer</li>
        </ul>
    </div>
</div>

In [None]:
class Block(torch.nn.Module):
    """One feature-extraction stage: Conv2d -> BatchNorm2d -> MaxPool2d -> ReLU.

    Parameters
    ----------
    in_channels : int
        Number of channels of the incoming feature map.
    out_channels : int
        Number of channels produced by the convolution.
    kernel : int
        Convolution kernel size; the max-pool window is ``kernel + 1``.
    """

    def __init__(self, in_channels, out_channels, kernel):
        super().__init__()
        self.conv = torch.nn.Conv2d(in_channels, out_channels, kernel)
        self.batch = torch.nn.BatchNorm2d(out_channels)
        self.pool = torch.nn.MaxPool2d(kernel+1)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        """Run the four layers in sequence and return the resulting feature map."""
        x = self.conv(x)
        x = self.batch(x)
        x = self.pool(x)
        x = self.relu(x)

        return x


class ImgClassifier(torch.nn.Module):
    """Three ``Block`` stages followed by a linear classification head.

    Parameters
    ----------
    in_channels : int
        Number of channels of the input images.
    num_classes : int
        Number of classes; ``forward`` returns exactly this many scores per image.
    """

    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.layers = torch.nn.Sequential(Block(in_channels, 32, 3),
                                         Block(32, 64, 3),
                                         Block(64, 128, 3))

        self.fc = torch.nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) of shape ``(batch, num_classes)``.

        The scores are deliberately left unnormalized:
        ``torch.nn.CrossEntropyLoss`` applies log-softmax itself, and
        ``argmax`` over logits picks the same class as over probabilities.
        Apply ``softmax(dim=1)`` to the result if probabilities are needed.
        """
        x = self.layers(x)
        # The conv stack can leave a spatial grid larger than 1x1 (2x2 for a
        # 224x224 input). Averaging over it yields one 128-d descriptor per
        # image, so the head always emits num_classes scores instead of
        # num_classes * H * W.
        x = x.mean(dim=(2, 3))

        return self.fc(x)

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 3 input channels (images are decoded as RGB) and 4 output classes
# (one per entry of label_index).
model = ImgClassifier(3, 4)
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# NOTE(review): CrossEntropyLoss is documented to expect unnormalized logits;
# confirm the model's forward output is not already passed through softmax.
criterion = torch.nn.CrossEntropyLoss()

In [None]:
# Train for EPOCHS epochs, validate after each one, and keep a copy of the
# weights that achieved the best validation accuracy in `best_model`.
best_model = deepcopy(model)
al_start = time.time()
best_acc = 0

for i in range(1, EPOCHS+1):
    start = time.time()

    # ---- training pass ----
    model.train()
    train_loss = 0
    train_total = 0
    for image, label in train_dl:
        # The DataLoader yields CPU tensors while the model lives on `device`;
        # without this move the forward pass fails as soon as a GPU is used.
        image = image.to(device)
        label = label.to(device)

        optimizer.zero_grad()
        out = model(image)
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()

        # The criterion returns the mean over the batch, so weight it by the
        # batch size; dividing by the sample count below then gives the true
        # per-sample average even when the last batch is smaller.
        batch_size = out.size(0)
        train_loss += loss.item() * batch_size
        train_total += batch_size

    train_res = train_loss/train_total

    # ---- validation pass ----
    model.eval()
    val_loss = 0
    val_total = 0
    val_acc = 0
    with torch.no_grad():
        for image, label in val_dl:
            image = image.to(device)
            label = label.to(device)

            out = model(image)
            loss = criterion(out, label)

            batch_size = out.size(0)
            val_loss += loss.item() * batch_size
            val_total += batch_size
            # Count correctly classified samples in this batch.
            val_acc += (label == out.argmax(1)).sum().item()

    val_res = val_loss/val_total
    acc_val = val_acc/val_total

    # Snapshot the model whenever validation accuracy improves.
    if acc_val > best_acc:
        best_acc = acc_val
        best_model = deepcopy(model)

    end = time.time()

    print("Epoch {} || train loss: {} || val loss: {} || val acc: {} || time: {}".format(i,
                                                                                        train_res,
                                                                                        val_res,
                                                                                        acc_val,
                                                                                        end-start))


al_end = time.time()
print("Total time {}".format(al_end - al_start))

<div style='background-color:#F5FAF5;
            color:#22584A;
            padding:20px;
            padding-left:10%;
            font-size:140%;'>
    <h1 style='text-align:center;'><b>Results</b> 🚀</h1>
    <p style='text-align:center;
              font-size:120%;'>
        A model trained on a small dataset like this one has very poor accuracy. Its best accuracy so far is 36% 😥 <br> In my opinion, this is <br></p><ul style='text-align:center;'>
    <li>because the model is implemented by myself and trained from scratch</li>
    <li>given the previous point, the dataset is too small to train a model from scratch</li>
    </ul>
    <h2 style='text-align:center;'>Suggestions</h2>
    <p style='text-align:center;'><b>Transfer Learning</b>. <br>I suggest taking a pre-trained model such as ResNet or VGGNet and fine-tuning it to recognize gym equipment</p>
</div>