In [3]:
# pytorch has alot of pretained models for images, we can use any of them
import torch
from torch import nn
from torch.optim import Adam
from torchvision.transforms import transforms
from torch.utils.data import DataLoader, Dataset
from torchvision import models
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
import numpy as np
import os



cuda


In [4]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cpu


In [49]:
train_df = pd.read_csv('dataset/train.csv')
val_df = pd.read_csv('dataset/val.csv')

In [8]:
# we have the categories already encoded in integers, it is not in string so we do not
# need to do encodig
train_df['category'].unique()

array([0, 1, 2], dtype=int64)

In [9]:
train_df.shape

(1034, 2)

In [13]:
val_df.shape

(133, 2)

In [14]:
train_df['category'].value_counts()

category
2    348
1    345
0    341
Name: count, dtype: int64

In [15]:
# the datset is balanced

In [52]:
# transforms will allow us to preprocess the images
transform = transforms.Compose([
    transforms.Resize((128,128)),
    transforms.ToTensor(),
    transforms.ConvertImageDtype(torch.float)
])
# We first resize all the images to the same size and then convert it into torch tensors
# and then the datatype of the numbers inside that tensors as float
#A tensor is the core data structure in PyTorch. It is a container for data that can 
#live on the CPU or GPU, and supports automatic differentiation (gradients) for deep learning.
# pytorch is a complete environment so we should convert all the formats to the library
# specific formats


In [53]:
class CustomImageDataset(Dataset):
    def __init__ (self, dataframe, transform):
        self.dataframe = dataframe
        self.transform = transform
        self.labels = torch.tensor(dataframe['category']).to(device)

    def __len__(self):
        return self.dataframe.shape[0]

    def __getitem__(self, idx):
        img_path = self.dataframe.iloc[idx, 0]
        label = self.labels[idx]
        image = Image.open(img_path)
        if self.transform:
            image = (self.transform(image)/255.0).to(device)

        return image, label
            

In [54]:
train_dataset = CustomImageDataset(dataframe = train_df, transform=transform)
val_dataset = CustomImageDataset(dataframe = val_df, transform=transform)

In [55]:
LR = 1e-3
BATCH_SIZE = 4
EPOCHS = 15
train_loader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [30]:
googlenet_model = models.googlenet(weights = 'DEFAULT')

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to C:\Users\Anas/.cache\torch\hub\checkpoints\googlenet-1378be20.pth
100%|█████████████████████████████████████████████████████████████████████████████| 49.7M/49.7M [00:21<00:00, 2.41MB/s]


In [56]:
# Model is not starting with random weigths as it is already trained, so it will
# help as it wil take less time and computaion as the model is trained

for param in googlenet_model.parameters():
    param.requires_grad = True

In [57]:
googlenet_model.fc

Linear(in_features=1024, out_features=3, bias=True)

In [33]:
# out features are 100 but we need only 3 for this datset

In [58]:
num_classes = len(train_df['category'].unique())
num_classes

3

In [59]:
googlenet_model.fc = torch.nn.Linear(googlenet_model.fc.in_features, num_classes)
googlenet_model.fc

Linear(in_features=1024, out_features=3, bias=True)

In [60]:
googlenet_model.to(device)

GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (inception3a): Inception(
    (branch1): BasicConv2d(
      (conv): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track

In [61]:
loss_function = nn.CrossEntropyLoss()
optimizer = Adam(googlenet_model.parameters(),lr=LR)

In [71]:
total_loss_train_plot = []
total_acc_train_plot = []

for epoch in range(EPOCHS):
    total_acc_train = 0
    total_loss_train= 0

    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = googlenet_model(inputs)
        train_loss = loss_function(outputs, labels)
        total_loss_train += train_loss.item()

        train_loss.backward()
        train_acc = preds = torch.argmax(outputs, dim=1)
        train_acc = (preds == labels).sum().item()

        total_acc_train += train_acc
        optimizer.step()

    total_loss_train_plot.append(round(total_loss_train/1000, 4))
    total_acc_train_plot.append(round(total_acc_train/train_dataset.__len__()*100, 4))
    print(f"Epoch {epoch+1}/{EPOCHS}, Train Loss: {round(total_loss_train/1000, 4)} Train Accuracy: {round(total_acc_train/train_dataset.__len__()*100, 4)}")

Epoch 1/15, Train Loss: 0.227 Train Accuracy: 61.5087
Epoch 2/15, Train Loss: 0.2009 Train Accuracy: 67.6015
Epoch 3/15, Train Loss: 0.1953 Train Accuracy: 68.472
Epoch 4/15, Train Loss: 0.1721 Train Accuracy: 73.9845
Epoch 5/15, Train Loss: 0.1849 Train Accuracy: 69.2456
Epoch 6/15, Train Loss: 0.1628 Train Accuracy: 74.0812
Epoch 7/15, Train Loss: 0.155 Train Accuracy: 75.4352
Epoch 8/15, Train Loss: 0.1457 Train Accuracy: 76.8859
Epoch 9/15, Train Loss: 0.1431 Train Accuracy: 78.4333
Epoch 10/15, Train Loss: 0.1456 Train Accuracy: 77.176
Epoch 11/15, Train Loss: 0.1286 Train Accuracy: 81.1412
Epoch 12/15, Train Loss: 0.1344 Train Accuracy: 80.6576
Epoch 13/15, Train Loss: 0.128 Train Accuracy: 81.1412
Epoch 14/15, Train Loss: 0.1092 Train Accuracy: 84.1393
Epoch 15/15, Train Loss: 0.1229 Train Accuracy: 82.5919


In [74]:
with torch.no_grad():
    total_loss_test = 0
    total_acc_test = 0

    for input, labels in val_loader:
        prediction = googlenet_model(input)

        acc = (torch.argmax(prediction, axis = 1) == labels).sum().item()
        total_acc_test += acc

In [75]:
print(round(total_acc_test/val_dataset.__len__()*100,2))

76.69
