# SNS Take Home Assessment

### Libraries

In [120]:
import numpy as np
import pandas as pd
import torch
import nltk
import os
import shutil
from matplotlib import pyplot as plt
from torchvision import datasets, transforms, models
import torch.nn as nn
from torch.utils.data import DataLoader
import argparse
import torch.optim as optim
from torchvision.datasets import ImageFolder

### Importing dataset

In [2]:
class_names = pd.read_csv("./Stanford Cars Dataset/annotations/class_names.csv", header = None, names = ['Models'])
car_annot = pd.read_csv("./Stanford Cars Dataset/annotations/cars_annos.csv", header = None, names = ['Raw'])

In [3]:
class_names.head()

Unnamed: 0,Models
0,AM General Hummer SUV 2000
1,Acura RL Sedan 2012
2,Acura TL Sedan 2012
3,Acura TL Type-S 2008
4,Acura TSX Sedan 2012


In [4]:
car_annot.head()

Unnamed: 0,Raw
0,Image;x1;y1;x2;y2;class;test
1,000001.jpg;112;7;853;717;1;0
2,000002.jpg;48;24;441;202;1;0
3,000003.jpg;7;4;277;180;1;0
4,000004.jpg;33;50;197;150;1;0


### Pre-Processing

In [5]:
car_annot2 = car_annot.copy()

In [6]:
def split_text(text):
    split = text.split(";")
    return split

In [7]:
car_annot2[["Image","x1", "y1","x2","y2","Class","Test"]] = car_annot2['Raw'].str.split(";", expand = True)

In [8]:
car_annot2 = car_annot2.iloc[1: ,1:]

In [9]:
car_annot2

Unnamed: 0,Image,x1,y1,x2,y2,Class,Test
1,000001.jpg,112,7,853,717,1,0
2,000002.jpg,48,24,441,202,1,0
3,000003.jpg,7,4,277,180,1,0
4,000004.jpg,33,50,197,150,1,0
5,000005.jpg,5,8,83,58,1,0
...,...,...,...,...,...,...,...
16181,016181.jpg,38,36,375,234,196,1
16182,016182.jpg,29,34,235,164,196,1
16183,016183.jpg,25,32,587,359,196,1
16184,016184.jpg,56,60,208,186,196,1


In [10]:
car_annot2["Class"] = pd.to_numeric(car_annot2.Class)

In [11]:
class_names

Unnamed: 0,Models
0,AM General Hummer SUV 2000
1,Acura RL Sedan 2012
2,Acura TL Sedan 2012
3,Acura TL Type-S 2008
4,Acura TSX Sedan 2012
...,...
191,Volkswagen Beetle Hatchback 2012
192,Volvo C30 Hatchback 2012
193,Volvo 240 Sedan 1993
194,Volvo XC90 SUV 2007


In [12]:
models_dict = class_names.to_dict("dict")

In [13]:
models_dict

{'Models': {0: 'AM General Hummer SUV 2000',
  1: 'Acura RL Sedan 2012',
  2: 'Acura TL Sedan 2012',
  3: 'Acura TL Type-S 2008',
  4: 'Acura TSX Sedan 2012',
  5: 'Acura Integra Type R 2001',
  6: 'Acura ZDX Hatchback 2012',
  7: 'Aston Martin V8 Vantage Convertible 2012',
  8: 'Aston Martin V8 Vantage Coupe 2012',
  9: 'Aston Martin Virage Convertible 2012',
  10: 'Aston Martin Virage Coupe 2012',
  11: 'Audi RS 4 Convertible 2008',
  12: 'Audi A5 Coupe 2012',
  13: 'Audi TTS Coupe 2012',
  14: 'Audi R8 Coupe 2012',
  15: 'Audi V8 Sedan 1994',
  16: 'Audi 100 Sedan 1994',
  17: 'Audi 100 Wagon 1994',
  18: 'Audi TT Hatchback 2011',
  19: 'Audi S6 Sedan 2011',
  20: 'Audi S5 Convertible 2012',
  21: 'Audi S5 Coupe 2012',
  22: 'Audi S4 Sedan 2012',
  23: 'Audi S4 Sedan 2007',
  24: 'Audi TT RS Coupe 2012',
  25: 'BMW ActiveHybrid 5 Sedan 2012',
  26: 'BMW 1 Series Convertible 2012',
  27: 'BMW 1 Series Coupe 2012',
  28: 'BMW 3 Series Sedan 2012',
  29: 'BMW 3 Series Wagon 2012',
  30

In [14]:
car_annot2["Class"] = car_annot2["Class"]-1

In [15]:
car_annot2

Unnamed: 0,Image,x1,y1,x2,y2,Class,Test
1,000001.jpg,112,7,853,717,0,0
2,000002.jpg,48,24,441,202,0,0
3,000003.jpg,7,4,277,180,0,0
4,000004.jpg,33,50,197,150,0,0
5,000005.jpg,5,8,83,58,0,0
...,...,...,...,...,...,...,...
16181,016181.jpg,38,36,375,234,195,1
16182,016182.jpg,29,34,235,164,195,1
16183,016183.jpg,25,32,587,359,195,1
16184,016184.jpg,56,60,208,186,195,1


In [16]:
car_annot3 = car_annot2.copy()

In [17]:
# Mapping Class number to Model
car_annot3["Models"] = car_annot3["Class"].map(models_dict['Models'])

In [18]:
car_annot3

Unnamed: 0,Image,x1,y1,x2,y2,Class,Test,Models
1,000001.jpg,112,7,853,717,0,0,AM General Hummer SUV 2000
2,000002.jpg,48,24,441,202,0,0,AM General Hummer SUV 2000
3,000003.jpg,7,4,277,180,0,0,AM General Hummer SUV 2000
4,000004.jpg,33,50,197,150,0,0,AM General Hummer SUV 2000
5,000005.jpg,5,8,83,58,0,0,AM General Hummer SUV 2000
...,...,...,...,...,...,...,...,...
16181,016181.jpg,38,36,375,234,195,1,smart fortwo Convertible 2012
16182,016182.jpg,29,34,235,164,195,1,smart fortwo Convertible 2012
16183,016183.jpg,25,32,587,359,195,1,smart fortwo Convertible 2012
16184,016184.jpg,56,60,208,186,195,1,smart fortwo Convertible 2012


### Split into 2 dataframes - Train and Test

In [19]:
train_annot = car_annot3.loc[(car_annot3["Test"] == "0")]
test_annot = car_annot3.loc[(car_annot3["Test"] == "1")]

In [20]:
train_annot

Unnamed: 0,Image,x1,y1,x2,y2,Class,Test,Models
1,000001.jpg,112,7,853,717,0,0,AM General Hummer SUV 2000
2,000002.jpg,48,24,441,202,0,0,AM General Hummer SUV 2000
3,000003.jpg,7,4,277,180,0,0,AM General Hummer SUV 2000
4,000004.jpg,33,50,197,150,0,0,AM General Hummer SUV 2000
5,000005.jpg,5,8,83,58,0,0,AM General Hummer SUV 2000
...,...,...,...,...,...,...,...,...
16141,016141.jpg,42,9,573,366,195,0,smart fortwo Convertible 2012
16142,016142.jpg,183,4,1186,780,195,0,smart fortwo Convertible 2012
16143,016143.jpg,121,87,465,278,195,0,smart fortwo Convertible 2012
16144,016144.jpg,21,30,468,320,195,0,smart fortwo Convertible 2012


In [21]:
train_annot.to_csv("./Stanford Cars Dataset/annotations/train_annots.csv")

In [22]:
test_annot

Unnamed: 0,Image,x1,y1,x2,y2,Class,Test,Models
46,000046.jpg,14,16,268,169,0,1,AM General Hummer SUV 2000
47,000047.jpg,10,18,89,61,0,1,AM General Hummer SUV 2000
48,000048.jpg,111,54,365,190,0,1,AM General Hummer SUV 2000
49,000049.jpg,9,33,479,276,0,1,AM General Hummer SUV 2000
50,000050.jpg,9,3,93,41,0,1,AM General Hummer SUV 2000
...,...,...,...,...,...,...,...,...
16181,016181.jpg,38,36,375,234,195,1,smart fortwo Convertible 2012
16182,016182.jpg,29,34,235,164,195,1,smart fortwo Convertible 2012
16183,016183.jpg,25,32,587,359,195,1,smart fortwo Convertible 2012
16184,016184.jpg,56,60,208,186,195,1,smart fortwo Convertible 2012


In [23]:
test_annot.to_csv("./Stanford Cars Dataset/annotations/test_annots.csv")

### Move images into train and test folders respectively

In [34]:
movdir = "./Stanford Cars Dataset/car_im_train/"
basedir = "./Stanford Cars Dataset/car_ims/"


count = 0
for i in train_annot["Image"]:
    shutil.copy(basedir+i, movdir+i)
    count += 1
    print("Copied:", count)

Copied:  1
Copied:  2
Copied:  3
Copied:  4
Copied:  5
Copied:  6
Copied:  7
Copied:  8
Copied:  9
Copied:  10
Copied:  11
Copied:  12
Copied:  13
Copied:  14
Copied:  15
Copied:  16
Copied:  17
Copied:  18
Copied:  19
Copied:  20
Copied:  21
Copied:  22
Copied:  23
Copied:  24
Copied:  25
Copied:  26
Copied:  27
Copied:  28
Copied:  29
Copied:  30
Copied:  31
Copied:  32
Copied:  33
Copied:  34
Copied:  35
Copied:  36
Copied:  37
Copied:  38
Copied:  39
Copied:  40
Copied:  41
Copied:  42
Copied:  43
Copied:  44
Copied:  45
Copied:  46
Copied:  47
Copied:  48
Copied:  49
Copied:  50
Copied:  51
Copied:  52
Copied:  53
Copied:  54
Copied:  55
Copied:  56
Copied:  57
Copied:  58
Copied:  59
Copied:  60
Copied:  61
Copied:  62
Copied:  63
Copied:  64
Copied:  65
Copied:  66
Copied:  67
Copied:  68
Copied:  69
Copied:  70
Copied:  71
Copied:  72
Copied:  73
Copied:  74
Copied:  75
Copied:  76
Copied:  77
Copied:  78
Copied:  79
Copied:  80
Copied:  81
Copied:  82
Copied:  83
Copied:  84
C

In [35]:
movdir = "./Stanford Cars Dataset/car_im_test/"
basedir = "./Stanford Cars Dataset/car_ims/"

count = 0
for i in test_annot["Image"]:
    shutil.copy(basedir+i, movdir+i)
    count += 1
    print("Copied:", count)

Copied: 1
Copied: 2
Copied: 3
Copied: 4
Copied: 5
Copied: 6
Copied: 7
Copied: 8
Copied: 9
Copied: 10
Copied: 11
Copied: 12
Copied: 13
Copied: 14
Copied: 15
Copied: 16
Copied: 17
Copied: 18
Copied: 19
Copied: 20
Copied: 21
Copied: 22
Copied: 23
Copied: 24
Copied: 25
Copied: 26
Copied: 27
Copied: 28
Copied: 29
Copied: 30
Copied: 31
Copied: 32
Copied: 33
Copied: 34
Copied: 35
Copied: 36
Copied: 37
Copied: 38
Copied: 39
Copied: 40
Copied: 41
Copied: 42
Copied: 43
Copied: 44
Copied: 45
Copied: 46
Copied: 47
Copied: 48
Copied: 49
Copied: 50
Copied: 51
Copied: 52
Copied: 53
Copied: 54
Copied: 55
Copied: 56
Copied: 57
Copied: 58
Copied: 59
Copied: 60
Copied: 61
Copied: 62
Copied: 63
Copied: 64
Copied: 65
Copied: 66
Copied: 67
Copied: 68
Copied: 69
Copied: 70
Copied: 71
Copied: 72
Copied: 73
Copied: 74
Copied: 75
Copied: 76
Copied: 77
Copied: 78
Copied: 79
Copied: 80
Copied: 81
Copied: 82
Copied: 83
Copied: 84
Copied: 85
Copied: 86
Copied: 87
Copied: 88
Copied: 89
Copied: 90
Copied: 91
Copied: 

### Create subfolders for each car model

In [55]:
newpath_train = "./Stanford Cars Dataset/cars_train_classes/"

for i in pd.unique(train_annot["Models"]):
    newpath_train2 = newpath_train + i
    if not os.path.exists(newpath_train2):
        os.makedirs(newpath_train2)
        print("Created folder:", newpath_train2)
    else:
        print("Folder exists")

Created folder: ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000
Created folder: ./Stanford Cars Dataset/cars_train_classes/Acura RL Sedan 2012
Created folder: ./Stanford Cars Dataset/cars_train_classes/Acura TL Sedan 2012
Created folder: ./Stanford Cars Dataset/cars_train_classes/Acura TL Type-S 2008
Created folder: ./Stanford Cars Dataset/cars_train_classes/Acura TSX Sedan 2012
Created folder: ./Stanford Cars Dataset/cars_train_classes/Acura Integra Type R 2001
Created folder: ./Stanford Cars Dataset/cars_train_classes/Acura ZDX Hatchback 2012
Created folder: ./Stanford Cars Dataset/cars_train_classes/Aston Martin V8 Vantage Convertible 2012
Created folder: ./Stanford Cars Dataset/cars_train_classes/Aston Martin V8 Vantage Coupe 2012
Created folder: ./Stanford Cars Dataset/cars_train_classes/Aston Martin Virage Convertible 2012
Created folder: ./Stanford Cars Dataset/cars_train_classes/Aston Martin Virage Coupe 2012
Created folder: ./Stanford Cars Dataset/cars_tr

In [57]:
newpath_test = "./Stanford Cars Dataset/cars_test_classes/"

for i in pd.unique(train_annot["Models"]):
    newpath_test2 = newpath_test + i
    if not os.path.exists(newpath_test2):
        os.makedirs(newpath_test2)
        print("Created folder:", newpath_test2)
    else:
        print("Folder exists")

Created folder: ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000
Created folder: ./Stanford Cars Dataset/cars_test_classes/Acura RL Sedan 2012
Created folder: ./Stanford Cars Dataset/cars_test_classes/Acura TL Sedan 2012
Created folder: ./Stanford Cars Dataset/cars_test_classes/Acura TL Type-S 2008
Created folder: ./Stanford Cars Dataset/cars_test_classes/Acura TSX Sedan 2012
Created folder: ./Stanford Cars Dataset/cars_test_classes/Acura Integra Type R 2001
Created folder: ./Stanford Cars Dataset/cars_test_classes/Acura ZDX Hatchback 2012
Created folder: ./Stanford Cars Dataset/cars_test_classes/Aston Martin V8 Vantage Convertible 2012
Created folder: ./Stanford Cars Dataset/cars_test_classes/Aston Martin V8 Vantage Coupe 2012
Created folder: ./Stanford Cars Dataset/cars_test_classes/Aston Martin Virage Convertible 2012
Created folder: ./Stanford Cars Dataset/cars_test_classes/Aston Martin Virage Coupe 2012
Created folder: ./Stanford Cars Dataset/cars_test_classes/

### Moving images to their specific model folders

In [76]:
movdir = "./Stanford Cars Dataset/cars_train_classes/"
basedir = "./Stanford Cars Dataset/car_im_train/"

unique_models = train_annot["Models"].unique()

# Iterate over each row in the dataframe
count = 0
for index, row in train_annot.iterrows():
    
    file_name = row["Image"]
    model = row["Models"]
    # print(file_name, model)

    # Move the file to the respective subfolder
    src_file_path = os.path.join(basedir, file_name)
    dest_file_path = os.path.join(movdir + model + "/" + file_name)

    shutil.copy(src_file_path, dest_file_path)
    print("Moving file to ", dest_file_path, " Count =", count)
    count += 1

Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000001.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000002.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000003.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000004.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000005.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000006.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000007.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000008.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_train_classes/AM General Hummer SUV 2000/000009.jpg  Count = 0
Moving file to  ./Stanford C

In [77]:
movdir = "./Stanford Cars Dataset/cars_test_classes/"
basedir = "./Stanford Cars Dataset/car_im_test/"

unique_models = test_annot["Models"].unique()

# Iterate over each row in the dataframe
count = 0
for index, row in test_annot.iterrows():
    
    file_name = row["Image"]
    model = row["Models"]
    # print(file_name, model)

    # Move the file to the respective subfolder
    src_file_path = os.path.join(basedir, file_name)
    dest_file_path = os.path.join(movdir + model + "/" + file_name)

    shutil.copy(src_file_path, dest_file_path)
    print("Moving file to ", dest_file_path, " Count =", count)
    count += 1

Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000046.jpg  Count = 0
Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000047.jpg  Count = 1
Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000048.jpg  Count = 2
Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000049.jpg  Count = 3
Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000050.jpg  Count = 4
Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000051.jpg  Count = 5
Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000052.jpg  Count = 6
Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000053.jpg  Count = 7
Moving file to  ./Stanford Cars Dataset/cars_test_classes/AM General Hummer SUV 2000/000054.jpg  Count = 8
Moving file to  ./Stanford Cars Datas

## Training model

### Hyperparameters

In [125]:
num_epochs = 10
batch_size = 4
learning_rate = 0.001

### Prepare dataset

In [126]:
# Set the path to your dataset
train_path = './Stanford Cars Dataset/cars_train_classes'
test_path = './Stanford Cars Dataset/cars_test_classes'

# Define the image transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create the training and testing datasets
train_dataset = ImageFolder(train_path, transform=transform)
test_dataset = ImageFolder(test_path, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


### Model Architecture

In [127]:
class CarClassifier(nn.Module):
    def __init__(self, num_classes):
        super(CarClassifier, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.classifier = nn.Sequential(
            nn.Linear(256 * 28 * 28, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

# Create an instance of the model
model = CarClassifier(num_classes=len(train_dataset.classes))

### Loss Function and Optimizer

In [128]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

### Train the model

In [129]:
# Move the model to the GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
train_loss_list = []
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Store training loss
        train_loss_list.append(loss.item())

        # Print training progress
        if (i + 1) % 100 == 0:
            print(f"Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{total_step}], Loss: {loss.item():.4f}")


OutOfMemoryError: CUDA out of memory. Tried to allocate 3.06 GiB (GPU 0; 6.00 GiB total capacity; 3.51 GiB already allocated; 663.00 MiB free; 3.53 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

### Plot training losses

In [None]:
plt.plot(range(1, num_epochs + 1), train_loss_list, label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss over Epochs')
plt.legend()
plt.show()

### Save model

In [None]:
# Save the model
torch.save(model.state_dict(), 'car_classifier.pth')
print("Model saved!")