In [None]:
# Imports here
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import torchvision.models as models
from PIL import Image
import json
from matplotlib.ticker import FormatStrFormatter
import os
import random
from pickle import load
#from matplotlib.pylab import plt
from numpy import arange

%matplotlib inline

# **Steps** <p>
Step 1: Load Dataset <p>
Step 2: Transform the Dataset <p>
Step 3: Create Model <p>
Step 4: Train Model <p>
Step 5: Save the Model <p>
Step 6: Load the Model <p>
Step 7: Predict the Image <p>
Step 8: Show the result

## Step 1: Load Dataset

In [None]:
# Dataset source: https://www.kaggle.com/datasets/jutrera/stanford-car-dataset-by-classes-folder
# The download ships only train/test folders, so half of the test images
# (sampled per class) are moved into a separate validation folder.
# Split sizes: train n = 8144, valid n = 4021, test n = 4020

data_dir = '/home/akm/Cars/Stanford_class/car_data/car_data'
train_dir, valid_dir, test_dir = (f'{data_dir}/{split}' for split in ('train', 'valid', 'test'))

In [None]:
# DO ONLY ONCE AT THE BEGINNING
# The script below is deliberately disabled by wrapping it in a string literal.
# When enabled it walks every class folder under `source`, randomly picks
# int(round(0.5 * n)) of its files and moves them into the matching class folder
# under `destination`, creating that folder when it does not exist yet.
# NOTE: the script calls shutil.move, but `shutil` is not imported by this
# notebook's import cell -- add `import shutil` before enabling it.
# NOTE: random.sample is unseeded here, so the split differs between runs.
'''
source = '/home/akm/Cars/Stanford_class/car_data/car_data/test'

destination = '/home/akm/Cars/Stanford_class/car_data/car_data/valid'

alldirs = os.listdir(source)

for i in range(len(alldirs)):
    dirpath = os.path.join(source,alldirs[i])
    allfiles = os.listdir(dirpath)
    allfiles_dic = {x : allfiles[x] for x in range(len(allfiles))}
    
    num_samples = len(allfiles)
    num_valid = int(round(num_samples * 0.5))
    valid_indexes = random.sample(range(num_samples), num_valid)
    validfiledic = []
    validfiledic = {x : allfiles[x] for x in valid_indexes}
    for key in validfiledic:
        src_path = os.path.join(dirpath, validfiledic[key])
        dst_dir = os.path.join(destination,alldirs[i])
        if not os.path.exists(dst_dir):
                    os.makedirs(dst_dir)
        dst_path = os.path.join(destination,alldirs[i], validfiledic[key])
        #print(key, validfiledic[key],dirpath, src_path, dst_dir, dst_path)
        shutil.move(src_path, dst_path)
        
'''

## Step 2: Transform the Dataset

In [None]:
# ImageNet channel statistics. The pretrained torchvision backbones expect inputs
# normalized with these values, and process_image()/imshow() in Step 7 use the same
# numbers, so training, validation, test and single-image prediction all agree.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Images are first resized to 244x244 and then cropped to the 224x224 input size used
# by the pretrained models (inception_v3 expects 299x299: resize to 299 and drop the crop).
resize_size = (244, 244)
crop_size = 224

# Training transform includes random rotation and flip to build a more robust model.
# The final crop keeps the training resolution identical to the evaluation resolution.
train_transforms = transforms.Compose([transforms.Resize(resize_size),
                                       transforms.RandomRotation(30),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.CenterCrop(crop_size), # for inception_v3 this needs to be removed
                                       transforms.ToTensor(),
                                       transforms.Normalize(imagenet_mean, imagenet_std)])


# Deterministic evaluation pipeline (no augmentation)
test_transforms = transforms.Compose([transforms.Resize(resize_size),
                                      transforms.CenterCrop(crop_size), # for inception_v3 this needs to be removed
                                      transforms.ToTensor(),
                                      transforms.Normalize(imagenet_mean, imagenet_std)])

# The validation set uses exactly the same transform as the test set
validation_transforms = test_transforms


# Load the datasets with ImageFolder (class index = position of the folder name in sorted order)
train_data = datasets.ImageFolder(train_dir, transform=train_transforms)
test_data = datasets.ImageFolder(test_dir, transform=test_transforms)
valid_data = datasets.ImageFolder(valid_dir, transform=validation_transforms)

# Using the image datasets and the transforms, define the dataloaders.
# Only the training loader is shuffled so that the order of the images does not affect
# the model; evaluation metrics do not depend on order, so those loaders stay deterministic.
trainloader = torch.utils.data.DataLoader(train_data, batch_size=128, shuffle=True)
testloader = torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False)
validloader = torch.utils.data.DataLoader(valid_data, batch_size=32, shuffle=False)

## Step 3: Create Model

In [None]:
# Pick ONE pretrained backbone; the remaining lines are alternatives to swap in.
# The classifier-head cell further below must be switched to the matching variant.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm against the installed version.
model = models.alexnet(pretrained=True)
#model = models.vgg16(pretrained=True)
#model = models.vgg19(pretrained=True)
#model = models.googlenet(pretrained=True)
#model = models.inception_v3(pretrained=True)
#model = models.resnet18(pretrained=True)
#model = models.resnet34(pretrained=True)
#model = models.resnet50(pretrained=True)

In [None]:
# Display the architecture before the classifier head is replaced
model

In [None]:
# Change final outputs to the number of classes i.e., 196 from previous 1000 classes (the number of output classes in imagenet)
# Different pretrained models have different architectures and the fully connected layers are named differently
# Need to consider case by case
# For models that have single fully connected layer, we change final output nodes to 196
# For models with more than 1 fully connected layer, we change final output nodes to 196 and second last layer to have 1024 output nodes to keep number of trainable parameters low
# Only the variant matching the backbone selected above may be active.

# For alexnet
model.classifier[4] = nn.Linear(4096,1024)
model.classifier[6] = nn.Linear(1024,196)

# For vgg16 and vgg19
#model.classifier[3] = nn.Linear(4096,1024)
#model.classifier[6] = nn.Linear(1024,196)

# For googlenet
#model.fc = nn.Linear(1024, 196)

# For inceptionv3 
#model.aux_logits=False
#model.fc = nn.Linear(2048, 196)

# For resnet18 and resnet34
#model.fc = nn.Linear(512, 196)

# For resnet50
#model.fc = nn.Linear(2048, 196)

In [None]:
# Display the architecture again to confirm the new 196-way classifier head
model

In [None]:
# Multi-class classification on raw logits
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# Reduce the learning rate when validation accuracy (in percent) stops improving.
# The threshold must be absolute: in the default relative mode with mode='max', a value
# only counts as an improvement when it exceeds best * (1 + threshold), i.e. 1.9x the best
# accuracy so far, which would make the scheduler decay the rate almost unconditionally.
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3,
                                                   threshold=0.9, threshold_mode='abs')

## Step 4: Train Model

In [None]:
# Implement a function for the validation pass
def validation(model, validloader, criterion):
    """Run one full pass over ``validloader`` and accumulate loss and accuracy.

    Args:
        model: network to evaluate. It is moved to the GPU when CUDA is available,
            otherwise to the CPU. The caller is responsible for ``model.eval()``.
        validloader: iterable yielding ``(images, labels)`` batches.
        criterion: loss function called as ``criterion(output, labels)``.

    Returns:
        (valid_loss, accuracy): the SUM over batches of the batch loss and of the
        batch accuracy (both Python floats). Divide by ``len(validloader)`` to
        obtain the averages.
    """
    # Fall back to the CPU instead of crashing when no GPU is present
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    valid_loss = 0.0
    accuracy = 0.0

    # No gradients are needed for evaluation, whether or not the caller disabled them
    with torch.no_grad():
        for images, labels in validloader:
            images, labels = images.to(device), labels.to(device)

            # Forward pass; the model returns raw logits
            output = model(images)
            valid_loss += criterion(output, labels).item()

            # The predicted class is the arg-max logit (no exp/softmax needed for that)
            predictions = output.argmax(dim=1)
            accuracy += (predictions == labels).float().mean().item()

    return valid_loss, accuracy

In [None]:
epochs = 10
train_dic = {}  # epoch number -> mean training loss per batch
valid_dic = {}  # epoch number -> mean validation loss per batch

# Train on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for e in range(epochs):

    model.train()
    running_loss = 0

    # Iterating over data to carry out training step
    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)

        # zeroing parameter gradients
        optimizer.zero_grad()

        # Forward and backward passes
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Validate once at the end of every epoch so the reported numbers always describe
    # the weights the epoch finished with (and are defined even for very short epochs).
    model.eval()
    with torch.no_grad():
        valid_loss, accuracy = validation(model, validloader, criterion)

    # validation() returns sums over batches; turn everything into per-batch means
    train_loss_avg = running_loss / len(trainloader)
    valid_loss_avg = valid_loss / len(validloader)
    valid_accuracy = float(accuracy) / len(validloader)

    # The scheduler monitors validation accuracy expressed in percent
    lrscheduler.step(valid_accuracy * 100)

    print(f"No. epochs: {e+1}, \
    Training Loss: {round(train_loss_avg,3)} \
    Valid Loss: {round(valid_loss_avg,3)} \
    Valid Accuracy: {round(valid_accuracy,3)}")

    train_dic[e+1] = round(train_loss_avg,3)
    valid_dic[e+1] = round(valid_loss_avg,3)
           

In [None]:
# Visualise training plot

# Retrieve each dictionary's values as plain lists (matplotlib cannot index dict views)
train_values = list(train_dic.values())
val_values = list(valid_dic.values())

# Epoch numbers come from the recorded history, so the plot follows the number of
# epochs actually trained and the `epochs` setting is not overwritten.
epoch_numbers = list(train_dic.keys())

# Plot and label the training and validation loss values
plt.plot(epoch_numbers, train_values, label='Training Loss')
plt.plot(epoch_numbers, val_values, label='Validation Loss')

# Add in a title and axes labels
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')

# Set the tick locations
plt.xticks(arange(0, len(epoch_numbers) + 1, 1))

# Display the plot
plt.legend(loc='best')
plt.show()

In [None]:
# Check accuracy on the test set

correct = 0
total = 0
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
# The training loop leaves the model in train mode; switch to eval mode so that
# dropout / batch-norm behave deterministically while measuring accuracy.
model.eval()
actual_class = []     # one list of true labels per batch
predicted_class = []  # one list of predicted labels per batch


with torch.no_grad():
    for images, labels in testloader:
        images, labels = images.to(device), labels.to(device)
        # Raw class scores (logits)
        outputs = model(images)
        # Turn scores into predictions
        _, predicted_outcome = torch.max(outputs, 1)
        # Total number of images
        total += labels.size(0)
        # Count number of cases in which predictions are correct
        correct += (predicted_outcome == labels).sum().item()

        actual_class.append(labels.tolist())
        predicted_class.append(predicted_outcome.tolist())

print(f"Test accuracy of model: {round(100 * correct / total,3)}%")

In [None]:
# Helper that flattens the per-batch lists of labels/predictions into one flat list

def flat(lis):
    """Flatten ``lis`` by exactly one level.

    Elements whose type is exactly ``list`` are expanded in place; every other
    element (including tuples and nested lists inside a sub-list) is kept as is.
    """
    flattened = []
    for entry in lis:
        # Wrap non-list entries so both cases go through a single extend()
        flattened.extend(entry if type(entry) is list else [entry])
    return flattened

In [None]:
# Collapse the per-batch label lists into flat NumPy arrays (one entry per test image)

actual_class_flat, predicted_class_flat = (
    np.array(flat(batches)) for batches in (actual_class, predicted_class)
)

In [None]:
# Pair every true label with the model's prediction, one column each
data_pred = dict(Actual=actual_class_flat, Predcited=predicted_class_flat)

# Build the comparison table from the two columns
df = pd.DataFrame(data_pred)

# Show the table as the cell output
df

In [None]:
# For easier saving and loading define model_var as the name of model.
# It is used as the file-name prefix of the prediction CSV and of the checkpoint,
# so it should match the backbone selected in Step 3.

model_var = "alexnet" # alexnet, vgg16, vgg19, googlenet, inception_v3, resnet18, resnet34, resnet50

In [None]:
# Write the actual-vs-predicted table next to the dataset, prefixed with the model name
predictions_csv = '/home/akm/Cars/Stanford_class/car_data/car_data/' + model_var +'_actual_predcited.csv'
df.to_csv(predictions_csv)

## Step 5: Save the Model

In [None]:
# Saving: feature weights, new model.fc, index-to-class mapping, optimiser state, and No. of epochs
# Applicable for googlenet, inception_v3, resnet18, resnet34, resnet50
# The guard lets the notebook run top to bottom with any backbone: models whose head is
# `model.classifier` (alexnet, vgg) have no `fc` attribute and are skipped by this cell.
if hasattr(model, 'fc'):
    checkpoint = {'state_dict': model.state_dict(),
                  'model': model.fc,
                  'class_to_idx': train_data.class_to_idx,
                  # call state_dict() -- storing the bound method would pickle the whole optimizer
                  'opt_state': optimizer.state_dict(),
                  # number of epochs actually recorded during training
                  'num_epochs': len(train_dic)}

    torch.save(checkpoint, '/home/akm/Cars/Stanford_class/car_data/car_data/' + model_var + '_checkpoint.pth')

In [None]:
# Saving: feature weights, new model.classifier, index-to-class mapping, optimiser state, and No. of epochs
# Applicable for alexnet, vgg16, vgg19
# The guard skips this cell for backbones whose head is `model.fc` instead of `model.classifier`.
if hasattr(model, 'classifier'):
    checkpoint = {'state_dict': model.state_dict(),
                  'model': model.classifier,
                  'class_to_idx': train_data.class_to_idx,
                  # call state_dict() -- storing the bound method would pickle the whole optimizer
                  'opt_state': optimizer.state_dict(),
                  # number of epochs actually recorded during training
                  'num_epochs': len(train_dic)}

    torch.save(checkpoint, '/home/akm/Cars/Stanford_class/car_data/car_data/' + model_var + '_checkpoint.pth')

## Step 6: Load the Model

In [None]:
# Imports here
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import torchvision.models as models
from PIL import Image
import json
from matplotlib.ticker import FormatStrFormatter
import os
import random
from pickle import load
#from matplotlib.pylab import plt
from numpy import arange

In [None]:
# Write a function that loads a checkpoint and rebuilds the model

def load_checkpoint(filepath, target_model=None):
    """Load weights and the class mapping from a checkpoint file into a model.

    Args:
        filepath: path of a checkpoint containing at least the keys
            ``'state_dict'`` and ``'class_to_idx'``.
        target_model: model that receives the weights. When omitted, the
            notebook-level ``model`` is used; it must already have the same
            architecture (including the replaced classifier head) as the saved one.

    Returns:
        The model that was populated, with ``class_to_idx`` attached as an attribute.
    """
    import warnings

    net = model if target_model is None else target_model

    # map_location='cpu' lets a checkpoint written on a GPU machine load on a CPU-only
    # host; load_state_dict copies the values onto whatever device `net` lives on.
    # NOTE(review): checkpoints written by Step 5 also pickle a module object under
    # 'model'; recent PyTorch releases default torch.load to weights_only=True and may
    # reject such files -- confirm against the installed version.
    checkpoint = torch.load(filepath, map_location='cpu')

    # strict=False tolerates missing/unexpected keys; surface them instead of hiding them
    load_result = net.load_state_dict(checkpoint['state_dict'], strict=False)
    missing = list(getattr(load_result, 'missing_keys', []))
    unexpected = list(getattr(load_result, 'unexpected_keys', []))
    if missing or unexpected:
        warnings.warn(f"Checkpoint/model key mismatch - missing: {missing}, unexpected: {unexpected}")

    net.class_to_idx = checkpoint['class_to_idx']

    return net

In [None]:
# Rebuild the same backbone that was trained; load_checkpoint() copies the saved
# state_dict into this notebook-level `model`, so the architectures have to match.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=` -- confirm against the installed version.
model = models.alexnet(pretrained=True)
#model = models.vgg16(pretrained=True)
#model = models.vgg19(pretrained=True)
#model = models.googlenet(pretrained=True)
#model = models.inception_v3(pretrained=True)
#model = models.resnet18(pretrained=True)
#model = models.resnet34(pretrained=True)
#model = models.resnet50(pretrained=True)

In [None]:
# Change final outputs to the number of classes i.e., 196 from previous 1000 classes (the number of output classes in imagenet)
# Different pretrained models have different architectures and the fully connected layers are named differently
# Need to consider case by case
# For models that have single fully connected layer, we change final output nodes to 196
# For models with more than 1 fully connected layer, we change final output nodes to 196 and second last layer to have 1024 output nodes to keep number of trainable parameters low
# The head built here must be the same variant that was used for training,
# otherwise the saved weights will not fit the layers.

# For alexnet
model.classifier[4] = nn.Linear(4096,1024)
model.classifier[6] = nn.Linear(1024,196)

# For vgg16 and vgg19
#model.classifier[3] = nn.Linear(4096,1024)
#model.classifier[6] = nn.Linear(1024,196)

# For googlenet
#model.fc = nn.Linear(1024, 196)

# For inceptionv3 
#model.aux_logits=False
#model.fc = nn.Linear(2048, 196)

# For resnet18 and resnet34
#model.fc = nn.Linear(512, 196)

# For resnet50
#model.fc = nn.Linear(2048, 196)

In [None]:
# Restore the trained weights into the freshly built model

model_var = "alexnet" # alexnet, vgg16, vgg19, googlenet, inception_v3, resnet18, resnet34, resnet50

checkpoint_path = '/home/akm/Cars/Stanford_class/car_data/car_data/' + model_var + '_checkpoint.pth'
model = load_checkpoint(checkpoint_path)
# Sanity check: the printed classifier should end in 196 output units
print(model)


## Step 7: Predict the Image

In [None]:
def process_image(image):
    """Load an image file and preprocess it for the network.

    Args:
        image: path of the image WITHOUT its extension; ``'.jpg'`` is appended.

    Returns:
        NumPy float array in channel-first layout (3 x 224 x 224 with the default
        sizes below), normalized with the ImageNet mean and standard deviation.
    """
    # Resize to 244x244, then center-crop to the 224x224 model input
    # (inception_v3 expects 299x299: resize to 299 and drop the crop).
    # The normalization must match the one used when the model was trained.
    transform = transforms.Compose([transforms.Resize((244,244)),
                                    transforms.CenterCrop(224), # for inception_v3 this needs to be removed
                                    transforms.ToTensor(),
                                    transforms.Normalize([0.485, 0.456, 0.406],
                                                         [0.229, 0.224, 0.225])])

    # The context manager closes the file handle; convert('RGB') guarantees three
    # channels so grayscale or RGBA files do not break the 3-channel normalization.
    with Image.open(f'{image}' + '.jpg') as pil_im:
        pil_tfd = transform(pil_im.convert('RGB'))

    # Converting the tensor to a NumPy array
    array_im_tfd = pil_tfd.numpy()

    return array_im_tfd

In [None]:
def imshow(image, ax=None, title=None):
    """Display a preprocessed, channel-first image array.

    Args:
        image: NumPy array of shape (channels, height, width) that was normalized
            with the ImageNet mean/std, as produced by ``process_image``.
        ax: axes to draw on; a new figure and axes are created when omitted.
        title: optional title shown above the image.

    Returns:
        The axes the image was drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots()

    # PyTorch tensors assume the color channel is the first dimension
    # but matplotlib assumes is the third dimension
    image = image.transpose((1, 2, 0))

    # Undo the normalization: x = std * x_normalized + mean
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = std * image + mean

    # Image needs to be clipped between 0 and 1 or it looks like noise when displayed
    image = np.clip(image, 0, 1)

    ax.imshow(image)
    # Honour the title argument instead of silently ignoring it
    if title is not None:
        ax.set_title(title)

    return ax

In [None]:
# Dataset root, redefined here so the prediction section can run after a kernel restart
data_dir = '/home/akm/Cars/Stanford_class/car_data/car_data'

In [None]:
# Preview one preprocessed test image (process_image appends the '.jpg' extension itself)
imshow(process_image(data_dir + '/test/' + 'Tesla Model S Sedan 2012' + '/07305'))

In [None]:
def predict(image_path, model, topk=5):
    """Predict the ``topk`` most likely classes for one image.

    Args:
        image_path: image path without extension (see ``process_image``).
        model: path of a checkpoint file, which is loaded through
            ``load_checkpoint``; an already constructed ``nn.Module`` is also
            accepted and used directly.
        topk: number of best classes to return.

    Returns:
        (conf, predicted): two NumPy arrays of length ``topk`` holding the softmax
        probabilities and the class indices, sorted from most to least likely.
    """
    # Accept a ready model as well as a checkpoint path, then run on the CPU
    loaded_model = model if isinstance(model, nn.Module) else load_checkpoint(model)
    loaded_model = loaded_model.cpu()

    # Pre-processing image and converting it to a float tensor
    img = process_image(image_path)
    img_tensor = torch.from_numpy(img).type(torch.FloatTensor)
    # Adding a batch dimension to comply with the (B x C x H x W) input of the model
    img_add_dim = img_tensor.unsqueeze(0)

    # Setting model to evaluation mode and turning off gradients
    loaded_model.eval()
    with torch.no_grad():
        output = loaded_model(img_add_dim)
        # The network returns raw logits; softmax turns them into probabilities
        probabilities = torch.softmax(output, dim=1)

    # A single topk call yields both the values and their indices
    probs_top, predicted_top = probabilities.topk(topk)

    # Drop the batch dimension and convert to NumPy arrays
    conf = probs_top.numpy()[0]
    predicted = predicted_top.numpy()[0]

    return conf, predicted

In [None]:
# tie the class indices to their names

def find_classes(dir):
    """Map the class sub-folders of ``dir`` to consecutive integer indices.

    Only directories are treated as classes, so stray files in ``dir`` (hidden
    system files, notes, ...) cannot shift the indices.

    Args:
        dir: folder that contains one sub-folder per class.

    Returns:
        (classes, class_to_idx): the alphabetically sorted class names and a dict
        mapping each name to its position in that list.
    """
    with os.scandir(dir) as entries:
        classes = sorted(entry.name for entry in entries if entry.is_dir())
    class_to_idx = {name: idx for idx, name in enumerate(classes)}
    return classes, class_to_idx
# Build the class list and the name -> index mapping from the training folders
classes, c_to_idx = find_classes(f"{data_dir}/train")

print(classes, c_to_idx)

In [None]:
# Run a single prediction: checkpoint path for the selected model plus one test
# image (given without its '.jpg' extension, which process_image appends).
model_path = '/home/akm/Cars/Stanford_class/car_data/car_data/' + model_var  +'_checkpoint.pth'
image_path = data_dir + '/test/' + 'Tesla Model S Sedan 2012' + '/07305'


conf1, predicted1 = predict(image_path, model_path, topk=5)

print(conf1)
# NOTE(review): index 4 is the LAST of the five returned classes (topk results are
# sorted best-first); use predicted1[0] if the top prediction is what is wanted -- confirm.
print(classes[predicted1[4]])

In [None]:
# Use a larger default font for all following matplotlib figures
plt.rcParams.update({'font.size': 15})


In [None]:
def plot_solution(cardir, model):
    """Show a test image together with a bar chart of its top-5 predictions.

    Args:
        cardir: image path without the '.jpg' extension; the name of its parent
            folder is used as the plot title (the true class name).
        model: currently unused -- the checkpoint selected by the notebook-level
            ``model_var`` is loaded by ``predict`` instead.
    """
    # Inputs are paths to saved model and test image
    model_path = '/home/akm/Cars/Stanford_class/car_data/car_data/' + model_var + '_checkpoint.pth'
    image_path = cardir
    # The class name is the folder holding the image, regardless of how deep the path is
    carname = os.path.basename(os.path.dirname(cardir))

    conf2, predicted1 = predict(image_path, model_path, topk=5)
    # Converting class indices to names (as many as predict returned)
    names = [classes[idx] for idx in predicted1]

    # Plotting test image and predicted scores
    f, ax = plt.subplots(2,figsize = (6,10))

    # Draw the image while the file is open; the handle is closed right afterwards
    with Image.open(image_path+'.jpg') as image:
        ax[0].imshow(image)
    ax[0].set_title(carname)

    # Horizontal bars of the scores, rescaled to sum to one, best class on top
    y_names = np.arange(len(names))
    ax[1].barh(y_names, conf2/conf2.sum(), color='darkblue')
    ax[1].set_yticks(y_names)
    ax[1].set_yticklabels(names)
    ax[1].invert_yaxis()

    plt.show()

In [None]:
# Plot the prediction for one test image (path given without the '.jpg' extension)
cardir= (data_dir + '/test/' + 'Tesla Model S Sedan 2012' + '/07305')
plot_solution(cardir, model)