## Downloading Data

In [None]:
# Download the Positive_tensors archive into the current working directory
!wget https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0321EN/data/images/Positive_tensors.zip

In [None]:
# Unzip the downloaded archive (-q keeps unzip from listing every extracted file)
!unzip -q Positive_tensors.zip

In [None]:
! wget https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0321EN/data/images/Negative_tensors.zip
!unzip -q Negative_tensors.zip.1

In [None]:
!pip install torchvision

## Imports and Auxiliary Functions

In [None]:
import torchvision.models as models
from PIL import Image
import pandas
from torchvision import transforms
import torch.nn as nn
import time
import torch
import matplotlib.pylab as plt
import numpy as np
from torch.utils.data import Dataset, DataLoader
import h5py
import os
import glob
# Seed PyTorch's global random number generator so repeated runs start from the same state
torch.manual_seed(0)

In [None]:
from matplotlib.pyplot import imshow
import matplotlib.pylab as plt
from PIL import Image
import pandas as pd
import os

## Dataset Class

In [None]:
# The base class is spelled out in full: `class Dataset(Dataset)` rebinds the imported name,
# so re-running the cell would silently subclass the previous definition of this class.
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over image tensors stored one per ``.pt`` file.

    Files are read from the ``Positive_tensors`` (label 1) and ``Negative_tensors``
    (label 0) sub-directories of ``directory``. The two classes are interleaved --
    positive, negative, positive, ... -- and the interleaved sequence is cut at
    ``n_train``: the first ``n_train`` samples form the training split and the
    remainder forms the validation split.

    Args:
        transform: optional callable applied to every loaded tensor.
        train: ``True`` selects the training split, ``False`` the validation split.
        directory: folder containing the two class sub-directories.
        n_train: number of leading interleaved samples assigned to the training split.

    Attributes:
        all_files: paths of the samples in the selected split.
        Y: ``torch.long`` tensor with the label of each sample in ``all_files``.
        len: number of samples in the selected split.
    """

    # Constructor
    def __init__(self, transform=None, train=True,
                 directory="/resources/DL0321EN/labs/Week4", n_train=30000):
        positive_files = self._list_tensor_files(os.path.join(directory, "Positive_tensors"))
        negative_files = self._list_tensor_files(os.path.join(directory, "Negative_tensors"))

        # Interleave the classes so that consecutive samples alternate labels.
        all_files = []
        labels = []
        for positive, negative in zip(positive_files, negative_files):
            all_files += [positive, negative]
            labels += [1, 0]

        # If one class has more files than the other, append the surplus instead of
        # failing (slice assignment such as `files[::2] = positives` needs equal sizes).
        n_paired = min(len(positive_files), len(negative_files))
        for surplus, label in ((positive_files[n_paired:], 1), (negative_files[n_paired:], 0)):
            all_files += surplus
            labels += [label] * len(surplus)

        # The transform is applied to each image tensor in __getitem__
        self.transform = transform

        split = slice(0, n_train) if train else slice(n_train, None)
        self.all_files = all_files[split]
        # Integer class labels (dtype torch.long)
        self.Y = torch.tensor(labels[split], dtype=torch.long)
        self.len = len(self.all_files)

    @staticmethod
    def _list_tensor_files(folder):
        """Return the paths of the ``.pt`` files in ``folder``, sorted by file name."""
        # os.listdir returns names in arbitrary order; sorting keeps the
        # train/validation split identical from run to run.
        return [os.path.join(folder, name)
                for name in sorted(os.listdir(folder))
                if name.endswith(".pt")]

    # Get the length
    def __len__(self):
        return self.len

    # Getter
    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, label)``."""
        # NOTE(review): torch.load unpickles the file, so only read from trusted directories.
        image = torch.load(self.all_files[idx])
        y = self.Y[idx]

        # If there is any transform method, apply it onto the image
        if self.transform:
            image = self.transform(image)

        return image, y

print("done")

In [None]:
# Show the kernel's current working directory
print("Current working directory:", os.getcwd())

In [None]:
# Build both splits from the same files: train=True selects the training split,
# train=False selects the validation split.
train_dataset = Dataset(train=True)
validation_dataset = Dataset(train=False)
print("done")

## Load the pre-trained ResNet-18 model

In [None]:
# Instantiate torchvision's ResNet-18 with its pre-trained weights
model=models.resnet18(pretrained=True)

In [None]:
# Freeze every pre-trained weight: with requires_grad off, no gradient is computed for it
for weight in model.parameters():
    weight.requires_grad = False

In [None]:
# Replace the pre-trained classifier head with a new two-class linear layer.
# The input width is read from the layer being replaced (512 for ResNet-18) instead of
# being hard-coded, so the cell keeps working if a different backbone is loaded above.
# A newly created layer has requires_grad=True, so it is the only part left trainable.
model.fc = nn.Linear(in_features=model.fc.in_features, out_features=2)

In [None]:
# Print the architecture to check that the final fc layer now has two outputs
print(model)

## Train the model

In [None]:
# Cross-entropy loss: compares the model's two-class output scores with the integer labels
criterion=nn.CrossEntropyLoss()

In [None]:
# Wrap each split in a loader that serves mini-batches of 100 samples
train_loader = DataLoader(train_dataset, batch_size=100)
validation_loader = DataLoader(validation_dataset, batch_size=100)

In [None]:
# Give Adam only the parameters that still require gradients (the frozen ones are skipped)
trainable_parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_parameters, lr=0.001)

### Train the model and compute the validation accuracy

In [None]:
n_epochs = 1
loss_list = []       # training loss of every mini-batch, in iteration order
accuracy_list = []   # validation accuracy measured after each epoch
N_test = len(validation_dataset)
N_train = len(train_dataset)
start_time = time.time()

for epoch in range(n_epochs):

    # Training mode is set once per epoch; it does not change between batches.
    model.train()
    for x, y in train_loader:
        # clear gradient
        optimizer.zero_grad()
        # make a prediction
        z = model(x)
        # calculate loss
        loss = criterion(z, y)
        # calculate gradients of parameters
        loss.backward()
        # update parameters
        optimizer.step()
        # store a plain float rather than a tensor so the list is cheap to keep and plot
        loss_list.append(loss.item())

    # Evaluation mode, and no autograd bookkeeping: nothing is back-propagated here.
    model.eval()
    correct = 0
    with torch.no_grad():
        for x_test, y_test in validation_loader:
            # make a prediction
            z = model(x_test)
            # the predicted class is the index of the largest output score
            _, yhat = torch.max(z, 1)
            # count the correctly classified samples in this mini-batch
            correct += (yhat == y_test).sum().item()

    accuracy = correct / N_test
    accuracy_list.append(accuracy)


In [None]:
# Validation accuracy computed after the last epoch
accuracy

In [None]:
# Plot the training loss against the mini-batch iteration at which it was recorded
iterations = range(len(loss_list))
plt.plot(iterations, loss_list)
plt.ylabel("loss")
plt.xlabel("iteration")
plt.show()

## Find the misclassified samples

In [None]:
# Collect the first few validation samples the model gets wrong and report them.
max_examples = 4
misclassified_samples = []
misclassified_labels = []
misclassified_preds = []

# Evaluation mode, and no autograd bookkeeping: this is inference only.
model.eval()
with torch.no_grad():
    # Loop through validation data
    for x_test, y_test in validation_loader:
        z = model(x_test)
        _, yhat = torch.max(z, 1)

        # positions within this mini-batch where the prediction differs from the label
        misclassified_mask = (yhat != y_test)
        misclassified_indices = misclassified_mask.nonzero(as_tuple=True)[0]

        # take only as many as are still needed, so the total never exceeds max_examples
        remaining = max_examples - len(misclassified_samples)
        for idx in misclassified_indices[:remaining]:
            misclassified_samples.append(x_test[idx])
            misclassified_labels.append(y_test[idx].item())
            misclassified_preds.append(yhat[idx].item())

        if len(misclassified_samples) >= max_examples:
            break

# Iterate over what was actually found: the validation set may contain fewer than
# max_examples mistakes, in which case indexing up to a fixed count would raise IndexError.
for i, (label, pred) in enumerate(zip(misclassified_labels, misclassified_preds), start=1):
    print(f"Sample {i}:")
    print(f"True Label: {label}")
    print(f"Predicted Label: {pred}")