<a href="https://colab.research.google.com/github/atish3/rob_535_final_project/blob/main/ROB535_FinalProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the Shared Drive paths used in the
# cells below all live under this mount point.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

'''
In this cell, we define our neural network
and a couple of normalization scripts
'''


# Pick the compute device once; the model and every batch are moved to it.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

class NeuralNetwork(nn.Module):
    """VGG-16-style convolutional classifier producing 3 logits per image.

    The feature extractor is 13 3x3 convolutions in five stages, each stage
    followed by a 2x2 max-pool, then three fully connected layers. The first
    linear layer expects 25088 = 512 * 7 * 7 features, i.e. a 224x224 input
    after the five poolings.

    The layer order inside ``vgg_16`` must not change: ``state_dict`` keys are
    derived from the positions in the ``nn.Sequential``, so reordering would
    break loading of previously saved checkpoints.
    """

    def __init__(self):
        super().__init__()
        self.vgg_16 = nn.Sequential(
            # Stage 1: 3 -> 64 channels
            nn.Conv2d(3, 64, 3, padding='same'),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Stage 2: 64 -> 128 channels
            nn.Conv2d(64, 128, 3, padding='same'),
            nn.ReLU(),
            nn.Conv2d(128, 128, 3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Stage 3: 128 -> 256 channels
            nn.Conv2d(128, 256, 3, padding='same'),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding='same'),
            nn.ReLU(),
            nn.Conv2d(256, 256, 3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Stage 4: 256 -> 512 channels
            nn.Conv2d(256, 512, 3, padding='same'),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding='same'),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Stage 5: 512 -> 512 channels
            nn.Conv2d(512, 512, 3, padding='same'),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding='same'),
            nn.ReLU(),
            nn.Conv2d(512, 512, 3, padding='same'),
            nn.ReLU(),
            nn.MaxPool2d(2),
            # Classifier head
            nn.Flatten(),
            nn.Linear(25088, 4096),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(4096, 3),
        ).to(device)

    def fit(self, X):
        """Store the per-channel mean and standard deviation of ``X``.

        The statistics are reduced over dimensions 0, 2 and 3, so ``X`` must
        be 4-D (assumed to be batch, channel, height, width).
        """
        self.image_mean = torch.mean(X, dim=(0, 2, 3))
        self.image_std = torch.std(X, dim=(0, 2, 3))

    def transform(self, X):
        """Normalize ``X`` channel-wise with the statistics stored by ``fit``."""
        normalize = transforms.Normalize(self.image_mean, self.image_std)
        return normalize(X)

    def forward(self, x):
        """Normalize the batch with its own statistics and return the logits.

        NOTE: the mean/std are recomputed from every batch passed in (also in
        eval mode), so the output for one image depends on the other images
        in its batch. This is kept as-is because existing checkpoints were
        trained with this behaviour.
        """
        self.fit(x)
        x = self.transform(x)
        return self.vgg_16(x)

    def init_weights(self, m):
        """Initialise one sub-module in place.

        Linear layers get Xavier-uniform weights; Conv2d layers get constant
        0.01 weights; every bias that exists is set to 0.01. Other module
        types are left untouched. The signature suits
        ``model.apply(model.init_weights)``, although nothing in this file
        calls it.
        """
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
        elif isinstance(m, nn.Conv2d):
            # In-place fill: the non-underscore spelling does not modify the
            # parameter.
            nn.init.constant_(m.weight, 0.01)
        else:
            return
        # Layers created with bias=False have no bias tensor to fill.
        if m.bias is not None:
            nn.init.constant_(m.bias, 0.01)

Using cuda device


In [None]:
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import datasets, transforms
from skimage.transform import resize

'''
In this cell, we define a custom dataset to read in
the resized dataset images
'''

class CustomImageDataset(Dataset):
    """Dataset backed by an annotations CSV and a directory of JPEG images.

    Column 0 of the CSV holds entries of the form ``<folder>/<number>``; the
    matching image is ``<img_dir>/<folder>/<number>_image.jpg``. In training
    mode column 1 of the CSV is used as the label.
    """

    def __init__(self, annotations_file, img_dir, train=True):
        """Load the annotations table and remember where the images live."""
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.train = train

    def __len__(self):
        """Return the number of rows in the annotations table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return ``(image, label)`` when training, else ``(image, entry)``.

        ``image`` is the decoded file converted to a float tensor and
        ``entry`` is the raw ``<folder>/<number>`` string from the CSV.
        """
        entry = self.img_labels.iloc[idx, 0]
        parts = entry.split("/")
        file_name = f"{parts[1]}_image.jpg"
        pixels = read_image(os.path.join(self.img_dir, parts[0], file_name)).float()

        if not self.train:
            return pixels, entry
        return pixels, self.img_labels.iloc[idx, 1]

In [None]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import transforms

'''
We read in the training and validation sets
in this cell
'''

# Labelled dataset: labels come from the trainval CSV, pixels from the
# directory of resized images.
training_data = CustomImageDataset(
    annotations_file="/content/drive/Shareddrives/ROB 535 Perception Project/trainval/labels.csv",
    img_dir="/content/drive/Shareddrives/ROB 535 Perception Project/new_trainval",
)

# 60% of the samples go to training; the remainder is held out for validation.
train_size = int(0.6*len(training_data))
val_size = len(training_data) - train_size

# NOTE: the split is random and unseeded, so it differs between sessions.
train_data, val_data = random_split(training_data, lengths=[train_size, val_size])

# Training runs on mini-batches of 64; validation feeds one image at a time.
train_dataloader = DataLoader(
    train_data,
    batch_size=64,
    shuffle=True,
    pin_memory=True,
)
val_dataloader = DataLoader(
    val_data,
    batch_size=1,
    shuffle=True,
    pin_memory=True,
)

In [None]:
import torch
import numpy as np
import random


def predictions(logits):
    """Return, for each row of ``logits``, the index of its largest value."""
    return logits.argmax(dim=1)


def train_epoch(data_loader, model, criterion, optimizer):
    """Run one optimisation pass over ``data_loader``.

    For every batch: move it to ``device``, reset the gradients, compute the
    loss of the model output against the targets, back-propagate and take an
    optimizer step. The zero-based batch index is printed after each step.
    """
    for step, (inputs, targets) in enumerate(data_loader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        criterion(model(inputs), targets).backward()
        optimizer.step()

        print(step)


def _evaluate_loader(loader, model, criterion):
    """Return ``(accuracy, mean batch loss)`` of ``model`` over ``loader``.

    Runs without gradient tracking. The loss is the mean of the per-batch
    criterion values. An empty loader yields ``(nan, nan)`` instead of
    raising ``ZeroDivisionError``.
    """
    correct, total = 0, 0
    batch_losses = []

    with torch.no_grad():
        for X, y in loader:
            X = X.to(device)
            y = y.to(device)

            output = model(X)
            predicted = predictions(output)
            total += y.size(0)
            correct += (predicted == y).sum().item()
            batch_losses.append(criterion(output, y).item())

    if total == 0:
        return float("nan"), float("nan")
    return correct / total, float(np.mean(batch_losses))


#Evaluation loop for the training and validation sets
def evaluate_epoch(tr_loader, val_loader, model, criterion, epoch,
    stats):
    """Measure accuracy and loss on both loaders and record them in ``stats``.

    Appends ``[val_acc, val_loss, train_acc, train_loss]`` to ``stats`` and
    returns nothing. The caller is responsible for putting ``model`` in eval
    mode. ``epoch`` is not used; it is kept so existing call sites keep
    working.
    """
    train_acc, train_loss = _evaluate_loader(tr_loader, model, criterion)
    val_acc, val_loss = _evaluate_loader(val_loader, model, criterion)
    stats.append([val_acc, val_loss, train_acc, train_loss])



#Model initialization
epoch_num = 0
best_val = 0
best_ep = 0

model = NeuralNetwork().to(device)
# Per-class loss weights; presumably the reciprocals of the per-class sample
# counts, to counter class imbalance -- TODO confirm against the label file.
weights = torch.tensor([1/564, 1/4803, 1/2206])
criterion = torch.nn.CrossEntropyLoss(weight=weights).to(device)
optimizer = torch.optim.Adam(params=model.parameters())

#Creating checkpoint filepath
# One directory is used both for creation and for saving; it is the same
# directory the submission cell loads "checkpoint_new_25.pt" from.
checkpoint_dir = "/content/drive/Shareddrives/ROB 535 Perception Project/final_checkpoints"
os.makedirs(checkpoint_dir, exist_ok=True)

#Stores training and validation info across epochs
#Each entry is [val_acc, val_loss, train_acc, train_loss]
stats = []


#Training loop
for epoch in range(25):
    # Train model
    print(epoch)
    model.train()
    train_epoch(train_dataloader, model, criterion, optimizer)

    # Evaluate model
    model.eval()
    evaluate_epoch(train_dataloader, val_dataloader, model, criterion, epoch+1,
        stats)
    print(stats[-1])
    val_acc, val_loss = stats[-1][0], stats[-1][1]

    #Save checkpoint
    # The file is numbered with the 1-based epoch so it matches the 'epoch'
    # value stored inside it. Metrics are stored as plain floats so the file
    # does not depend on numpy scalar types when it is loaded.
    torch.save({
        'epoch': epoch+1,
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': float(val_loss),
        'val_acc': float(val_acc),
        }, os.path.join(checkpoint_dir, "checkpoint_new_{0}.pt".format(epoch+1)))

    #Update if we find a better trained model at any epoch
    if val_acc > best_val:
        best_val = val_acc
        # 1-based, i.e. the number in the matching checkpoint file name
        best_ep = epoch+1

print(best_ep, best_val)

In [None]:
'''
This cell loads in the test data and the model used for submission
It runs the model against the current training and validation set, capturing
the training and validation accuracy and loss at that epoch, and then
generates a file of labels for the test data.

To regenerate a test data file, uncomment the commented lines below.
The final submission file is saved at 
/content/drive/Shareddrives/ROB 535 Perception Project/test/output.csv
'''


def eval_test(test_loader, model,
              out_path="/content/drive/Shareddrives/ROB 535 Perception Project/test/output.csv"):
    """Predict a label for every test image and write them to a CSV file.

    Args:
        test_loader: iterable yielding ``(images, entries)`` batches, where
            ``entries`` are the ``guid/image`` strings of the images.
        model: network whose output is passed to ``predictions``.
        out_path: destination CSV; defaults to the submission file location.

    The file starts with the header ``guid/image,label`` followed by one row
    per image. Every element of a batch is written, so batch sizes larger
    than 1 are handled. The running batch count is printed every 100 batches.
    The file is closed even if inference raises.
    """
    with open(out_path, "w") as out_file, torch.no_grad():
        out_file.write("guid/image,label\n")

        for batch_num, (X, img_dirs) in enumerate(test_loader, start=1):
            if batch_num % 100 == 0:
                print(batch_num)

            output = model(X.to(device))
            labels = predictions(output).tolist()
            for guid, label in zip(img_dirs, labels):
                out_file.write("{0},{1}\n".format(guid, label))


#test_data = CustomImageDataset("/content/drive/Shareddrives/ROB 535 Perception Project/test/test.csv", "/content/drive/Shareddrives/ROB 535 Perception Project/new_test", train=False)
#test_dataloader = DataLoader(test_data, batch_size=1, shuffle=False, pin_memory=True)


model = NeuralNetwork().to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
stats = []

# map_location lets a checkpoint saved from a GPU session be restored on
# whatever device this session selected (including CPU-only runtimes).
checkpoint = torch.load(
    "/content/drive/Shareddrives/ROB 535 Perception Project/final_checkpoints/checkpoint_new_25.pt",
    map_location=device,
)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
loss = checkpoint['loss']

# Re-measure accuracy/loss on the current train/validation loaders.
# NOTE: `criterion` is the loss object created in the training cell.
model.eval()
evaluate_epoch(train_dataloader, val_dataloader, model, criterion, epoch+1, stats)
print(stats[-1])
#eval_test(test_dataloader, model)



In [None]:
import os 
import glob

#This script generates a label file for the test dataset, used by
#the CustomImageDataset class defined above. No need to run this cell, since
#the label file already exists

test_dir = "/content/drive/Shareddrives/ROB 535 Perception Project/test"

# The context manager closes the CSV even if the directory walk fails.
with open("/content/drive/Shareddrives/ROB 535 Perception Project/test/test.csv", "w") as test_csv:
    test_csv.write("guid/image\n")

    for root, dirs, _ in os.walk(test_dir):
        # Sorting in place makes both the traversal and the row order
        # deterministic.
        dirs.sort()
        for subdir in dirs:
            pattern = os.path.join(root, subdir, "*_image.jpg")
            for name in sorted(glob.glob(pattern)):
                # Take the folder and file number from the path itself
                # rather than from fixed "/"-split positions, so the script
                # does not depend on how deep test_dir is mounted.
                guid = os.path.basename(os.path.dirname(name))
                filenum = os.path.basename(name).split("_")[0]
                test_csv.write("{0}/{1}\n".format(guid, filenum))


In [None]:
import os 
import glob
from skimage.transform import resize
from torchvision.io import read_image
from torchvision.transforms import ToPILImage
import numpy as np
import cv2
from PIL import Image


'''
This script preprocesses the training images by resizing them to 224x224
and saving them to a separate directory. If you are accessing this code through
the Shared Drive, there is no need to repeat this process.
'''

train_dir = "/content/drive/Shareddrives/ROB 535 Perception Project/trainval"
train_224_dir = "/content/drive/Shareddrives/ROB 535 Perception Project/new_trainval"
# exist_ok avoids the separate "does it exist?" check.
os.makedirs(train_224_dir, exist_ok=True)


# Running count of processed images, printed every 1000 images.
img_num = 1

for root, dirs, _ in os.walk(train_dir):
    for subdir in dirs:
        print(subdir)
        old_dir = os.path.join(root, subdir)
        new_dir = os.path.join(train_224_dir, subdir)
        os.makedirs(new_dir, exist_ok=True)

        for name in sorted(glob.glob(os.path.join(old_dir, "*_image.jpg"))):
            # File name taken from the path itself instead of a fixed
            # "/"-split position, so the mount depth does not matter.
            filename = os.path.basename(name)
            image = read_image(name).float()
            # Keep the channel count, resize the two spatial axes to 224x224.
            image = resize(image, (image.shape[0], 224, 224))
            # Move the leading (channel) axis last before handing to OpenCV.
            image = image.transpose((1,2,0))
            out_path = os.path.join(new_dir, filename)
            # NOTE(review): the channel swap presumably converts the RGB data
            # from read_image into the BGR order cv2.imwrite expects -- confirm.
            cv2.imwrite(out_path, cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if img_num % 1000 == 0:
                print(img_num)
            img_num += 1




In [None]:
import os 
import glob
from skimage.transform import resize
from torchvision.io import read_image
from torchvision.transforms import ToPILImage
import numpy as np
import cv2
from PIL import Image

'''
This script preprocesses the test images by resizing them to 224x224
and saving them to a separate directory. If you are accessing this code through
the Shared Drive, there is no need to repeat this process.
'''


test_dir = "/content/drive/Shareddrives/ROB 535 Perception Project/test"
test_224_dir = "/content/drive/Shareddrives/ROB 535 Perception Project/new_test"
# exist_ok avoids the separate "does it exist?" check.
os.makedirs(test_224_dir, exist_ok=True)


# Running count of processed images, printed every 1000 images.
img_num = 1

for root, dirs, _ in os.walk(test_dir):
    for subdir in dirs:
        old_dir = os.path.join(root, subdir)
        new_dir = os.path.join(test_224_dir, subdir)
        os.makedirs(new_dir, exist_ok=True)

        for name in sorted(glob.glob(os.path.join(old_dir, "*_image.jpg"))):
            # File name taken from the path itself instead of a fixed
            # "/"-split position, so the mount depth does not matter.
            filename = os.path.basename(name)
            image = read_image(name).float()
            # Keep the channel count, resize the two spatial axes to 224x224.
            image = resize(image, (image.shape[0], 224, 224))
            # Move the leading (channel) axis last before handing to OpenCV.
            image = image.transpose((1,2,0))
            out_path = os.path.join(new_dir, filename)
            # NOTE(review): the channel swap presumably converts the RGB data
            # from read_image into the BGR order cv2.imwrite expects -- confirm.
            cv2.imwrite(out_path, cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            if img_num % 1000 == 0:
                print(img_num)
            img_num += 1