In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Data: https://www.kaggle.com/datasets/puneet6060/intel-image-classification/data

#Problem Description

For this problem I am going to running classification on an intel image dataset sourced from kaggle. The images fall into six categories; buildings, forests, glaciers, mountains, seas, and streets. The full dataset contains roughly 25,000 color images of size 150x150. Due to memory limitations we are going to be down sampling from the full set. The Training set will include 80 images from each of the six, while the testing set will only include 10 from each category. This would be higher, but for the ResNet-50 model this seems to be just about as high as I can go without exceeding the 12.7 GB memory cap.

#Data Analysis and Processing


In [None]:
import os

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

import glob as gb
import cv2

In [None]:
train_path = 'drive/MyDrive/Datasets/intel_image_dataset/seg_train/'
test_path = 'drive/MyDrive/Datasets/intel_image_dataset/seg_test/'
pred_path = 'drive/MyDrive/Datasets/intel_image_dataset/seg_pred/'

In [None]:
label = {'buildings': 0, 'forest': 1, 'glacier': 2, 'mountain': 3, 'sea': 4, 'street': 5}

In [None]:
# linked below is a notebook. Some of the code relating to extracting the dataset and
# converting it into a csv file was modified and the used for the save of this project

# https://www.kaggle.com/code/offwitt0/intel-image-classification

training_instances = {}
training_total = 0

testing_instances = {}
testing_total = 0

#find how many instances of each label are there in each dataset
for folder in  os.listdir(train_path + 'seg_train') :
  files = gb.glob(pathname= str( train_path +'seg_train//' + folder + '/*.jpg'))
  found = len(files)

  training_instances[folder] = found
  training_total += found

for folder in  os.listdir(test_path +'seg_test') :
  files = gb.glob(pathname= str( test_path +'seg_test//' + folder + '/*.jpg'))
  found = len(files)

  testing_instances[folder] = found
  testing_total += found

for name in label.keys():
  label_percentage_train = 100 * training_instances[name]/training_total
  label_percentage_test = 100 * testing_instances[name]/testing_total

  print('{}:\n\ttraining: {:.2f}%\n\ttest: {:.2f}%'.format(name, label_percentage_train, label_percentage_test))



buildings:
	training: 15.61%
	test: 14.56%
forest:
	training: 16.18%
	test: 15.79%
glacier:
	training: 17.13%
	test: 18.42%
mountain:
	training: 17.90%
	test: 17.55%
sea:
	training: 16.20%
	test: 16.99%
street:
	training: 16.97%
	test: 16.69%


In [None]:
training_dims = {(150, 150, 3): 0, 'other': 0}
testing_dims = {(150, 150, 3): 0, 'other': 0}

for folder in  os.listdir(train_path +'seg_train') :
  files = gb.glob(pathname= str( train_path +'seg_train//' + folder + '/*.jpg'))
  for file in files:
    image = plt.imread(file)

    if(image.shape == (150, 150, 3)):
      training_dims[image.shape] += 1
    else:
      training_dims['other'] += 1

for folder in  os.listdir(test_path +'seg_test') :
  files = gb.glob(pathname= str( test_path +'seg_test//' + folder + '/*.jpg'))
  for file in files:
    image = plt.imread(file)

    if(image.shape == (150, 150, 3)):
      testing_dims[image.shape] += 1
    else:
      testing_dims['other'] += 1

print(f'training dimensions: {training_dims}')
print(f'testing dimensions: {testing_dims}')

training dimensions: {(150, 150, 3): 13986, 'other': 48}
testing dimensions: {(150, 150, 3): 2995, 'other': 7}


In [None]:
import torch.nn.functional as func

def load(path, folder_name, sample_size):
  X = []
  y = []

  # fill lists
  for folder in  os.listdir(path + folder_name) :
    files = gb.glob(pathname= str( path + folder_name + '//' + folder + '/*.jpg'))

    ctr = 0
    for file in files:
      image = cv2.imread(file) #converts the image into a numpy array

      # dont include images whose dimensions do not match (150, 150, 3)
      if(image.shape != (150, 150, 3)):
        continue
      else:
        ctr += 1

      X.append(list(image))
      y.append(label[folder])

      if(ctr == sample_size):
        break

      print("\rCATEGORY: {:.9s} ............... {:.2f}%".format(folder, 100 * ctr/sample_size), end = '')

  return torch.tensor(X).float(), func.one_hot(torch.tensor(y), 6).float()

In [None]:
import torch

# convert the X_train and y_train to numpy arrays
X_train, y_train = load(train_path, 'seg_train', 80)
X_test, y_test = load(test_path, 'seg_test', 10)

print(f'\rX_train shape: {X_train.shape}, y_train shape: {y_train.shape}')
print(f'X_test shape: {X_test.shape}, y_test shape: {y_test.shape}')

X_train shape: torch.Size([480, 150, 150, 3]), y_train shape: torch.Size([480, 6])
X_test shape: torch.Size([60, 150, 150, 3]), y_test shape: torch.Size([60, 6])


# Part 1

In [None]:
import torch.nn as nn
import torch.nn.functional as func

class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()
    # convolutional layers
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)

    # pooling layers
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

    # fully connected layers
    self.fc1 = nn.Linear(128 * 18 * 18, 512)  # 18x18 is the spatial size after three max-pooling layers
    self.fc2 = nn.Linear(512, 6)  # because there are siz categories

  def forward(self, X):
    # first convolutional layer
    c1 = func.relu(self.conv1(X));
    p1 = self.pool(c1)

    # second convolutional layer
    c2 = func.relu(self.conv2(p1));
    p2 = self.pool(c2)

    # third convolutional layer
    c3 = func.relu(self.conv3(p2));
    p3 = self.pool(c3)

    # flattened output of the coonvolutional layers
    co = torch.flatten(p3, 1) # flatten the dimensions

    # fully connected layers
    o1 = func.relu(self.fc1(co))
    o2 = func.softmax(self.fc2(o1))

    return o2

In [None]:
import time
from torch.autograd import Variable
from torch.utils.data import DataLoader, TensorDataset

def train(model, X_train, y_train, num_epochs, batch_size, learning_rate):
  start_time = time.time()

  #initalize the loss function and the optimizer
  criterion = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

  #transform the training data into tensor
  training_dataset = TensorDataset(X_train, y_train)

  for epoch_idx in range(num_epochs):
    print(f"EPOCH: {epoch_idx + 1}")

    epoch = DataLoader(training_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    total_loss = 0

    for batch_idx, (X_minibatch, y_minibatch) in enumerate(epoch):

      #zero the gradient and
      optimizer.zero_grad()  # Clear gradients from the previous iteration
      y_preds = model(X_minibatch)

      #compute the loss
      loss = criterion(y_preds, y_minibatch)
      total_loss += loss

      #preform backwards propogation and update the weights
      loss.backward()
      optimizer.step()

      #print the batch loss
      print("\tminibatch {:<3d} LOSS: {:.6f}   [{:5d}/{:5d}]".format(batch_idx, loss, (batch_idx + 1) * batch_size, X_train.size(0)))

    #print the average loss for the epoch
    print("\n\tAVG. LOSS: {:.6f}".format(total_loss/(X_train.size(0) // batch_size)))


  end_time = time.time()
  print("\nTIME: {:.2f} sec".format(end_time-start_time))

In [None]:
# initialize the convolutional networks
cnn1 = CNN()

In [None]:
# train the neural network)
train(cnn1, X_train.permute(0, 3, 1, 2), y_train, 3, 48, 0.001)

EPOCH: 1


  o2 = func.softmax(self.fc2(o1))


	minibatch 0   LOSS: 1.896871   [   48/  480]
	minibatch 1   LOSS: 1.918592   [   96/  480]
	minibatch 2   LOSS: 1.918592   [  144/  480]
	minibatch 3   LOSS: 1.897758   [  192/  480]
	minibatch 4   LOSS: 1.918592   [  240/  480]
	minibatch 5   LOSS: 1.981092   [  288/  480]
	minibatch 6   LOSS: 1.751925   [  336/  480]
	minibatch 7   LOSS: 1.856092   [  384/  480]
	minibatch 8   LOSS: 1.731092   [  432/  480]
	minibatch 9   LOSS: 1.939425   [  480/  480]

	AVG. LOSS: 1.881003
EPOCH: 2
	minibatch 0   LOSS: 1.897758   [   48/  480]
	minibatch 1   LOSS: 1.856092   [   96/  480]
	minibatch 2   LOSS: 1.876925   [  144/  480]
	minibatch 3   LOSS: 1.876925   [  192/  480]
	minibatch 4   LOSS: 1.793592   [  240/  480]
	minibatch 5   LOSS: 1.835258   [  288/  480]
	minibatch 6   LOSS: 1.918592   [  336/  480]
	minibatch 7   LOSS: 1.939425   [  384/  480]
	minibatch 8   LOSS: 1.856092   [  432/  480]
	minibatch 9   LOSS: 1.918592   [  480/  480]

	AVG. LOSS: 1.876925
EPOCH: 3
	minibatch 0   LOS

## Metrics

For this homework I am just going to be using a normal accuracy measurement for my main metric. There are a couple of reasons for this. First of all the normal accuracy measurement is a good overview of how model is doing in general. Beyond that, the normal accuracy measurment is not very computationally intesive. This is specifically important because I am running resnet50, memory is at a premium and due to how long it already takes to train that model, I would prefer to keep minimize time to compute when possible. I would also like to use an roc_auc measurement to see what my models confidence is like but unfortunately past experience has told that this would almost certainly crash my notebook. Another metric that I might implement if I had some more time and memory would be a confusion matrix just to specifically point to which labels my model tends to get right.

Additionally I will be measuring and comparing the time in seconds that it takes each model to complete it training. This is something I do for pretty much model as a way of guaging effeciency.

In [None]:
from sklearn.metrics import roc_auc_score

def accuracy(model, X_test, y_test):
  #generate predictions and convert them to labels
  y_preds = model(X_test) #probabilites
  y_preds = torch.argmax(y_preds, dim=1)

  #compute the accuracy
  return 100 * torch.eq(torch.argmax(y_test, dim=1), y_preds).sum()/y_test.size(0)

def roc(model, X_test, y_test):
  #generate predictions and convert them to labels
  y_preds = model(X_test) #probabilites
  _, y_preds = torch.max(y_preds, 1) #find the highest probabilities
  y_preds = y_preds.detach().numpy()

  print(y_preds)

  #compute the accuracy
  return roc_auc_score(y_test, y_preds)

In [None]:
print("Convolutional Neural Network 1\n")

#find the accuracy for each model for the train set
accuracy_cnn_train = accuracy(cnn1, X_train.permute(0, 3, 1, 2), y_train)

#display the results
print("TRAINING ACCURACY: {:.2f}%".format(accuracy_cnn_train))

#find the accuracy for each model for the test set
accuracy_cnn_test= accuracy(cnn1, X_test.permute(0, 3, 1, 2), y_test)

#display the results
print("TESTING ACCURACY: : {:.2f}%".format(accuracy_cnn_test))

Convolutional Neural Network 1



  o2 = func.softmax(self.fc2(o1))


TRAINING ACCURACY: 16.67%
TESTING ACCURACY: : 16.67%


# Model 1 Evaluation

In both the training and testing set the model managed to acheive a 16.67% accuracy rating. This is bad but not entirely to be unexpected. Given that we are processing colored images of size 150x150, in order to capture enough detail to be really accurate the model would probably have be much more complex than the one implemented above. Additionally one barrier which might have prevented the model from acheiving a higher accuracy would be a vanishing gradient which might explain why loss between minibatches does not seem to change too much. Something else which I think is happening in this scenario is that the model is getting stuck in a local minima when preforming gradient descent. I think this might explain why the loss suddenly increases in some instances, even after decreasing steadily for multiple minibatches in a row. It also makes some sense when looking at the accuracy. 16.67% is pretty much exactly the same odds as one would get if they were to guess randomly. It could be the machine found guessing randomly to be the most immediate way to icrease accuracy. Finally I would just like to note that I did try altering many of the hyperparameters in order to increase the accuracy but the effects were ultimately negligable. All that being said the model was quite quick to train which I suppose was the one upside. I do suspect this issue may not be the case when running resnet50.




# Part 2

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision import datasets, transforms

# intialize the model
resnet50 = models.resnet50()

transform = transforms.Compose([
    transforms.ToPILImage(), # convert tensor to PIL(python image library) Image
    transforms.Resize((224, 224)), # resize to 224x224 which is what the model expects
    transforms.ToTensor(), # convert back to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) # normalize tensor
])

# Apply transformations to each sample in the tensor dataset
transformed_X_train = torch.stack([transform(img) for img in X_train.permute(0, 3, 1, 2)])
transformed_y_train = torch.argmax(y_train, dim=1)

print(transformed_X_train.shape)

#train the model
train(resnet50, transformed_X_train, transformed_y_train, 3, 48, 0.001)

torch.Size([480, 3, 224, 224])
EPOCH: 1
	minibatch 0   LOSS: 7.715726   [   48/  480]
	minibatch 1   LOSS: 4.320488   [   96/  480]
	minibatch 2   LOSS: 2.073392   [  144/  480]
	minibatch 3   LOSS: 3.279970   [  192/  480]
	minibatch 4   LOSS: 2.207946   [  240/  480]
	minibatch 5   LOSS: 2.224346   [  288/  480]
	minibatch 6   LOSS: 1.702747   [  336/  480]
	minibatch 7   LOSS: 1.748207   [  384/  480]
	minibatch 8   LOSS: 2.127285   [  432/  480]
	minibatch 9   LOSS: 1.621047   [  480/  480]

	AVG. LOSS: 2.902116
EPOCH: 2
	minibatch 0   LOSS: 1.589665   [   48/  480]
	minibatch 1   LOSS: 1.413880   [   96/  480]
	minibatch 2   LOSS: 1.166868   [  144/  480]
	minibatch 3   LOSS: 1.092967   [  192/  480]
	minibatch 4   LOSS: 1.512559   [  240/  480]
	minibatch 5   LOSS: 1.493371   [  288/  480]
	minibatch 6   LOSS: 1.819302   [  336/  480]
	minibatch 7   LOSS: 1.302781   [  384/  480]
	minibatch 8   LOSS: 1.963434   [  432/  480]
	minibatch 9   LOSS: 1.197525   [  480/  480]

	AVG. LO

In [None]:
# apply the transformation to the testing set
transformed_X_test = torch.stack([transform(img) for img in X_test.permute(0, 3, 1, 2)])
transformed_y_test = torch.argmax(y_test, dim=1)

In [None]:
# The training accuracy will not be calculated to avoid crashing due to memory limitations

# #find the accuracy for each model for the train set
# accuracy_resnet50_train = accuracy(resnet50, transformed_X_train, transformed_y_train)

# #display the results
# print("TRAINING ACCURACY: {:.2f}%".format(accuracy_resnet50_train))

#generate predictions and convert them to labels
resnet50_y_preds = resnet50(transformed_X_test) #probabilites

In [None]:
_, resnet50_y_preds = torch.max(resnet50_y_preds, 1)

#compute the accuracy
accuracy_resnet50_test = 100 * torch.eq(transformed_y_test, resnet50_y_preds).sum()/transformed_y_test.size(0)

#display the results
print("TRAINING ACCURACY: : {:.2f}%".format(accuracy_resnet50_test))

TRAINING ACCURACY: : 53.33%


# ResNet50 Evaluation

(NOTE: the training data accuracy has been ommitted during this step to avoid crashing, I tried. As a result this evaluation will only be comparing the testing data results.)

The resnet50 model, preformed significantly better than the prior basic CNN. I think one of the major reasons this is the case is because the structure of the residual neural network incorporates skip connections which can remedy the problem of the vanishing gradient. Additionally, ResNet-50 is simply a much more complex model, containing 48 convolutinal layer. This model was also incredibly intensive in terms of memory. During the course of training, I just barely managed to stay under the system cap of 12.7 GB. At its peak the model was using around 12 GB of memory total. Additionally the model took a considerable amount of time to train, roughly 20 min in total.


# Part 3

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision import datasets, transforms

# define a randomized augmentation on the image
augment = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip()
])

# apply the augmentations to the dataset
augmented_X_train = torch.stack([augment(img) for img in X_train.permute(0, 3, 1, 2)])

In [None]:
# initialize another convolution neural network
cnn2 = CNN()

# train the neural network on the augmented data
train(cnn2, augmented_X_train, y_train, 3, 48, 0.001)

EPOCH: 1


  o2 = func.softmax(self.fc2(o1))


	minibatch 0   LOSS: 1.825236   [   48/  480]
	minibatch 1   LOSS: 1.856092   [   96/  480]
	minibatch 2   LOSS: 1.960258   [  144/  480]
	minibatch 3   LOSS: 1.918592   [  192/  480]
	minibatch 4   LOSS: 1.814425   [  240/  480]
	minibatch 5   LOSS: 1.876925   [  288/  480]
	minibatch 6   LOSS: 1.876925   [  336/  480]
	minibatch 7   LOSS: 1.856092   [  384/  480]
	minibatch 8   LOSS: 1.856092   [  432/  480]
	minibatch 9   LOSS: 1.856092   [  480/  480]

	AVG. LOSS: 1.869673
EPOCH: 2
	minibatch 0   LOSS: 1.876925   [   48/  480]
	minibatch 1   LOSS: 1.918592   [   96/  480]
	minibatch 2   LOSS: 1.835258   [  144/  480]
	minibatch 3   LOSS: 1.876925   [  192/  480]
	minibatch 4   LOSS: 1.814425   [  240/  480]
	minibatch 5   LOSS: 1.876925   [  288/  480]
	minibatch 6   LOSS: 1.835258   [  336/  480]
	minibatch 7   LOSS: 1.876925   [  384/  480]
	minibatch 8   LOSS: 1.918592   [  432/  480]
	minibatch 9   LOSS: 1.939425   [  480/  480]

	AVG. LOSS: 1.876925
EPOCH: 3
	minibatch 0   LOS

In [None]:
print("Convolutional Neural Network 2\n")

#find the accuracy for each model for the test set
accuracy_cnn_test_2= accuracy(cnn2, X_test.permute(0, 3, 1, 2), y_test)

#display the results
print("TESTING ACCURACY: : {:.2f}%".format(accuracy_cnn_test_2))

Convolutional Neural Network 2

TESTING ACCURACY: : 16.67%


  o2 = func.softmax(self.fc2(o1))


In [None]:
#find the accuracy for each model for the train set
accuracy_cnn_train_2 = accuracy(cnn2, X_train.permute(0, 3, 1, 2), y_train)

#display the results
print("TRAINING ACCURACY: {:.2f}%".format(accuracy_cnn_train_2))

  o2 = func.softmax(self.fc2(o1))


TRAINING ACCURACY: 16.67%


# Evaluation

The testing and training accuracy more or less have not changed from the first implmentation. This is probably for the same reason as was stated in that first evaluation. Were the model to be say, ResNet-50, the augmenting of this data would probably have helped to avoid overfitting the data however. Depending on the test set, this could have decreased the accuracy of the model or increased the accuracy, it is not really possible to say. Once again this model ran very quick, much quicker than the ResNet-50 model.
