#Data Loading and Splitting

In [1]:
import torch
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torch.optim as optim
import torchvision
from torchvision import datasets, models, transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import random_split
import time
import csv
from sklearn.dummy import DummyClassifier
import math

In [2]:
# Mount Google Drive at /content/drive (Colab-only: relies on the google.colab package).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
#Import Datasets
# Extract the training and test archives stored on the mounted Drive.
# -q silences unzip's per-file listing; no -d target is given, so the files
# are extracted into the current working directory.
!unzip -q '/content/drive/My Drive/APS360 Project/Breed_Group_Dataset.zip'
!unzip -q '/content/drive/My Drive/APS360 Project/Test Set.zip'

In [2]:
%%time
# Root folders of the extracted image datasets on the Colab runtime
train_path = '/content/Breed_Group_Dataset/'
test_path = '/content/Test Set/'

# Geometry shared by every pipeline: centre 360x360 crop, scaled down to 224x224
geometry_steps = [
    transforms.CenterCrop((360, 360)),
    transforms.Resize((224, 224)),
]

# Random augmentations, applied to training images only
augmentation_steps = [
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),
    transforms.RandomAdjustSharpness(0.5, p=0.1),
]

# Tensor conversion followed by per-channel normalisation
# (mean/std values commonly used with ImageNet-pretrained torchvision models)
tensor_steps = [
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]

train_transform = transforms.Compose(geometry_steps + augmentation_steps + tensor_steps)
test_transform = transforms.Compose(geometry_steps + tensor_steps)

# Three views of the training folder plus the held-out test folder:
#   full_dataset  - used in this cell only for its length when computing the split
#   train_dataset - augmented pipeline
#   val_dataset   - same images, but with the deterministic test-time pipeline
full_dataset = datasets.ImageFolder(train_path, transform=train_transform)
train_dataset = datasets.ImageFolder(train_path, transform=train_transform)
val_dataset = datasets.ImageFolder(train_path, transform=test_transform)
test_dataset = datasets.ImageFolder(test_path, transform=test_transform)


# Helper used to carve consecutive chunks out of the shuffled index list
def get_subset(indices, start, end):
    """Return the `end` consecutive entries of `indices` beginning at `start`.

    Despite its name, `end` is a count rather than a stop position: the
    slice taken is indices[start : start + end].
    """
    stop = start + end
    return indices[start:stop]

# Define the split ratio for training and validation
train_percentage, val_percentage = 0.8, 0.2
n_total = len(full_dataset)
train_count = int(n_total * train_percentage)
val_count = int(n_total * val_percentage)
# Truncating both products independently can leave a sample in neither split.
# When the two ratios are meant to cover the whole dataset, give the remainder
# to validation so every image is used exactly once.
if math.isclose(train_percentage + val_percentage, 1.0):
    val_count = n_total - train_count
test_count = len(test_dataset)

# Obtain indices for each set's dataloader (won't be used for test).
# A dedicated, seeded generator keeps the train/validation membership identical
# across kernel restarts without touching the global torch RNG state.
split_generator = torch.Generator().manual_seed(0)
indices = torch.randperm(n_total, generator=split_generator)
train_indices = get_subset(indices, 0, train_count)
val_indices = get_subset(indices, train_count, val_count)

CPU times: user 266 ms, sys: 35.8 ms, total: 302 ms
Wall time: 397 ms


In [11]:
# Sanity check: number of samples assigned to the validation split.
print(len(val_indices))

4021


In [3]:
# Data loaders: one image per batch, loaded in the main process, drawn in
# random order from the index subset that belongs to each split.
loader_options = {'batch_size': 1, 'num_workers': 0}
train_loader = torch.utils.data.DataLoader(
    train_dataset, sampler=SubsetRandomSampler(train_indices), **loader_options)
val_loader = torch.utils.data.DataLoader(
    val_dataset, sampler=SubsetRandomSampler(val_indices), **loader_options)

In [4]:
%%time
#Prepare tensor for random forest
use_cuda = True  # not referenced in this cell; kept in case other cells read it
i_max = len(train_indices) #number of training images

# One row per training image, one column per pixel of a single 224x224 channel.
# float32 matches the dtype of the image tensors and of the array scikit-learn
# trees work on internally, and halves the memory of the default float64 buffer.
training_tensor = np.zeros((i_max, 224*224), dtype=np.float32)
# Kept as an (i_max, 1) column so its shape is unchanged for other cells.
labels_tensor = np.zeros((i_max, 1))

# zip() against range(i_max) stops after i_max samples, replacing the manual
# counter and break; the loader yields one image per iteration (batch_size=1).
for row, (images, labels) in zip(range(i_max), train_loader):
  # Drop the batch axis, keep only channel 0 (red, assuming RGB input) and
  # flatten it into a 50176-element feature vector.
  pixels = images.squeeze(0)[0].reshape(224*224)
  training_tensor[row] = pixels.numpy()
  labels_tensor[row] = labels.item()

CPU times: user 2min 2s, sys: 12.7 s, total: 2min 15s
Wall time: 2min 30s


In [5]:
# Sanity check: expect (n_train, 224*224) for the features and (n_train, 1) for the labels.
print(training_tensor.shape)
print(labels_tensor.shape)

(16084, 50176)
(16084, 1)


#Random Forest Classifier

In [6]:
#Train model
from sklearn.ensemble import RandomForestClassifier
dog_classifier = RandomForestClassifier(n_estimators=100)

# labels_tensor is an (n_samples, 1) column vector; fit() expects a 1-D target
# and emits a DataConversionWarning otherwise, so flatten it explicitly.
# ravel() is a no-op if the labels are already 1-D.
dog_classifier.fit(training_tensor, labels_tensor.ravel())

  """


RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [10]:
# Evaluate the random forest on the validation split, one image at a time,
# tracking overall accuracy and a confusion matrix.
class_names = ["Herding", "Hound", "Non-Sporting", "Sporting", "Terrier", "Toy", "Working"]
max_element = len(val_indices)
total = 0
corr = 0

# String-typed table: first row/column hold the class names, the remaining
# cells hold counts (stored as text). Rows are predictions, columns are labels.
conf_matrix = np.array(
    [["pred |label"] + class_names] +
    [[name] + [0] * len(class_names) for name in class_names]
)

# zip() against range(max_element) caps the loop at the size of the split.
for _, (images, labels) in zip(range(max_element), val_loader):
  # Same feature extraction as for training: channel 0 flattened to one row.
  features = images.squeeze(0)[0].reshape(-1, 224*224).numpy()
  total = total+1

  # predict() returns a one-element array; index it before converting, since
  # int() on a non-0-d array is deprecated in recent NumPy releases.
  est = int(dog_classifier.predict(features)[0])
  label = int(labels.item())
  if est == label:
    corr = corr+1

  # +1 skips the header row/column of the table.
  conf_matrix[est+1,label+1]=int(conf_matrix[est+1,label+1])+1

# Guard against an empty validation split instead of raising ZeroDivisionError.
val_accuracy = corr/total if total else 0.0
print('Total elements: ',total)
print('Correct preds: ', corr)
print('Val acc: ',val_accuracy,'\n')
print('Confussion Matrix:\n')
print(conf_matrix)

Total elements:  4021
Correct preds:  930
Val acc:  0.23128574981347924 

Confussion Matrix:

[['pred |label' 'Herding' 'Hound' 'Non-Sporting' 'Sporting' 'Terrier'
  'Toy' 'Working']
 ['Herding' '28' '28' '18' '27' '21' '28' '38']
 ['Hound' '107' '251' '98' '141' '159' '152' '140']
 ['Non-Sporting' '7' '5' '7' '8' '6' '8' '5']
 ['Sporting' '33' '65' '30' '65' '55' '44' '54']
 ['Terrier' '59' '136' '66' '124' '220' '123' '98']
 ['Toy' '53' '102' '41' '79' '99' '135' '85']
 ['Working' '127' '133' '99' '139' '123' '128' '224']]
