In [1]:
# Colab-only: mount Google Drive at /content/gdrive. The feature folders used
# later in this notebook are addressed through this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
import torch
import numpy as np

import os
import torchvision
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torchvision.models

In [3]:
# Location of the saved feature tensors on Google Drive.
# The three splits sit in sibling folders under a single root, so the root is
# spelled once and every split path is derived from it (trailing slashes kept).
feature_root = '/content/gdrive/My Drive/University/5th_Year/APS360/Project/Features/'
feature_path_TRAIN = feature_root + 'Train/'
feature_path_TEST = feature_root + 'Test/'
feature_path_VALID = feature_root + 'Valid/'


In [4]:
# DataLoader settings shared by every split's loader
num_workers = 1   # value passed as DataLoader(num_workers=...)
batch_size = 32   # value passed as DataLoader(batch_size=...)

def get_dataset(feature_path):
  """Build a DatasetFolder over the saved feature files under `feature_path`.

  Only files whose name ends in '.tensor' are picked up, and each one is read
  with torch.load. Labels come from DatasetFolder's folder-per-class layout.

  Args:
    feature_path: root directory of one split (train / validation / test).

  Returns:
    A torchvision.datasets.DatasetFolder yielding (features, label) pairs.
  """
  # NOTE(review): torch.load unpickles file contents; only point this at
  # feature files you produced yourself.
  # `extensions` must be a real tuple: ('.tensor') is just a parenthesised str.
  return torchvision.datasets.DatasetFolder(feature_path, loader=torch.load, extensions=('.tensor',))

def get_feature_loader(dataset, shuffle=True):
  """Wrap `dataset` in a DataLoader using the notebook-level loader settings.

  Args:
    dataset: any dataset accepted by torch.utils.data.DataLoader.
    shuffle: whether batches are drawn in random order. Defaults to True,
      which matches the previous hard-coded behaviour; pass False for
      validation / test loaders when a stable iteration order is wanted.

  Returns:
    A torch.utils.data.DataLoader configured with the module-level
    `batch_size` and `num_workers`.
  """
  return torch.utils.data.DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, shuffle=shuffle)

def print_shape(dataiter):
  """Print the shapes of the next (features, labels) batch from `dataiter`.

  Note that this consumes one batch: whoever uses `dataiter` afterwards
  continues from the following batch.

  Args:
    dataiter: an iterator yielding (features, labels) pairs, e.g. iter(loader).

  Raises:
    StopIteration: if `dataiter` is already exhausted.
  """
  # Use the built-in next(): the `.next()` method is a Python 2-era alias that
  # recent PyTorch DataLoader iterators no longer provide.
  features, labels = next(dataiter)
  print(features.shape)
  print(labels.shape)

# One dataset per split, built in the order train -> validation -> test
dataset_training, dataset_validation, dataset_testing = (
    get_dataset(split_path)
    for split_path in (feature_path_TRAIN, feature_path_VALID, feature_path_TEST)
)

# One loader per dataset, same order
feature_loader_training, feature_loader_validation, feature_loader_testing = map(
    get_feature_loader,
    (dataset_training, dataset_validation, dataset_testing),
)

# Verification step - open an iterator on every loader and show the shapes of
# one batch from each (this consumes the first batch of each iterator)
dataiter_training, dataiter_validation, dataiter_testing = map(
    iter,
    (feature_loader_training, feature_loader_validation, feature_loader_testing),
)

for split_iterator in (dataiter_training, dataiter_validation, dataiter_testing):
  print_shape(split_iterator)

torch.Size([32, 256, 6, 6])
torch.Size([32])
torch.Size([32, 256, 6, 6])
torch.Size([32])
torch.Size([32, 256, 6, 6])
torch.Size([32])


In [8]:
# Baseline model: a random forest fit on flattened feature maps.
# NOTE(review): fitting and scoring each use a single batch taken from the
# iterators created earlier, so the score below is computed on one batch of
# test samples only -- treat it as a smoke test rather than a benchmark.
from sklearn.ensemble import RandomForestClassifier


def batch_to_numpy(features, labels):
  """Flatten a batch of feature maps and convert both tensors to NumPy.

  Args:
    features: tensor whose first dimension indexes the samples; all remaining
      dimensions are flattened into one feature vector per sample.
    labels: tensor of per-sample labels.

  Returns:
    (data, targets): `data` is a 2-D array with one row per sample and
    `targets` is the labels as an array.
  """
  # flatten(start_dim=1) keeps the batch dimension and does not hard-code the
  # feature-map size; .cpu() is a no-op for tensors already on the CPU.
  flattened = features.flatten(start_dim=1)
  return flattened.detach().cpu().numpy(), labels.detach().cpu().numpy()


# training data & labels (built-in next(): iterators need not expose .next())
features_train, labels_train = next(dataiter_training)
training_data, training_labels = batch_to_numpy(features_train, labels_train)
print("training data: :", training_data.shape, "  training labels: ", training_labels.shape)

# testing data & labels
features_test, labels_test = next(dataiter_testing)
testing_data, testing_labels = batch_to_numpy(features_test, labels_test)
print("testing data: :", testing_data.shape, "  testing labels: ", testing_labels.shape)

# Random Forest; the fixed random_state makes the fitted forest repeatable for
# a given training batch
model = RandomForestClassifier(n_estimators=250, random_state=0)

# Fit the model to our training data
model.fit(training_data, training_labels)

# Make predictions
testing_predicted = model.predict(testing_data)
# Accuracy in percent = share of exact label matches. The former
# 1 - sum(|pred - label|) / n form is only an accuracy when labels are 0/1.
score = 100 * np.mean(testing_predicted == testing_labels)
print("RF Test:", score)

print ("training labels: ", training_labels)
print ("testing predicted: ", testing_predicted)
print ("testing labels: ",testing_labels)

training data: : (32, 9216)   training labels:  (32,)
testing data: : (32, 9216)   testing labels:  (32,)
RF Test: 100.0
training labels:  [0 0 1 0 0 0 1 1 0 1 0 0 1 1 1 1 0 0 0 0 1 0 1 0 1 1 1 1 0 1 0 0]
testing predicted:  [1 0 1 1 0 1 1 0 1 1 1 1 0 0 0 0 0 1 0 0 1 1 0 0 0 1 1 0 1 1 0 0]
testing labels:  [1 0 1 1 0 1 1 0 1 1 1 1 0 0 0 0 0 1 0 0 1 1 0 0 0 1 1 0 1 1 0 0]
