In [35]:
import os
import pandas as pd
import numpy as np
from skimage import io, transform

from sklearn.ensemble import RandomForestClassifier,ExtraTreesClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

import torch
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn as nn

from PIL import Image

# Step 0: Specifying Tasks performed
Delete any tasks you were *unable* to perform:
1. Successfully ran notebook as is; accuracy = 0.664
2. Successfully ran notebook with 1300 training samples; accuracy = 0.764
3. Successfully ran notebook with a new set of pretrained features; accuracy = 0.812
4. Successfully ran notebook with a different classifier; accuracy = 0.8


# Step 1: Loading and Preprocessing the Data

In [36]:
# Scene-category names mapped to integer class ids 0-9, numbered in listing order.
labels = dict(zip(
    ("01beach", "02forest", "03mountain", "04city", "05suburb",
     "06street", "07bedroom", "08kitchen", "09livingroom", "10store"),
    range(10),
))

In [37]:
# Need to add this since openCV open function was removed
def pil_loader(path):
    """Read the image file at ``path`` and return it as an RGB PIL image.

    The file is opened explicitly so the ``with`` block closes the handle,
    which avoids a ResourceWarning
    (https://github.com/python-pillow/Pillow/issues/835).
    """
    with open(path, 'rb') as handle:
        # convert() runs while the handle is still open.
        rgb_image = Image.open(handle).convert('RGB')
    return rgb_image


### We will load the training and testing csv files here

In [38]:
# Read the 250-sample training manifest and replace class names with integer ids.
train_a = pd.read_csv(os.path.relpath('./train250.csv'))
train_a["label"] = train_a["label"].map(labels)

# Load every listed image as an RGB array, in manifest order.
Xtrain = [
    np.array(pil_loader(os.path.realpath('./' + rel_path)))
    for rel_path in train_a['filepath']
]
# One label per image, in the same order as Xtrain.
Ytrain = train_a['label'].tolist()

In [39]:
# Read the test manifest and replace class names with integer ids.
test_a = pd.read_csv(os.path.relpath('./test250.csv'))
test_a["label"] = test_a["label"].map(labels)

# Walk the file paths and labels together so images and labels stay aligned.
Xtest, Ytest = [], []
for rel_path, class_id in zip(test_a['filepath'], test_a['label']):
    Xtest.append(np.array(pil_loader(os.path.realpath('./' + rel_path))))
    Ytest.append(class_id)

In [40]:
# Row counts of the two manifests; later cells use them to reshape the features.
train_len, test_len = len(train_a), len(test_a)
print(f'length of train is {train_len} and length of test is {test_len}')

length of train is 250 and length of test is 250


### Resize the input images to 224x224x3 since this is the input size required for VGG-16

We will also normalize the train and test images with mean --> (0.485, 0.456, 0.406) and standard deviation --> (0.229, 0.224, 0.225)

In [41]:
# Per-channel (R, G, B) mean and standard deviation used for normalization.
channel_mean = np.array((0.485, 0.456, 0.406))
channel_std = np.array((0.229, 0.224, 0.225))

for i in range(len(Xtrain)):
    # Resize to the 224x224 input size; transform.resize is expected to return a
    # float image scaled to [0, 1] for uint8 input (see the scikit-image docs).
    resized = transform.resize(Xtrain[i], (224, 224))
    # Normalize in float64, then store as float32. A later cell casts to float32
    # anyway, so the final values are identical, but the stacked array below
    # needs half the memory.
    Xtrain[i] = ((resized - channel_mean) / channel_std).astype(np.float32)

# Stack into one (images, height, width, channels) array and wrap it as a tensor.
Xtrain = np.array(Xtrain)
Xtrain = torch.from_numpy(Xtrain)

In [42]:
# Per-channel (R, G, B) mean and standard deviation used for normalization.
channel_mean = np.array((0.485, 0.456, 0.406))
channel_std = np.array((0.229, 0.224, 0.225))

for i in range(len(Xtest)):
    # Resize to the 224x224 input size; transform.resize is expected to return a
    # float image scaled to [0, 1] for uint8 input (see the scikit-image docs).
    resized = transform.resize(Xtest[i], (224, 224))
    # Normalize in float64, then store as float32. A later cell casts to float32
    # anyway, so the final values are identical, but the stacked array below
    # needs half the memory.
    Xtest[i] = ((resized - channel_mean) / channel_std).astype(np.float32)

# Stack into one (images, height, width, channels) array and wrap it as a tensor.
Xtest = np.array(Xtest)
Xtest = torch.from_numpy(Xtest)

Permute the data into the following layout --> (number of images, channels, height, width), as this is the input layout the pretrained models expect

In [43]:
# Move the channel axis ahead of height and width, and store as 32-bit floats.
nchw_order = (0, 3, 1, 2)
Xtrain = Xtrain.permute(*nchw_order).float()
Xtest = Xtest.permute(*nchw_order).float()

# Step 2: Feature Extraction using Pre-trained CNNs

We will use VGG-16 for extracting features from the images. For BONUS, you can try other pretrained networks like DenseNet.

For extracting features from the pretrained models we can use any layer. The choice of which layer we want to use can be experimented with. Here I have removed the last child module of the network (for VGG-16, the fully connected classifier head).

In [44]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

#### Define and use the pretrained VGG model

In [45]:
# This portion takes a long time to run on a CPU
vgg = models.vgg16(pretrained=True)

# The network is only used as a fixed feature extractor, so freeze its weights.
for param in vgg.parameters():
    param.requires_grad = False
# Drop the last child module and chain the remaining ones as the extractor.
modules = list(vgg.children())[:-1]
vgg = nn.Sequential(*modules)
vgg = vgg.to(device)
vgg.eval()
x_vgg_train = []

# Larger training sets may not fit on a GPU in a single batch, so the images
# are passed through the network in batches of 250.
batch_size = 250
# Inference only: no_grad guarantees that no autograd state is recorded.
with torch.no_grad():
    for i in range(0, len(Xtrain), batch_size):
        x_vgg_train.append(vgg(Xtrain[i:i + batch_size].to(device)).cpu().numpy())

X_vgg_train = np.vstack(x_vgg_train)
# Flatten to one feature row per image. The row count comes from the features
# themselves, so this cell cannot silently disagree with a stale train_len.
X_vgg_train_np = X_vgg_train.reshape(len(X_vgg_train), -1)

# Step 3: Training and Predicting with the Classifier

We will train a RandomForest classifier using the pretrained CNN features we obtained.

In [46]:
# Build a depth-limited Random Forest with a fixed seed, then train it on the
# VGG-16 features and their labels. The fitted estimator is the cell's output.
forest_params = {"max_depth": 10, "random_state": 0}
rf_vgg = RandomForestClassifier(**forest_params)
rf_vgg.fit(X_vgg_train_np, Ytrain)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=10, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [47]:
# Now apply the trained classifier to the test data.
# Inference only: no_grad guarantees that no autograd state is recorded.
with torch.no_grad():
    X_vgg_test = vgg(Xtest.to(device)).cpu().numpy()
# One feature row per test image; the row count is taken from the features
# themselves rather than from the global test_len.
X_vgg_test_np = X_vgg_test.reshape(len(X_vgg_test), -1)
Y_vgg_rf_predictions = rf_vgg.predict(X_vgg_test_np)

# Step 4: Obtaining Performance Metrics

Metrics using the VGG-16 as the feature extractor

In [48]:
# Accuracy of the predictions against the true test labels.
vgg_rf_accuracy = accuracy_score(Ytest, Y_vgg_rf_predictions)
print("The accuracy for the Random Forest model using VGG-16 feature extractor is {}".format(vgg_rf_accuracy))
print()
# Last expression, so the matrix is the cell output; class ids 0-9 fix its order.
confusion_matrix(Ytest, Y_vgg_rf_predictions, labels=list(range(10)))


The accuracy for the Random Forest model using VGG-16 feature extractor is 0.664



array([[18,  1,  5,  0,  0,  0,  0,  0,  1,  0],
       [ 0, 25,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 1,  1, 22,  0,  1,  0,  0,  0,  0,  0],
       [ 0,  0,  1, 12,  1,  7,  0,  0,  1,  3],
       [ 0,  3,  0,  0, 19,  2,  0,  0,  0,  1],
       [ 1,  0,  0,  1,  3, 19,  0,  0,  0,  1],
       [ 1,  0,  0,  1,  1,  0, 14,  4,  3,  1],
       [ 0,  0,  0,  3,  1,  0,  1, 12,  5,  3],
       [ 0,  0,  0,  1,  0,  1,  4,  5, 13,  1],
       [ 0,  0,  1,  2,  1,  0,  2,  3,  4, 12]])

# Step 5: Retraining using the 1300-sample training set

Using VGG-16 as the feature extractor and a Random Forest trained on 1300 training samples

In [49]:
# Read the 1300-sample training manifest and replace class names with integer ids.
train_a = pd.read_csv(os.path.relpath('./train1300.csv'))
train_a["label"] = train_a["label"].map(labels)

# Load each listed image as an RGB array and take the labels in the same order.
image_paths = [os.path.realpath('./' + rel_path) for rel_path in train_a['filepath']]
Xtrain = [np.array(pil_loader(image_path)) for image_path in image_paths]
Ytrain = list(train_a['label'])

In [50]:
# Refresh the row counts now that the training manifest has been replaced.
train_len = train_a.shape[0]
test_len = test_a.shape[0]
print(f'length of train is {train_len} and length of test is {test_len}')

length of train is 1300 and length of test is 250


In [51]:
# Per-channel (R, G, B) mean and standard deviation used for normalization.
channel_mean = np.array((0.485, 0.456, 0.406))
channel_std = np.array((0.229, 0.224, 0.225))

for i in range(len(Xtrain)):
    # Resize to the 224x224 input size; transform.resize is expected to return a
    # float image scaled to [0, 1] for uint8 input (see the scikit-image docs).
    resized = transform.resize(Xtrain[i], (224, 224))
    # Normalize in float64, then store as float32. The next cell casts to
    # float32 anyway, so the final values are identical, but with 1300 images
    # this avoids stacking roughly 1.5 GB of float64 data below.
    Xtrain[i] = ((resized - channel_mean) / channel_std).astype(np.float32)

# Stack into one (images, height, width, channels) array and wrap it as a tensor.
Xtrain = np.array(Xtrain)
Xtrain = torch.from_numpy(Xtrain)

In [52]:
# Reorder to (images, channels, height, width) and store as 32-bit floats.
Xtrain = Xtrain.permute(0, 3, 1, 2).to(torch.float32)

In [53]:
# Pick the first CUDA GPU when one is available, otherwise the CPU.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

In [54]:
# This portion takes a long time to run on a CPU
vgg = models.vgg16(pretrained=True)

# The network is only used as a fixed feature extractor, so freeze its weights.
for param in vgg.parameters():
    param.requires_grad = False
# Drop the last child module and chain the remaining ones as the extractor.
modules = list(vgg.children())[:-1]
vgg = nn.Sequential(*modules)
vgg = vgg.to(device)
vgg.eval()
x_vgg_train = []

# The training set has 1300 images, which many GPUs cannot handle in a single
# batch, so we will pass the data in batches of 250 images.
batch_size = 250
# Inference only: no_grad guarantees that no autograd state is recorded.
with torch.no_grad():
    for i in range(0, len(Xtrain), batch_size):
        x_vgg_train.append(vgg(Xtrain[i:i + batch_size].to(device)).cpu().numpy())

X_vgg_train = np.vstack(x_vgg_train)
# Flatten to one feature row per image. The row count comes from the features
# themselves, so this cell cannot silently disagree with a stale train_len.
X_vgg_train_np = X_vgg_train.reshape(len(X_vgg_train), -1)

In [55]:
# Train a fresh depth-limited, fixed-seed Random Forest on the VGG-16 features
# of the larger training set. The fitted estimator is the cell's output.
rf_vgg = RandomForestClassifier(random_state=0, max_depth=10)
rf_vgg.fit(X_vgg_train_np, Ytrain)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=10, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [56]:
# Now apply the trained classifier to the test data.
# Inference only: no_grad guarantees that no autograd state is recorded.
with torch.no_grad():
    X_vgg_test = vgg(Xtest.to(device)).cpu().numpy()
# One feature row per test image; the row count is taken from the features
# themselves rather than from the global test_len.
X_vgg_test_np = X_vgg_test.reshape(len(X_vgg_test), -1)
Y_vgg_rf_predictions = rf_vgg.predict(X_vgg_test_np)

In [57]:
# Accuracy of the retrained forest against the true test labels.
accuracy_message = "The accuracy for the Random Forest model using VGG-16 feature extractor is {}"
print(accuracy_message.format(accuracy_score(Ytest, Y_vgg_rf_predictions)))
print()
# Last expression, so the matrix is the cell output; class ids 0-9 fix its order.
class_ids = list(range(10))
confusion_matrix(Ytest, Y_vgg_rf_predictions, labels=class_ids)

The accuracy for the Random Forest model using VGG-16 feature extractor is 0.764



array([[22,  0,  1,  0,  1,  0,  1,  0,  0,  0],
       [ 0, 24,  1,  0,  0,  0,  0,  0,  0,  0],
       [ 3,  0, 22,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 21,  1,  2,  1,  0,  0,  0],
       [ 0,  0,  0,  0, 25,  0,  0,  0,  0,  0],
       [ 1,  0,  0,  1,  1, 20,  0,  0,  0,  2],
       [ 0,  0,  0,  1,  0,  0, 13,  6,  5,  0],
       [ 0,  0,  0,  1,  0,  1,  2, 16,  3,  2],
       [ 0,  0,  0,  0,  0,  0,  4,  5, 13,  3],
       [ 0,  1,  0,  1,  0,  2,  2,  1,  3, 15]])

# Step 6: Training using Densenet and Random Forest

Using Densenet161 and Random Forest

In [58]:
# Load DenseNet-161 with pretrained weights, showing download progress.
densenet161 = models.densenet161(
    progress=True,
    pretrained=True,
)

In [59]:
# The network is only used as a fixed feature extractor, so freeze its weights.
for param in densenet161.parameters():
    param.requires_grad = False
# Drop the last child module and chain the remaining ones as the extractor.
# NOTE(review): torchvision's DenseNet.forward appears to apply a ReLU and global
# average pooling functionally after `features`; a Sequential built from the
# child modules would skip both and return raw, un-pooled feature maps.
# Confirm that this is the intended feature set.
modules = list(densenet161.children())[:-1]
densenet161 = nn.Sequential(*modules)
densenet161 = densenet161.to(device)
densenet161.eval()
x_densenet161_train = []

# The training set has 1300 images, which many GPUs cannot handle in a single
# batch, so we will pass the data in batches of 250 images.
batch_size = 250
# Inference only: no_grad guarantees that no autograd state is recorded.
with torch.no_grad():
    for i in range(0, len(Xtrain), batch_size):
        x_densenet161_train.append(densenet161(Xtrain[i:i + batch_size].to(device)).cpu().numpy())

X_densenet161_train = np.vstack(x_densenet161_train)
# Flatten to one feature row per image. The row count comes from the features
# themselves, so this cell cannot silently disagree with a stale train_len.
X_densenet161_train_np = X_densenet161_train.reshape(len(X_densenet161_train), -1)

In [60]:
# Train a depth-limited, fixed-seed Random Forest on the DenseNet161 features.
# The fitted estimator is the cell's output.
rf_settings = dict(max_depth=10, random_state=0)
rf_densenet161 = RandomForestClassifier(**rf_settings)
rf_densenet161.fit(X_densenet161_train_np, Ytrain)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=10, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=0, verbose=0,
                       warm_start=False)

In [61]:
# Now apply the trained classifier to the test data.
# Inference only: no_grad guarantees that no autograd state is recorded.
with torch.no_grad():
    X_densenet161_test = densenet161(Xtest.to(device)).cpu().numpy()
# One feature row per test image; the row count is taken from the features
# themselves rather than from the global test_len.
X_densenet161_test_np = X_densenet161_test.reshape(len(X_densenet161_test), -1)
Y_densenet161_rf_predictions = rf_densenet161.predict(X_densenet161_test_np)

In [62]:
# Accuracy of the DenseNet161 + Random Forest pipeline on the test labels.
densenet161_rf_accuracy = accuracy_score(Ytest, Y_densenet161_rf_predictions)
print("The accuracy for the Random Forest model using DenseNet161 feature extractor is {}".format(densenet161_rf_accuracy))
print()
# Last expression, so the matrix is the cell output; class ids 0-9 fix its order.
confusion_matrix(Ytest, Y_densenet161_rf_predictions, labels=list(range(10)))

The accuracy for the Random Forest model using DenseNet161 feature extractor is 0.812



array([[25,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 1, 20,  3,  0,  1,  0,  0,  0,  0,  0],
       [ 0,  2, 23,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 21,  2,  1,  0,  0,  0,  1],
       [ 0,  0,  0,  2, 23,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  3,  0, 21,  0,  0,  0,  1],
       [ 0,  0,  0,  0,  0,  0, 18,  0,  6,  1],
       [ 0,  0,  0,  0,  0,  0,  2, 17,  5,  1],
       [ 0,  0,  0,  0,  0,  0,  6,  4, 15,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  4,  0, 20]])

# Step 7: Training using Densenet and Extra Trees Classifier

Using Densenet161 and Extra Trees Classifier

In [63]:
# Train a depth-limited, fixed-seed Extra Trees classifier on the same
# DenseNet161 features. The fitted estimator is the cell's output.
et_densenet161 = ExtraTreesClassifier(random_state=0, max_depth=10)
et_densenet161.fit(X_densenet161_train_np, Ytrain)



ExtraTreesClassifier(bootstrap=False, class_weight=None, criterion='gini',
                     max_depth=10, max_features='auto', max_leaf_nodes=None,
                     min_impurity_decrease=0.0, min_impurity_split=None,
                     min_samples_leaf=1, min_samples_split=2,
                     min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
                     oob_score=False, random_state=0, verbose=0,
                     warm_start=False)

In [64]:
# Now apply the trained classifier to the test data.
# The DenseNet161 test features do not depend on which classifier consumes them,
# so the X_densenet161_test_np array computed for the Random Forest evaluation
# is reused here instead of running the test images through the network again.
Y_densenet161_et_predictions = et_densenet161.predict(X_densenet161_test_np)

In [65]:
# Accuracy of the DenseNet161 + Extra Trees pipeline on the test labels.
et_accuracy = accuracy_score(Ytest, Y_densenet161_et_predictions)
print("The accuracy for the Extra Trees Classifier using DenseNet161 feature extractor is {}".format(et_accuracy))
print()
# Last expression, so the matrix is the cell output; class ids 0-9 fix its order.
confusion_matrix(Ytest, Y_densenet161_et_predictions, labels=list(range(10)))

The accuracy for the Extra Trees Classifier using DenseNet161 feature extractor is 0.8



array([[24,  0,  1,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 23,  2,  0,  0,  0,  0,  0,  0,  0],
       [ 2,  1, 22,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 18,  4,  3,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 23,  2,  0,  0,  0,  0],
       [ 0,  0,  0,  3,  1, 20,  0,  0,  0,  1],
       [ 0,  0,  0,  0,  0,  0, 16,  4,  5,  0],
       [ 0,  0,  0,  0,  0,  0,  1, 18,  5,  1],
       [ 0,  0,  0,  0,  0,  0,  2,  3, 17,  3],
       [ 0,  0,  0,  1,  0,  2,  0,  1,  2, 19]])