In [107]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, top_k_accuracy_score
from sklearn import preprocessing
from sklearn.naive_bayes import MultinomialNB, GaussianNB, BernoulliNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

## 1. Initialize variables

In [108]:
# Codebook size: number of representative vectors (K-Means clusters).
k = 200

# Descriptor / pooling scheme used for the whole run. Supported values:
#   'dense_sift', 'pyramid_dense_sift', 'spatial_pyramid',
#   'dense_orb', 'spatial_pyramid_orb',
#   'dense_brief', 'spatial_pyramid_brief'
feature_type = 'spatial_pyramid'

# Step size handed to the dense descriptor extractors.
sift_step_size = 5

# Level count handed to the pyramid-based encoders.
num_level = 2

## 2. Dataset preparation

In [109]:
# Collect every .jpg under ./training/; the folder that directly contains an
# image supplies its class label.
data = {'img_path': [], 'label': []}

for folder, _subfolders, filenames in os.walk("./training/"):
    jpg_names = [name for name in filenames if name.endswith('.jpg')]
    data['img_path'].extend(os.path.join(folder, name) for name in jpg_names)
    data['label'].extend([os.path.basename(folder)] * len(jpg_names))

df_data = pd.DataFrame(data)

In [110]:
# Preview the first rows to check the collected image paths and labels.
df_data.head()

Unnamed: 0,img_path,label
0,./training/Forest/63.jpg,Forest
1,./training/Forest/77.jpg,Forest
2,./training/Forest/88.jpg,Forest
3,./training/Forest/89.jpg,Forest
4,./training/Forest/76.jpg,Forest


## 3. Feature Extraction and Bag of visual words

In [112]:
# Extract the local descriptors of every training image.
# NOTE(review): `utils` is not imported in any visible cell — confirm it is
# imported before this cell runs on a fresh kernel.

# `feature_type` fixes the descriptor family for the whole run, so the extractor
# is resolved once here instead of being re-tested for every image.
if feature_type in ('dense_sift', 'pyramid_dense_sift', 'spatial_pyramid'):
    extract_descriptors = utils.dense_sift
elif feature_type in ('dense_orb', 'spatial_pyramid_orb'):
    extract_descriptors = utils.dense_orb
elif feature_type in ('dense_brief', 'spatial_pyramid_brief'):
    extract_descriptors = utils.dense_brief
elif feature_type == 'dense_surf':
    # NOTE(review): the original called a bare `dense_surf`, a name that is not
    # defined in the visible notebook; assumed to live in `utils` like the
    # other extractors — confirm.
    extract_descriptors = utils.dense_surf
else:
    # An unknown value used to append nothing for any image, leaving
    # `visual_words` out of step with the rows of `df_data`.
    raise ValueError(f"unsupported feature_type: {feature_type!r}")

visual_words = []

for img_path in df_data['img_path']:
    # cv2.imread returns None (it does not raise) when a file cannot be read
    image = cv2.imread(img_path)
    if image is None:
        raise FileNotFoundError(f"could not read image: {img_path}")

    # descriptors are computed on the gray-scale image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    visual_words.append(extract_descriptors(image, sift_step_size))

df_data['visual_words'] = visual_words

In [113]:
# Preview again: each row now also carries the descriptors extracted for that image.
df_data.head()

Unnamed: 0,img_path,label,visual_words
0,./training/Forest/63.jpg,Forest,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
1,./training/Forest/77.jpg,Forest,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
2,./training/Forest/88.jpg,Forest,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
3,./training/Forest/89.jpg,Forest,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."
4,./training/Forest/76.jpg,Forest,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,..."


### Create representation vectors - codebook

In [114]:
# Pool the descriptors of all images into one 2-D array (one descriptor per
# row); this is the sample set the codebook is clustered from.
BoVW = np.vstack(df_data['visual_words'].to_list())

print('bag of visual words size:', BoVW.shape)

bag of visual words size: (4060424, 128)


In [115]:
# K-Means clustering: the k cluster centres form the visual-word codebook.

# cv2.kmeans only accepts float32 samples. The cast returns the same array
# (no copy) when the descriptors are already float32.
kmeans_samples = BoVW.astype(np.float32, copy=False)

# Seed OpenCV's RNG so the random initial centres — and therefore the
# codebook and everything derived from it — are the same on every run.
cv2.setRNGSeed(7)

# Stop an attempt after 10 iterations or once the requested accuracy (1.0) is reached.
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
flags = cv2.KMEANS_RANDOM_CENTERS
# 10 attempts, each starting from different random centres.
compactness, labels, centres = cv2.kmeans(kmeans_samples, k, None, criteria, 10, flags)

### Histograms of bags of visual words

In [118]:
def _bovw_histogram(image):
    """Encode one gray-scale image against the codebook `centres`.

    The encoder is selected by the notebook-level `feature_type`; the codebook
    size `k`, `sift_step_size` and `num_level` are read from the notebook
    configuration as well.

    Returns the value produced by the selected `utils` encoder.
    Raises ValueError when `feature_type` is not one of the supported names.
    """
    # dense SIFT
    if feature_type == 'dense_sift':
        return utils.histogram_bovw(utils.dense_sift(image, sift_step_size), centres, k)
    # pyramid dense SIFT
    if feature_type == 'pyramid_dense_sift':
        return utils.histogram_bovw(
            utils.pyramid_dense_sift(image, sift_step_size, num_level), centres, k)
    # dense SIFT with spatial pooling
    if feature_type == 'spatial_pyramid':
        return utils.spatial_pyramid(image, centres, k, sift_step_size, num_level)
    # dense ORB
    if feature_type == 'dense_orb':
        return utils.histogram_bovw(utils.dense_orb(image, sift_step_size), centres, k)
    # dense ORB with spatial pooling
    if feature_type == 'spatial_pyramid_orb':
        return utils.spatial_pyramid_orb(image, centres, k, sift_step_size, num_level)
    # dense BRIEF
    if feature_type == 'dense_brief':
        return utils.histogram_bovw(utils.dense_brief(image, sift_step_size), centres, k)
    # dense BRIEF with spatial pooling
    if feature_type == 'spatial_pyramid_brief':
        return utils.spatial_pyramid_brief(image, centres, k, sift_step_size, num_level)
    # An unknown value used to be skipped silently, producing an empty feature matrix.
    raise ValueError(f"unsupported feature_type: {feature_type!r}")


his_bovw = []
print(feature_type)

for img_path in df_data['img_path']:
    # cv2.imread returns None (it does not raise) when a file cannot be read
    image = cv2.imread(img_path)
    if image is None:
        raise FileNotFoundError(f"could not read image: {img_path}")

    # the encoders work on the gray-scale image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    his_bovw.append(_bovw_histogram(image))

# one row per image
his_bovw = np.array(his_bovw)

spatial_pyramid


In [119]:
# np.asarray hands back the existing array without copying when `his_bovw` is
# already an ndarray, while still converting it if this cell is run on a list.
his_bovw = np.asarray(his_bovw)
his_bovw.shape

(1500, 1000)

## 4. Classifiers

In [183]:
# Feature matrix (one encoded image per row) and the matching label vector.
y = np.array(list(df_data['label']))
X = his_bovw
print(X.shape, y.shape)

(1500, 1000) (1500,)


### Train - Validation Split

In [184]:
# Hold out 20% of the images for validation; random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=7
)

# Standardise with statistics learned from the training part only, then apply
# the same transform to the held-out part.
scaler = preprocessing.StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)

### SVC (kernel: rbf)

In [187]:
# SVC with its default kernel (rbf) on the standardised features;
# probability=True makes predict_proba available on the fitted model.
clf = SVC(probability=True)
clf.fit(X_train_norm, y_train)
y_pred = clf.predict(X_test_norm)

print('accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

accuracy: 0.7033333333333334
              precision    recall  f1-score   support

       Coast       0.80      0.70      0.74        23
      Forest       0.90      0.86      0.88        21
     Highway       0.91      0.74      0.82        27
  Insidecity       0.68      0.83      0.75        18
    Mountain       0.70      0.78      0.74        18
      Office       0.70      0.84      0.76        19
 OpenCountry       0.48      0.67      0.56        15
      Street       0.88      0.84      0.86        25
      Suburb       0.91      1.00      0.95        21
TallBuilding       0.84      0.94      0.89        17
     bedroom       0.44      0.33      0.38        21
  industrial       0.67      0.56      0.61        18
     kitchen       0.36      0.38      0.37        21
  livingroom       0.40      0.29      0.33        21
       store       0.72      0.87      0.79        15

    accuracy                           0.70       300
   macro avg       0.69      0.71      0.69       3

In [188]:
# Confusion matrix for the predictions currently held in `y_pred`
# (scikit-learn convention: rows are true classes, columns are predicted classes).
confusion_matrix(y_test, y_pred)

array([[16,  0,  2,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 18,  0,  0,  0,  0,  1,  1,  0,  1,  0,  0,  0,  0,  0],
       [ 2,  0, 20,  1,  2,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 15,  1,  0,  0,  1,  0,  1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 14,  0,  3,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  1,  0, 16,  0,  0,  0,  0,  1,  0,  1,  0,  0],
       [ 2,  1,  0,  0,  2,  0, 10,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  1,  0,  3,  0,  0,  0, 21,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0, 21,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  1,  0,  0,  0,  0, 16,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  7,  1,  7,  4,  1],
       [ 0,  0,  0,  0,  0,  1,  0,  0,  2,  0,  1, 10,  0,  2,  2],
       [ 0,  0,  0,  0,  0,  4,  0,  1,  0,  0,  4,  1,  8,  3,  0],
       [ 0,  0,  0,  2,  0,  1,  0,  0,  0,  0,  3,  2,  5,  6,  2],
       [ 0,  0,  0,  0,  0,  0,  0

### Naive Bayes (Gaussian)

In [191]:
# Gaussian Naive Bayes, fitted on the unscaled features.
# MultinomialNB() or BernoulliNB() (both imported above) can be swapped in on
# the next line to compare the other Naive Bayes variants.
clf = GaussianNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print('accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

accuracy: 0.5766666666666667
              precision    recall  f1-score   support

       Coast       0.78      0.61      0.68        23
      Forest       0.88      0.67      0.76        21
     Highway       0.79      0.70      0.75        27
  Insidecity       0.57      0.72      0.63        18
    Mountain       0.40      0.44      0.42        18
      Office       0.43      0.53      0.48        19
 OpenCountry       0.36      0.27      0.31        15
      Street       0.67      0.72      0.69        25
      Suburb       1.00      0.90      0.95        21
TallBuilding       0.56      0.59      0.57        17
     bedroom       0.36      0.24      0.29        21
  industrial       0.39      0.39      0.39        18
     kitchen       0.31      0.43      0.36        21
  livingroom       0.55      0.57      0.56        21
       store       0.61      0.73      0.67        15

    accuracy                           0.58       300
   macro avg       0.58      0.57      0.57       3

### Random forest

In [194]:
# Random forest on the standardised features.
# random_state pins the forest's internal randomness so the reported scores can
# be reproduced (7 matches the seed used for the train/validation split).
clf = RandomForestClassifier(random_state=7).fit(X_train_norm, y_train)
y_pred = clf.predict(X_test_norm)
print('accuracy:', accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

accuracy: 0.67
              precision    recall  f1-score   support

       Coast       0.75      0.65      0.70        23
      Forest       0.82      0.86      0.84        21
     Highway       0.88      0.78      0.82        27
  Insidecity       0.62      0.72      0.67        18
    Mountain       0.81      0.72      0.76        18
      Office       0.68      0.79      0.73        19
 OpenCountry       0.45      0.60      0.51        15
      Street       0.86      0.76      0.81        25
      Suburb       0.88      1.00      0.93        21
TallBuilding       0.62      0.94      0.74        17
     bedroom       0.42      0.38      0.40        21
  industrial       0.61      0.61      0.61        18
     kitchen       0.41      0.43      0.42        21
  livingroom       0.50      0.24      0.32        21
       store       0.57      0.53      0.55        15

    accuracy                           0.67       300
   macro avg       0.66      0.67      0.66       300
weighted av

In [196]:
# top-k accuracy: a sample counts as correct when its true label is among the
# k=2 classes with the highest predicted probability.
# `labels` ties the probability columns to `clf.classes_` explicitly, so the
# score stays well-defined even if some class is missing from y_test.
top_k_accuracy_score(y_test, clf.predict_proba(X_test_norm), k=2, labels=clf.classes_)

0.85

#### Hyperparameter tuning

In [54]:
# Exhaustive search over C and gamma for an RBF-kernel SVC, using GridSearchCV's
# default cross-validation and scoring.
# Note: the search is fitted on X_train, not on the standardised X_train_norm.
parameters = {
    'kernel': ['rbf'],
    'C': np.linspace(1, 10, 50),
    'gamma': [0.1, 1.0, 10, 100],
}
grid_search = GridSearchCV(estimator=SVC(), param_grid=parameters)
grid_search.fit(X_train, y_train)
grid_search.best_score_

0.7025

In [55]:
# Parameter combination that achieved the best cross-validated score.
grid_search.best_params_

{'C': 3.061918367346939, 'gamma': 100, 'kernel': 'rbf'}

In [56]:
# Score the tuned model on the held-out split (unscaled features, matching
# what the grid search was fitted on).
y_pred = grid_search.predict(X_test)

val_accuracy = accuracy_score(y_test, y_pred)
print('accuracy:', val_accuracy)

val_report = classification_report(y_test, y_pred)
print(val_report)

accuracy: 0.7033333333333334
              precision    recall  f1-score   support

       Coast       0.70      0.70      0.70        23
      Forest       0.90      0.86      0.88        21
     Highway       0.83      0.74      0.78        27
  Insidecity       0.71      0.83      0.77        18
    Mountain       0.74      0.78      0.76        18
      Office       0.65      0.68      0.67        19
 OpenCountry       0.53      0.67      0.59        15
      Street       0.91      0.84      0.87        25
      Suburb       0.95      1.00      0.98        21
TallBuilding       0.75      0.88      0.81        17
     bedroom       0.44      0.38      0.41        21
  industrial       0.88      0.39      0.54        18
     kitchen       0.43      0.43      0.43        21
  livingroom       0.44      0.52      0.48        21
       store       0.76      0.87      0.81        15

    accuracy                           0.70       300
   macro avg       0.71      0.70      0.70       3

## 5. Making predictions on the test set

In [172]:
# This cell always encodes with utils.spatial_pyramid, so its features only
# match the training features when that is the configured feature type.
# Fail loudly instead of producing features the classifier was not trained on.
if feature_type != 'spatial_pyramid':
    raise ValueError(
        f"test-set encoding is only implemented for 'spatial_pyramid', got {feature_type!r}"
    )

test_data = []
test_file_name = []

for root, dirs, files in os.walk("./testing/"):
    for file in files:
        if not file.endswith('.jpg'):
            continue

        img_path = os.path.join(root, file)
        # cv2.imread returns None (it does not raise) when a file cannot be read
        image = cv2.imread(img_path)
        if image is None:
            raise FileNotFoundError(f"could not read image: {img_path}")

        # the encoder works on the gray-scale image
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # record name and features together so the two lists stay aligned
        test_file_name.append(file)
        test_data.append(utils.spatial_pyramid(image, centres, k, sift_step_size, num_level))

In [173]:
# Refit on every labelled image and predict the unlabelled test images.
# Note: this rebinds X_train / X_test / y_train from the validation split above.
X_train, y_train = X.copy(), y.copy()
X_test = np.array(test_data)

# Standardise with statistics from the full training set.
scaler = preprocessing.StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)

clf = SVC(probability=True, C=2.2)
clf.fit(X_train_norm, y_train)
y_pred = clf.predict(X_test_norm)

In [180]:
# Store the test-set predictions as "<file name> <predicted label>" lines,
# ordered by the number at the start of each file name.
# (The frame is no longer called `re`, which shadowed the standard-library module name.)
submission = pd.DataFrame({'file': test_file_name, 'label': y_pred})

# Assumes file names look like "<number>.jpg"; sorting on the integer puts
# 10.jpg after 9.jpg, which a plain string sort would not.
submission['Num'] = [int(name.split('.')[0]) for name in submission['file']]
submission = submission.sort_values(by=['Num'])

sub = [f"{name} {label}" for name, label in zip(submission['file'], submission['label'])]

with open(r'run3.txt', 'w') as fp:
    # write each item on a new line
    fp.writelines("%s\n" % item for item in sub)

# reported only after the file has been closed
print('Done')


Done
