In [84]:
#Importing necessary packages
import os
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

## 1. Dataset preparation

In [85]:
data = {'img_path': [], 'label': []}

for root, dirs, files in os.walk("./training/"):
    label = os.path.basename(root)
    for file in files:
        if file.endswith('.jpg'):
            data['img_path'].append(os.path.join(root, file))
            data['label'].append(label)

df_data = pd.DataFrame(data)

In [86]:
df_data.head()

Unnamed: 0,img_path,label
0,./training/Forest/63.jpg,Forest
1,./training/Forest/77.jpg,Forest
2,./training/Forest/88.jpg,Forest
3,./training/Forest/89.jpg,Forest
4,./training/Forest/76.jpg,Forest


## 2. Feature extraction

In [88]:
df_data['features'] = feature_extraction_r1(df_data['img_path'])

Unnamed: 0,img_path,label,features
0,./training/Forest/63.jpg,Forest,"[0.023682598039215685, 0.023682598039215685, 0..."
1,./training/Forest/77.jpg,Forest,"[0.008118872549019608, 0.06302083333333333, 0...."
2,./training/Forest/88.jpg,Forest,"[-0.007613357843137255, -0.07428002450980392, ..."
3,./training/Forest/89.jpg,Forest,"[0.27987132352941174, 0.35830269607843135, 0.4..."
4,./training/Forest/76.jpg,Forest,"[-0.13118872549019608, -0.09589460784313726, 0..."


## 3.Train - Validation Split

In [None]:
X = np.array(df_data['features'].to_list())
y = np.array(df_data['label'].to_list())
print(X.shape, y.shape)

# split data into train and val set with 10% data in the val set.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state = 521)

##  4. Classifier : k neighbour classifier

In [92]:
# instantiate k neighbour classifier with 100 clusters
neigh = KNeighborsClassifier(n_neighbors=100)
# fit classifier on training data and train labels
neigh.fit(X_train, y_train)

In [93]:
# calculate accuracy of test data
accuracy_score(y_val, neigh.predict(X_test))

0.2

## 7. Making Predictions

### Making Predicitions on Validation set

In [95]:
# generate the classification report
y_pred = neigh.predict(X_test)
print('accuracy:', accuracy_score(y_val, y_pred))
print(classification_report(y_val, y_pred))

accuracy: 0.2
              precision    recall  f1-score   support

       Coast       0.23      0.25      0.24        20
      Forest       0.14      0.74      0.23        23
     Highway       0.15      0.90      0.26        20
  Insidecity       0.00      0.00      0.00        18
    Mountain       0.00      0.00      0.00        26
      Office       0.33      0.05      0.09        19
 OpenCountry       0.57      0.17      0.27        23
      Street       0.62      0.71      0.67        14
      Suburb       1.00      0.14      0.24        22
TallBuilding       0.29      0.10      0.14        21
     bedroom       0.00      0.00      0.00        21
  industrial       0.00      0.00      0.00        19
     kitchen       0.00      0.00      0.00        16
  livingroom       0.00      0.00      0.00        21
       store       0.00      0.00      0.00        17

    accuracy                           0.20       300
   macro avg       0.22      0.20      0.14       300
weighted avg

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [96]:
print(confusion_matrix(y_val, y_pred))          # Printing confusion matrix of validation set

array([[ 5,  5,  8,  0,  0,  0,  2,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 2, 17,  4,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  2, 18,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 1, 10,  7,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 3, 17,  6,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 3,  6,  8,  0,  0,  1,  0,  0,  0,  1,  0,  0,  0,  0,  0],
       [ 0,  8, 11,  0,  0,  0,  4,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  2,  1,  0,  0,  0,  0, 10,  0,  1,  0,  0,  0,  0,  0],
       [ 1,  4,  8,  1,  0,  1,  1,  3,  3,  0,  0,  0,  0,  0,  0],
       [ 0, 11,  7,  0,  0,  0,  0,  1,  0,  2,  0,  0,  0,  0,  0],
       [ 2,  8,  8,  0,  0,  1,  0,  0,  0,  2,  0,  0,  0,  0,  0],
       [ 2,  3, 12,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  0],
       [ 2,  9,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 1,  8, 11,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 0, 13,  3,  0,  0,  0,  0

### Making Predicitions on Test set

In [78]:
# Reading the test Dataset
test_file_name = []
for root, dirs, files in os.walk("./testing/testing"):
    label = os.path.basename(root)
    for file in files:
        if file.endswith('.jpg'):
            test_file_name.append(os.path.join(root, file))

test_data = feature_extraction_r1(test_file_name) #Extracting feature from test data


y_pred = neigh.predict(test_data) # prediction

In [75]:
#Storing predictions in a datframe for testing dataset
lists = [m + " " + n for m, n in zip(test_file_name, y_pred)]
re = pd.DataFrame(lists, columns=['Name'])
re['Num'] = [int(x.split('.')[0]) for x in re['Name']]
re = re.sort_values(by=['Num'])
re = re.reset_index()
sub = list(re['Name'])

with open(r'output/run1.txt', 'w') as fp:
    for item in sub:
        # write each item on a new line
        fp.write("%s\n" % item)
    print('Done')


Done
