In [1]:
#Importing Necessary packages

import pandas as pd
import cv2
import numpy as np
import os
import random
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from collections import Counter
from random import sample
from sklearn.metrics import classification_report
import utils

## 1. Creating Dataset


In [None]:
# Location of the training images, relative to the notebook's working directory.
path = 'training/training'
# utils.dataset returns two values, unpacked here as features and labels.
# Both are indexed with the same positions when the split is built, so they
# are presumably aligned entry by entry — confirm in utils.dataset.
feature_set,label_set = utils.dataset(path)

## 2.Train - Validation Split

In [7]:
# Split the image indexes into 70% training / 30% validation.
SPLIT_SEED = 42        # fixed so the same split is produced on every Restart & Run All
TRAIN_FRACTION = 0.70  # share of the images used for training

l = list(range(len(feature_set)))  # every image index; name kept in case other cells read it
# A dedicated generator makes the draw reproducible without touching the
# global `random` state used elsewhere in the notebook.
train_ind = random.Random(SPLIT_SEED).sample(range(0, len(feature_set)), int(TRAIN_FRACTION*len(feature_set))) # indexes for the training set
# Membership is tested against a set: checking against the train_ind list
# rescanned it for every image, which is quadratic in the number of images.
train_lookup = set(train_ind)
val_ind = [x for x in l if x not in train_lookup] # remaining indexes, in ascending order, for the validation set

In [8]:
# Materialise the split: features and labels are picked with the same index
# lists, so entry k of X_* and entry k of y_* come from the same position.
def _pick(items, positions):
    """Return the elements of `items` found at `positions`, in that order."""
    return [items[p] for p in positions]

X_train, y_train = _pick(feature_set, train_ind), _pick(label_set, train_ind)  # training set
X_val, y_val = _pick(feature_set, val_ind), _pick(label_set, val_ind)          # validation set

## 3. Learning Vocabulary

In [10]:
# Dataset for learning the vocabulary: flatten X_train by one level so that
# every item of every training entry ends up in a single list.
feature_vocab = [feature for image_features in X_train for feature in image_features]

In [11]:
# Shuffle the pooled features in place, so that the leading slice used to learn
# the vocabulary is a random subset rather than the features of the first entries.
# NOTE(review): the global `random` generator is not seeded in this notebook,
# so the resulting order differs between runs.
random.shuffle(feature_vocab)

In [12]:
# Learn the vocabulary from the first quarter of the shuffled features only.
vocab_sample_size = int(len(feature_vocab)*0.25)
vocab_sample = feature_vocab[:vocab_sample_size]
# Per the original note this is the KMeans model holding the vocabulary;
# it is what utils.mapping receives in the cells that follow.
kmeans = utils.learning_words(vocab_sample)

In [None]:
# Map the training features onto the learned vocabulary.
# NOTE(review): the result is used like a DataFrame (a column is assigned here
# and read back when the class list is built) — confirm the return type of utils.mapping.
mapped_df_train = utils.mapping(kmeans,X_train) #Mapping dataset with vocabulary
# Attach the ground-truth labels as a 'label' column; this relies on the rows
# being in the same order as X_train — TODO confirm in utils.mapping.
mapped_df_train['label']=y_train

## 4. Creating Classifiers: one linear classifier for each respective class

In [17]:
# Distinct class names found in the training labels (15 scenes, per the original note).
# sorted() pins the order: list(set(...)) over strings can come out in a different
# order after a kernel restart because string hashing is randomised per process,
# and that order decides the order of the models stored in `classifiers`.
classes = sorted(set(mapped_df_train['label']))
classifiers = [] # list that stores one model per class

In [None]:
# Train one classifier per class and keep them in the order of `classes`.
# The list is built in a single assignment so that re-running this cell
# replaces it; appending to the existing list added a second copy of every
# model each time the cell was executed again.
classifiers = [utils.classifier(mapped_df_train, class_name) for class_name in classes]

## 5. Making Predictions

### Making Predictions on Training Set

In [23]:
"""# Making Predicitions Using Training set"""
# Score the training rows with the per-class classifiers and print the
# per-class precision / recall / F1 table against the true training labels.
y_train_pred = utils.predict(mapped_df_train, classifiers)
train_report = classification_report(y_train, y_train_pred)
print(train_report)

              precision    recall  f1-score   support

       Coast       0.84      0.67      0.75        64
      Forest       0.97      0.82      0.89        72
     Highway       0.70      0.84      0.76        67
  Insidecity       0.89      0.64      0.74        66
    Mountain       0.93      0.77      0.84        73
      Office       0.85      0.58      0.69        67
 OpenCountry       0.69      0.78      0.74        64
      Street       0.65      0.94      0.77        68
      Suburb       0.99      0.95      0.97        76
TallBuilding       0.90      0.53      0.67        68
     bedroom       0.52      0.56      0.54        78
  industrial       0.47      0.69      0.56        68
     kitchen       0.53      0.65      0.59        74
  livingroom       0.47      0.49      0.48        72
       store       0.73      0.71      0.72        73

    accuracy                           0.71      1050
   macro avg       0.74      0.71      0.71      1050
weighted avg       0.74   

### Making Predictions Using Validation set

In [25]:
# Build the histograms for the validation set with the vocabulary learned on
# the training split, then score them with the classifiers trained above.
mapped_df_val = utils.mapping(kmeans, X_val)
mapped_df_val['label'] = y_val
y_val_pred = utils.predict(mapped_df_val, classifiers)
val_report = classification_report(y_val, y_val_pred)
print(val_report)

              precision    recall  f1-score   support

       Coast       0.68      0.42      0.52        36
      Forest       0.85      0.61      0.71        28
     Highway       0.66      0.64      0.65        33
  Insidecity       0.64      0.41      0.50        34
    Mountain       0.75      0.67      0.71        27
      Office       0.67      0.42      0.52        33
 OpenCountry       0.60      0.75      0.67        36
      Street       0.48      0.78      0.60        32
      Suburb       0.74      0.71      0.72        24
TallBuilding       0.30      0.09      0.14        32
     bedroom       0.36      0.59      0.45        22
  industrial       0.11      0.16      0.13        32
     kitchen       0.45      0.58      0.51        26
  livingroom       0.30      0.32      0.31        28
       store       0.38      0.48      0.43        27

    accuracy                           0.50       450
   macro avg       0.53      0.51      0.50       450
weighted avg       0.53   

### Making predictions on test set

In [None]:
test_path = 'testing/testing'
# `dataset_test` is neither defined nor imported in the visible notebook, so
# the bare call raises NameError on a fresh kernel; every other helper used
# here is reached through `utils`.
# NOTE(review): assumes utils exposes dataset_test — confirm.
# NOTE(review): this rebinds feature_set/label_set, replacing the training
# data. For the test set, label_set appears to hold the image file names that
# the submission cell joins with the predictions.
feature_set,label_set = utils.dataset_test(test_path) #Creating testing dataset

mapped_test_set = utils.mapping(kmeans,feature_set) #Creating histogram using vocabulary for the test set
# Placeholder value: there is no ground truth for the test images.
# Presumably kept so the table has the same columns utils.predict saw before — confirm.
mapped_test_set['label']=0
y_test_pred = utils.predict(mapped_test_set,classifiers) #Making predictions on the test set

In [50]:
# Build the submission lines "<name> <predicted class>" for the test set,
# ordered by the integer that precedes the first '.' of each name
# (numeric order, so "10.x" comes after "9.x").
# Done in plain Python: the earlier DataFrame round trip bound a variable
# called `re` (the name of the stdlib regex module) and left a stray 'index'
# column, only to sort by one derived key.
def _leading_number(line):
    """Return the integer found before the first '.' of `line`."""
    return int(line.split('.')[0])

# sorted() is stable, so lines with equal numbers keep their input order.
sub = sorted((m+" "+n for m,n in zip(label_set,y_test_pred)), key=_leading_number)

# Save the predictions to a text file, one submission line per row.
results_file = r'results/run2.txt'
# open(..., 'w') does not create missing folders, so make sure the output
# directory exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(results_file), exist_ok=True)
with open(results_file, 'w') as fp:
    # write each item on a new line
    fp.writelines("%s\n" % item for item in sub)
# Reported only after the file has been closed and flushed.
print('Done')