In [1]:
#Perform Validation for Classifer Models

In [2]:
import cv2 as cv
import os
import pandas as pd
import numpy as np
import json
import pickle
import time
import winsound

In [3]:
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.utils import shuffle


In [4]:
start_program = time.time()
duration = 800  # milliseconds
freq = 300 # Hz
winsound.Beep(freq, duration)

In [5]:
n_cluster = 1500
#Trial_1_KMeans_c300_b32_rs0.sav
kmeans_name = "_".join(("Trial_1_KMeans", "c"+str(n_cluster), "b32_rs0.sav"))
with open(kmeans_name, 'rb') as f_name:
    kmeans_batch = pickle.load(f_name)
print(kmeans_name)

Trial_1_KMeans_c1500_b32_rs0.sav


In [6]:
SVC_name = "_".join(("SVC","VoW_MinMax","C"+str(n_cluster)+".sav"))
with open(SVC_name, 'rb') as f_name:
    svc_model = pickle.load(f_name)
print(SVC_name)


SVC_VoW_MinMax_C1500.sav


In [7]:
mm_name = "_".join(("MinMax","C"+str(n_cluster)+".sav"))
with open(mm_name, 'rb') as f_name:
    mm_scaler = pickle.load(f_name)
print(mm_name)


MinMax_C1500.sav


In [8]:
#Testing the prediction accuracy with Validation Data!
#Let's import all the feature descriptors!
path_dir = os.getcwd()
valid_dir = "\\".join((path_dir, "ORB_Dataset", "val"))
valid_files = os.listdir(valid_dir)
valid_car_dir = "\\".join((valid_dir, valid_files[0]))
valid_noise_dir = "\\".join((valid_dir, valid_files[1]))


In [9]:
start_time = time.time()
valid_car_descriptors = []
with open(valid_car_dir, 'r') as fil_name:
    valid_car_descriptors = json.load(fil_name)
print(time.time() - start_time)

start_time = time.time()
valid_noise_descriptors = []
with open(valid_noise_dir, 'r') as fil_name:
    valid_noise_descriptors = json.load(fil_name)
print(time.time() - start_time)

2.551635980606079
2.8164963722229004


In [10]:
def cluster_labels(pickle_file, model):
    start_time = time.time()
    #pickle_file = car_descriptors
    
    img_cluster = []
    for img_desc in pickle_file:
        cluster_desc = []
        if len(img_desc)> 0:
            #for desc in img_desc:
            cluster_desc = model.predict(img_desc)
            img_cluster.append(cluster_desc)
    print(time.time() - start_time)
    return(img_cluster)

In [11]:
valid_car_clusters = np.array(cluster_labels(pickle_file = valid_car_descriptors, model = kmeans_batch))
valid_noise_clusters = np.array(cluster_labels(pickle_file = valid_noise_descriptors, model = kmeans_batch))

14.785331010818481
15.077357292175293


In [12]:
def cluster_histogram(img_clusters, n_clusters = 500):
    start_time = time.time()
    hist_arr = []
    for img in img_clusters:
        hist = np.zeros(n_clusters)
        for cluster in img:
            hist[cluster] += 1
        hist_arr.append(hist)
    print(time.time() - start_time)
    return(hist_arr)

In [13]:
car_hist = cluster_histogram(img_clusters = valid_car_clusters, n_clusters = n_cluster)
noise_hist = cluster_histogram(img_clusters = valid_noise_clusters, n_clusters = n_cluster)

0.3626251220703125
0.3415672779083252


In [14]:
car_df = pd.DataFrame(car_hist)
car_df = pd.DataFrame(mm_scaler.transform(car_df))

In [15]:
noise_df = pd.DataFrame(noise_hist)
noise_df = pd.DataFrame(mm_scaler.transform(noise_df))

In [16]:
car_df['y'] = 1
noise_df['y'] = 0

In [17]:
df = pd.concat([car_df, noise_df])
X = df.drop(columns = ['y'])
y = df['y']
display(X.head())
display(y.head())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1490,1491,1492,1493,1494,1495,1496,1497,1498,1499
0,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,...,0.125,0.0,0.0,0.0,0.4,0.0,0.0,0.142857,0.125,0.166667
1,0.0,0.0,0.0,0.0,0.6,0.142857,0.0,0.0,0.0,0.142857,...,0.125,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.0,0.0,0.0,...,0.125,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.166667
3,0.0,0.0,0.0,0.0,0.0,0.0,0.111111,0.125,0.0,0.0,...,0.0,0.0,0.0,0.0,0.2,0.166667,0.2,0.285714,0.0,0.0
4,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.0,...,0.25,0.0,0.142857,0.2,0.0,0.0,0.0,0.142857,0.0,0.0


0    1
1    1
2    1
3    1
4    1
Name: y, dtype: int64

In [18]:
X_valid, Y_valid = shuffle(X.values, y.values)

In [19]:
Y_val_predict = svc_model.predict(X_valid)

In [20]:
print("For Clusters: ", n_cluster)
print("F1_score for Validation with clusters: {} = \t{:.4f}".format(n_cluster, f1_score(y_true = Y_valid, y_pred = Y_val_predict)) )
print("Accuracy for Validation with clusters: {} = \t{:.4f}".format(n_cluster, accuracy_score(y_true = Y_valid, y_pred = Y_val_predict)) )
print("Precision for Validation with clusters: {} = \t{:.4f}".format(n_cluster, precision_score(y_true = Y_valid, y_pred = Y_val_predict)) )
print("Recall for Validation with clusters: {} = \t{:.4f}".format(n_cluster, recall_score(y_true = Y_valid, y_pred = Y_val_predict)) )
print("AUC Score for Validation with clusters: {} = \t{:.4f}".format(n_cluster, roc_auc_score(y_true = Y_valid, y_score = Y_val_predict)) )


For Clusters:  1500
F1_score for Validation with clusters: 1500 = 	0.8141
Accuracy for Validation with clusters: 1500 = 	0.8222
Precision for Validation with clusters: 1500 = 	0.8372
Recall for Validation with clusters: 1500 = 	0.7922
AUC Score for Validation with clusters: 1500 = 	0.8217


In [21]:
winsound.Beep(freq, duration)