In [150]:
import pandas as pd
import numpy as np 
import os
import time
import pickle
import winsound


In [151]:
from sklearn.decomposition import LatentDirichletAllocation as LDA, PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.utils import shuffle
from sklearn.preprocessing import MinMaxScaler

In [152]:
start_program = time.time()
duration = 800  # milliseconds
freq = 300 # Hz
winsound.Beep(freq, duration)
n_component = 1000

In [153]:
#Importing histograms!
folder_path = os.getcwd()
hist_path = "\\".join((folder_path, "FREAK_HISTOGRAM", "Histograms"))
train_car_path = "\\".join((hist_path, "Car", "Train"))
train_noise_path = "\\".join((hist_path, "nocar", "Train"))
valid_car_path = "\\".join((hist_path, "Car", "Val"))
valid_noise_path = "\\".join((hist_path, "nocar", "Val"))

In [154]:
train_car_files = os.listdir(train_car_path)
train_car_hist_path = "\\".join((train_car_path, train_car_files[0]))
train_noise_files = os.listdir(train_noise_path)
train_noise_hist_path = "\\".join((train_noise_path, train_noise_files[0]))

valid_car_files = os.listdir(valid_car_path)
valid_car_hist_path = "\\".join((valid_car_path, valid_car_files[0]))
valid_noise_files = os.listdir(valid_noise_path)
valid_noise_hist_path = "\\".join((valid_noise_path, valid_noise_files[0]))

In [155]:
train_car_hist = pd.DataFrame(pickle.load(open(train_car_hist_path, 'rb')))
train_noise_hist = pd.DataFrame(pickle.load(open(train_noise_hist_path, 'rb')))

valid_car_hist = pd.DataFrame(pickle.load(open(valid_car_hist_path, 'rb')))
valid_noise_hist = pd.DataFrame(pickle.load(open(valid_noise_hist_path, 'rb')))

In [156]:
X_train_car = np.vstack(train_car_hist[1].values.tolist())
Y_train_car = train_car_hist[0]

In [157]:
X_train_noise= np.vstack(train_noise_hist[1].values.tolist())
Y_train_noise= train_noise_hist[0]

In [158]:
X_train = np.concatenate((X_train_car, X_train_noise))
Y_train = np.concatenate((Y_train_car, Y_train_noise))

In [159]:
print(np.shape(X_train_car))
print(np.shape(X_train_noise))

(6554, 16384)
(6776, 16384)


In [160]:
mm_scaler = MinMaxScaler()
X_train = mm_scaler.fit_transform(X_train)

In [161]:
# print(np.shape(X_train))

In [162]:
index = np.random.permutation(len(X_train))
X_train_shuffled = X_train[index]
Y_train_shuffled = Y_train[index]

In [163]:
X_valid_car = np.vstack(valid_car_hist[1].values.tolist())
Y_valid_car = valid_car_hist[0]
X_valid_noise = np.vstack(valid_noise_hist[1].values.tolist())
Y_valid_noise = valid_noise_hist[0]

In [164]:
X_valid = np.concatenate((X_valid_car, X_valid_noise))
Y_valid = np.concatenate((Y_valid_car, Y_valid_noise))
X_valid = mm_scaler.transform(X_valid)
index = np.random.permutation(len(X_valid))
X_valid_shuffled = X_valid[index]
Y_valid_shuffled = Y_valid[index]

In [165]:
start_time = time.time()
pca_iter = PCA(n_components = n_component)
X_train_pca = pca_iter.fit_transform(X_train_shuffled)
print("PCA: ".format(pca_iter))
# print("Last Component Variance: {:.4f}".format(pca_iter.explained_variance_[-1]))
print("Time: {:.4f}".format(time.time() - start_time))

PCA: 
Time: 96.0086


Data Entry for ORB
* PCA_500: Time = 92.37 seconds
* PCA_5000: Time = 597.48 seconds


In [166]:
print("Last Component Variance: {:.4f}".format(pca_iter.explained_variance_[-1]))


Last Component Variance: 0.0426


In [167]:
X_valid_pca = pca_iter.transform(X_valid_shuffled)

In [168]:
pca_name = "_".join(("PCA_FREAK_Scaled","C"+str(n_component)+".sav"))
print(pca_name)
pickle.dump(pca_iter, open(pca_name, 'wb'))

PCA_FREAK_Scaled_C1000.sav


In [169]:
start_time = time.time()
model_svm = SVC()
model_svm.fit(X_train_pca, Y_train)
print("Time {:.2f}".format(time.time() - start_time))

Time 267.26


* SVM_PCA_500: 186 seconds
* SVM_PCA_5000: 2224.40 seconds

In [170]:
X_valid_pca = pca_iter.transform(X_valid)
Y_val_predict = model_svm.predict(X = X_valid_pca)

In [177]:
print("For Components: ", n_component)
print("Last Component Variance with components: {} = \t\t{:.4f}".format(n_component, pca_iter.explained_variance_[-1]))
print("")
print("F1_score for Validation with components: {} = \t\t{:.4f}".format(n_component, f1_score(y_true = Y_valid, y_pred = Y_val_predict)) )
print("Accuracy for Validation with components: {} = \t\t{:.4f}".format(n_component, accuracy_score(y_true = Y_valid, y_pred = Y_val_predict)) )
print("Precision for Validation with components: {} = \t\t{:.4f}".format(n_component, precision_score(y_true = Y_valid, y_pred = Y_val_predict)) )
print("Recall for Validation with components: {} = \t\t\t{:.4f}".format(n_component, recall_score(y_true = Y_valid, y_pred = Y_val_predict)) )
print("AUC Score for Validation with components: {} = \t\t{:.4f}".format(n_component, roc_auc_score(y_true = Y_valid, y_score = Y_val_predict)) )

For Components:  1000
Last Component Variance with components: 1000 = 		0.0426

F1_score for Validation with components: 1000 = 		0.4149
Accuracy for Validation with components: 1000 = 		0.4922
Precision for Validation with components: 1000 = 		0.4785
Recall for Validation with components: 1000 = 			0.3663
AUC Score for Validation with components: 1000 = 		0.4901


* SVM_PCA_500_MM: F1 Score = 0.4725
* SVM_PCA_500_MM: F1 Score = 0.5114

In [172]:
svc_name = "_".join(("PCA_SVM_FREAK_Scaled","C"+str(n_component)+".sav"))
print(svc_name)
pickle.dump(model_svm, open(svc_name, 'wb'))

PCA_SVM_FREAK_Scaled_C1000.sav


In [173]:
winsound.Beep(freq, duration)
print("Time: {:.2f}".format(time.time() - start_program))

Time: 426.06
