In [24]:
import pandas as pd, numpy as np, numpy.linalg as nl, inspect, seaborn as sns, plotly, plotly.express as px, pickle, plotly.graph_objects as go, itertools
from tqdm import tqdm
from matplotlib.pyplot import *
from pathlib import Path
from sklearn.cluster import KMeans, AgglomerativeClustering,DBSCAN
import kneed
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, classification_report,precision_recall_fscore_support, normalized_mutual_info_score,confusion_matrix,ConfusionMatrixDisplay
from sklearn.metrics.cluster import contingency_matrix
from matplotlib import cm
from scipy.cluster.hierarchy import dendrogram, ward,single,complete
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA, KernelPCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC,LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, BaggingClassifier, AdaBoostClassifier,StackingClassifier
from pandas.api.types import is_numeric_dtype
from sklearn.tree import DecisionTreeClassifier
from sklearn.calibration import CalibratedClassifierCV
import matplotlib.image as mpimg

In [25]:
# Lookup table: full class name -> class id.
# Ids follow the order of the names below (starting at 0) and are stored as
# NumPy strings; the data-loading cell casts them to int after the lookup.
mapping = {
    np.str_(class_name): np.str_(class_id)
    for class_id, class_name in enumerate((
        'chevrolet_impala_2004-2005',
        'dodge_grand caravan_2005-2006',
        'ford_explorer_2001-2002',
        'honda_accord_2005-2006',
        'nissan_altima_2005-2006',
        'toyota_camry_2002-2003',
        'volkswagen_jetta_2002-2003',
        'ford_mustang_2000-2001',
        'ford_taurus_2002-2003',
        'honda_odyssey_2005-2006',
    ))
}

In [26]:
# Configuration: edit these to choose where results go and where the data lives.
result_location = Path('results.csv')
data_dir = Path('../Data')

# train_data.csv is read without a header row; its two columns are named here.
metadata = pd.read_csv(data_dir / 'train_data.csv', names=['full_class', 'filename'], header=None)
# Break the underscore-separated class label into its three parts.
metadata[['make', 'model', 'year']] = metadata['full_class'].str.split('_', expand=True)
# Translate each label to its class id through `mapping` (an unknown label
# raises KeyError), then cast the string ids to int.
metadata['class'] = metadata['full_class'].apply(lambda label: mapping[label]).astype(int)
# Image location: <data_dir>/train_images/<full_class>/<filename>.
metadata['file_path'] = f'{data_dir}/train_images/' + metadata['full_class'] + '/' + metadata['filename']

# Feature tables for the training and test sets; both files are read without a header row.
features = pd.read_csv(data_dir / 'train_features.csv', header=None)
test_file = pd.read_csv(data_dir / 'test_features.csv', header=None)


In [27]:
# NumPy arrays handed to scikit-learn: feature matrices for train/test and the
# integer class ids used as training labels.
x_train, x_test = features.to_numpy(), test_file.to_numpy()
y_train = metadata['class'].to_numpy()

In [28]:
# Shared integer constant. Presumably intended as the `random_state` seed for the
# stochastic estimators below — NOTE(review): confirm it is actually passed to them.
STATE = 1

In [29]:
# Base learners used by the stacked ensemble. Each one is seeded with STATE so
# that re-running the notebook fits the same models (previously STATE was
# defined but never used, making every run non-reproducible).

# Two MLPs sharing architecture, L2 penalty (alpha) and initial learning rate.
mlp_model_1 = MLPClassifier(alpha=.4, hidden_layer_sizes=[256, 128, 64, 64],
                            learning_rate_init=.004, random_state=STATE)
# NOTE(review): scikit-learn documents `learning_rate` as only used when
# solver='sgd'; with the default solver this argument has no effect.
# Confirm whether solver='sgd' was intended here.
mlp_model_2 = MLPClassifier(alpha=.4, hidden_layer_sizes=[256, 128, 64, 64],
                            learning_rate_init=.004, learning_rate='adaptive',
                            random_state=STATE)
# probability=True enables predict_proba; its internal cross-validation is
# randomized, hence the seed.
svm_model = SVC(C=1.25, gamma=.12857142857142856, probability=True, random_state=STATE)
rf_model = RandomForestClassifier(max_depth=100, n_estimators=450, random_state=STATE)

In [30]:
def _pca_minmax_pipeline(clf, leading_steps=()):
    """Build the preprocessing + classifier pipeline shared by all base learners.

    Steps, in order: any ``leading_steps`` -> PCA keeping 99% of the variance
    -> MinMaxScaler -> ``clf``. Fresh PCA / scaler instances are created on
    every call so pipelines never share fitted state.

    Parameters
    ----------
    clf : estimator
        Final classifier, registered under the step name ``'clf'``.
    leading_steps : sequence of (name, transformer), optional
        Extra transformer steps placed before the PCA step.

    Returns
    -------
    sklearn.pipeline.Pipeline
    """
    return Pipeline([
        *leading_steps,
        ('pca', PCA(n_components=.99)),
        ('scaler', MinMaxScaler()),
        ('clf', clf),
    ])


# Named base learners for the stack. The first one additionally maps the
# features through a 400-cluster KMeans transform before PCA; KMeans is seeded
# with STATE so its centroids are reproducible.
estimators = [
    ('Kmean_mlp', _pca_minmax_pipeline(
        mlp_model_1,
        leading_steps=[('Kmean', KMeans(n_clusters=400, random_state=STATE))])),
    ('plain_mlp', _pca_minmax_pipeline(mlp_model_2)),
    ('SVM', _pca_minmax_pipeline(svm_model)),
    ('RF', _pca_minmax_pipeline(rf_model)),
]

# cv=4 is a stated requirement (per the original author's note); the
# base-learner outputs are combined by a CalibratedClassifierCV meta-estimator.
stacked = StackingClassifier(estimators=estimators, n_jobs=-1, cv=4,
                             final_estimator=CalibratedClassifierCV())
stacked.fit(x_train, y_train)


In [31]:
# Predict a class id for every row of the test feature matrix using the fitted stack.
test_results = stacked.predict(x_test)

In [32]:
# Single-column frame ('called') holding the predicted class ids.
df = pd.DataFrame({'called': test_results})

In [33]:
# Write the predictions (one per line, no index column, no header row) to the
# path configured in `result_location`; the file name used to be hard-coded
# here, so editing the configuration had no effect on where results were saved.
# header=False is the documented way to omit the header row.
df.to_csv(result_location, index=False, header=False)

6    12
3     5
4     1
1     1
5     1
Name: called, dtype: int64

0.86