# Test with preferred model 

This notebook implements the KNN model with 5 neighbors. It takes the raw features X1, X3, X4, and X6 as input and selects only X1, X6, and F_w_mean for training. 

In [1]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split, RepeatedStratifiedKFold
from sklearn.feature_selection import f_classif
import itertools
import sys
import importlib
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis 
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, make_scorer
from joblib import Parallel, delayed, parallel_backend
from threadpoolctl import threadpool_limits
import matplotlib.pylab as plt
import os
from sklearn.inspection import permutation_importance
from scipy.stats import norm, t

#Add the parent directory to the module search path so that proj_mod can be imported
#NOTE(review): "../" is resolved against the current working directory -- presumably the notebook's own folder; confirm
sys.path.append("../")
from proj_mod import training
#Reload the module so that edits to it are picked up without restarting the kernel; the trailing ";" suppresses the cell output
importlib.reload(training);

## Set path to train and test data 

In [None]:
#String path to the train data. Change this if another train set should be used instead.
str_train_data="../data/raw.csv"
#String path to the test data. This is a placeholder string: adjust it before running.
str_test_data="../data/test.csv"

In [None]:
#Raw feature columns read from both CSV files
features=["X1","X3","X4","X6"]
#Target column, kept as a one-element list; the resulting single-column frame is flattened below
target=["Y"]

#Read the train and test files from the paths set in the configuration cell
df_train=pd.read_csv(str_train_data)
df_test=pd.read_csv(str_test_data)

#Train split: feature frame and flat 1-D target array
feat_train=df_train[features]
tar_train=df_train[target].to_numpy().ravel()

#Test split: same columns, same layout
feat_test=df_test[features]
tar_test=df_test[target].to_numpy().ravel()

## Create pipeline 

In [None]:
#Hyper-parameters
#nn: number of neighbors used by the KNN classifier
nn=5
#force: column names handed to training.data_selector
#NOTE(review): presumably the features that are kept for training, with F_w_mean produced by data_creator -- confirm in proj_mod.training
force=["X1","X6","F_w_mean"]

#Assemble the pipeline: project data creation -> project feature selection -> standardization -> KNN
pipe=Pipeline(
    steps=[
        ("DataCreate", training.data_creator()),
        ("DataSelector", training.data_selector(force=force)),
        ("scale", StandardScaler()),
        ("KNN", KNeighborsClassifier(n_neighbors=nn)),
    ]
)

## Train and predict 

In [None]:
#Fit every pipeline step on the train features and train target
pipe.fit(feat_train, tar_train)

#Predict the target for the test features with the fitted pipeline
tar_pred=pipe.predict(feat_test)

## Evaluation 

In [None]:
#All metrics compare the true test target (first argument) against the pipeline predictions (second argument)

#Accuracy score
acc=accuracy_score(tar_test, tar_pred)

#f1 score
#NOTE(review): f1_score is called with its default averaging (binary) -- assumes Y has exactly two classes; confirm
f1=f1_score(tar_test, tar_pred)

#Confusion matrix
cmatrix=confusion_matrix(tar_test, tar_pred)

#Classification report (a formatted text summary)
creport=classification_report(tar_test, tar_pred)

In [None]:
#Display the accuracy score on the test data
acc

In [None]:
#Display the f1 score on the test data
f1

In [None]:
#Display the confusion matrix of true test target versus prediction
cmatrix

In [None]:
#Display the classification report
#classification_report returns one multi-line formatted string; print it so the table is rendered
#line by line instead of being shown as a quoted repr with literal "\n" escapes
print(creport)