# Training Notebook

## Imports

In [3]:
import numpy as np
import pandas as pd
# NOTE(review): matplotlib recommends `matplotlib.pyplot` over `pylab`; the
# pylab import is kept because later cells may rely on the extra names it exposes.
import matplotlib.pylab as plt

# model pipeline imports
# IterativeImputer is experimental in scikit-learn: the enabling import must
# run before IterativeImputer itself is imported, so keep these two in order.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.impute import IterativeImputer, KNNImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix

# model imports
# Everything is imported from scikit-learn's public packages. The underscore
# prefixed modules (e.g. `sklearn.ensemble._forest`) are private implementation
# details and may be renamed or moved between releases.
from sklearn.calibration import CalibratedClassifierCV
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)
from sklearn.ensemble import (
    AdaBoostClassifier,
    BaggingClassifier,
    ExtraTreesClassifier,
    GradientBoostingClassifier,
    RandomForestClassifier,
    StackingClassifier,
    VotingClassifier,
)
from sklearn.linear_model import (
    LogisticRegression,
    PassiveAggressiveClassifier,
    RidgeClassifier,
    SGDClassifier,
)
from sklearn.multioutput import ClassifierChain
from sklearn.naive_bayes import BernoulliNB, GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import LinearSVC, NuSVC, SVC
from sklearn.tree import DecisionTreeClassifier

import xgboost as xgb

# Silence xgboost's own log/warning output for the whole session.
xgb.set_config(verbosity=0)

## Importing processed datasets

In [5]:
# Folder with the pre-processed CSV files, relative to the notebook's working
# directory. Forward slashes are used because they are accepted on Windows,
# Linux and macOS alike, whereas the previous '.\...' form only resolved on
# Windows. The folder spelling is kept exactly as in the original path.
DATA_DIR = './proccesed_data'

# Each CSV is read into its own DataFrame with pandas' default parsing options.
x_train = pd.read_csv(f'{DATA_DIR}/x_train.csv')
y_train = pd.read_csv(f'{DATA_DIR}/y_train.csv')
x_test = pd.read_csv(f'{DATA_DIR}/x_test.csv')

## Creating the list of all models

In [6]:
# Fixed seed handed to every estimator that is randomised at its default
# settings, so a fresh "Restart & Run All" reproduces the same fitted models.
RANDOM_SEED = 42


def scaled_pipeline(estimator):
    """Wrap ``estimator`` in a Pipeline that standardises the features first.

    Parameters
    ----------
    estimator : scikit-learn compatible classifier (unfitted)

    Returns
    -------
    sklearn.pipeline.Pipeline
        Two steps named "scaling" (StandardScaler) and "model" (the given
        estimator). The step names are identical for every candidate so that
        hyper-parameters can be addressed uniformly, e.g. ``model__C``.
    """
    return Pipeline([("scaling", StandardScaler()), ("model", estimator)])


# (display name, unfitted classifier) pairs, in the order they are evaluated.
_candidate_estimators = [
    ('AdaBoost', AdaBoostClassifier(random_state=RANDOM_SEED)),
    ('KNN', KNeighborsClassifier()),
    ('BagClass', BaggingClassifier(random_state=RANDOM_SEED)),
    ('Bernouli', BernoulliNB()),
    ('Calibrated', CalibratedClassifierCV()),
    ('DecisionT', DecisionTreeClassifier(random_state=RANDOM_SEED)),
    ('ExtraTrees', ExtraTreesClassifier(random_state=RANDOM_SEED)),
    ('GaussNB', GaussianNB()),
    ('GBoostC', GradientBoostingClassifier(random_state=RANDOM_SEED)),
    ('LDA', LinearDiscriminantAnalysis()),
    ('LogisticR', LogisticRegression()),
    ('MLP', MLPClassifier(hidden_layer_sizes=(100, 8), max_iter=500,
                          random_state=RANDOM_SEED)),
    ('NuSVC', NuSVC()),
    ('PAC', PassiveAggressiveClassifier(random_state=RANDOM_SEED)),
    ('QDA', QuadraticDiscriminantAnalysis()),
    ('RandomF', RandomForestClassifier(random_state=RANDOM_SEED)),
    ('RidgeC', RidgeClassifier()),
    ('SGD', SGDClassifier(random_state=RANDOM_SEED)),
    ('SVC', SVC()),
    # NOTE(review): `nthread` and `use_label_encoder` are legacy arguments in
    # recent xgboost releases (`n_jobs` is the current spelling of the former);
    # they are kept unchanged for compatibility with the installed version.
    ('XGB', xgb.XGBClassifier(
        learning_rate=.01,
        n_estimators=2000,
        max_depth=4,
        min_child_weight=2,
        gamma=0.9,
        subsample=0.8,
        colsample_bytree=0.8,
        objective='binary:logistic',
        nthread=-1,
        scale_pos_weight=1,
        use_label_encoder=False)),
]

# Final list consumed by the rest of the notebook: (name, Pipeline) tuples.
models = [(name, scaled_pipeline(estimator))
          for name, estimator in _candidate_estimators]

# Candidates that are currently disabled. NOTE(review): the reasons below are
# inferred from the scikit-learn API, not stated by the original author - confirm:
#   - MultinomialNB rejects negative feature values, which StandardScaler produces.
#   - StackingClassifier / VotingClassifier expect `estimators` to be a list of
#     (name, estimator) tuples rather than a bare estimator.
#   - ClassifierChain is designed for multi-label targets.
# ('Classchain', Pipeline([("scaling", StandardScaler()), ("model", ClassifierChain(base_estimator=LogisticRegression()))])),
# ('MultiNB', Pipeline([("scaling", StandardScaler()), ("model", MultinomialNB())])),
# ('L-SVC', Pipeline([("scaling", StandardScaler()), ("model", LinearSVC())])),
# ('StackingC', Pipeline([("scaling", StandardScaler()), ("model", StackingClassifier(estimators=LogisticRegression()))])),
# ('VotingC', Pipeline([("scaling", StandardScaler()), ("model", VotingClassifier(estimators=LogisticRegression()))])),

# Sanity check: show one entry to confirm the (name, Pipeline) structure.
print(models[1])

('KNN', Pipeline(steps=[('scaling', StandardScaler()),
                ('model', KNeighborsClassifier())]))
