In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from organize import get_data
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from pprint import pprint
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
def random_hyperparameter_search(trained_model, hyperparameters, iterations,
                                 cross_validation_folds, X_train, y_train,
                                 X_test, y_test, random_state=20):
    """Run a randomized hyperparameter search and print the best result.

    Parameters
    ----------
    trained_model : estimator
        Estimator (here: a pipeline) passed to ``RandomizedSearchCV`` as
        ``estimator``.
    hyperparameters : dict
        Candidate values, passed as ``param_distributions``.
    iterations : int
        Number of parameter settings to sample (``n_iter``).
    cross_validation_folds : int
        Passed as ``cv``.
    X_train, y_train
        Data the search is fitted on.
    X_test, y_test
        Data handed to the fitted search's ``score`` for the final report.
    random_state : int or None, optional
        Seed forwarded to ``RandomizedSearchCV`` so the sampled candidates
        are the same on every run. Defaults to 20, the seed this notebook
        already uses for ``train_test_split``; pass ``None`` for an unseeded
        search.

    Returns
    -------
    None
        Results are only printed.
    """
    print("Performing Random Hyperparameter Search with hyperparameters:")
    pprint(hyperparameters)
    random_search = RandomizedSearchCV(estimator=trained_model,
                                       param_distributions=hyperparameters,
                                       n_iter=iterations,
                                       cv=cross_validation_folds,
                                       # Same parallelism as the grid-search helper.
                                       n_jobs=-1,
                                       random_state=random_state)
    random_search.fit(X_train, y_train)
    print("Best hyperparameters: ", random_search.best_params_)
    print("Best hyperparameter accuracy: ", random_search.score(X_test, y_test))

In [None]:
def grid_hyperparameter_search(trained_model, hyperparameters,
                               cross_validation_folds, X_train, y_train,
                               X_test, y_test):
    """Run an exhaustive grid search and print the best result.

    ``trained_model`` is passed to ``GridSearchCV`` as ``estimator``,
    ``hyperparameters`` as ``param_grid`` and ``cross_validation_folds`` as
    ``cv``; all available cores are used (``n_jobs=-1``). The search is
    fitted on ``X_train``/``y_train`` and then scored on
    ``X_test``/``y_test``. Nothing is returned; results are printed.
    """
    print("Performing Hyperparameter Grid Search with hyperparameters:")
    pprint(hyperparameters)

    search_settings = dict(estimator=trained_model,
                           param_grid=hyperparameters,
                           cv=cross_validation_folds,
                           n_jobs=-1)
    searcher = GridSearchCV(**search_settings)
    searcher.fit(X_train, y_train)

    winning_params = searcher.best_params_
    print("Best hyperparameters: ", winning_params)
    held_out_score = searcher.score(X_test, y_test)
    print("Best hyperparameter accuracy: ", held_out_score)

In [None]:
# Load the data via the local `organize` module (X/y are presumably the
# features and labels -- confirm against get_data), then hold out 20% of it
# for testing with a fixed seed so the split is repeatable.
X, y = get_data()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=20)

### Decision Tree 

In [None]:
class DecisionTree:
    """Pipeline wrapper: standard scaling followed by a decision tree.

    The pipeline steps are named 'scalar' and 'decision_tree'. The training
    data is stored on the instance and used by ``train``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y
        scaling_step = ('scalar', StandardScaler())
        tree_step = ('decision_tree', DecisionTreeClassifier())
        self.dec_tree_steps = [scaling_step, tree_step]

    def train(self):
        """Fit the pipeline on the stored data, keep it as ``self.model`` and return it."""
        pipeline = Pipeline(self.dec_tree_steps)
        self.model = pipeline.fit(self.X, self.y)
        return self.model

    def score(self, X_test, y_test):
        """Return the fitted pipeline's score; ``train`` must have been called first."""
        fitted = self.model
        return fitted.score(X_test, y_test)

In [None]:
# Show the names of the parameters a default DecisionTreeClassifier exposes.
DecisionTreeClassifier().get_params().keys()

In [None]:
# Show the parameters of a default DecisionTreeClassifier with their values.
DecisionTreeClassifier().get_params()

In [None]:
# Random hyperparameter search for the decision tree.

dec_tree_model = DecisionTree(X_train, y_train)
dec_tree_trained = dec_tree_model.train()

criterion = ['gini', 'entropy']
splitter = ['best', 'random']
min_samples_split = [2, 3, 4, 5]
# 'auto' was only an alias of 'sqrt' for DecisionTreeClassifier and is
# rejected by newer scikit-learn releases; None (consider every feature)
# keeps three distinct options.
max_features = [None, 'sqrt', 'log2']

# Keys must be '<pipeline step name>__<parameter>'. The tree step is
# registered as 'decision_tree' in DecisionTree.dec_tree_steps, so a
# 'dec_tree__' prefix is an invalid parameter for the pipeline.
dec_tree_hyperparameters = {
    'decision_tree__criterion': criterion,
    'decision_tree__splitter': splitter,
    'decision_tree__min_samples_split': min_samples_split,
    'decision_tree__max_features': max_features,
}
# 2 * 2 * 4 * 3 = 48 combinations exist, so 48 iterations cover them all.
iterations = 48
cross_validation_folds = 5

random_hyperparameter_search(dec_tree_trained, dec_tree_hyperparameters,
                             iterations, cross_validation_folds,
                             X_train, y_train, X_test, y_test)

In [None]:
# Grid hyperparameter search for the decision tree.

dec_tree_model = DecisionTree(X_train, y_train)
dec_tree_trained = dec_tree_model.train()

criterion = ['gini', 'entropy']
splitter = ['best', 'random']
min_samples_split = [2, 3, 4, 5]
# 'auto' was only an alias of 'sqrt' for DecisionTreeClassifier and is
# rejected by newer scikit-learn releases; None (consider every feature)
# keeps three distinct options.
max_features = [None, 'sqrt', 'log2']

# Keys must be '<pipeline step name>__<parameter>'. The tree step is
# registered as 'decision_tree' in DecisionTree.dec_tree_steps, so a
# 'dec_tree__' prefix is an invalid parameter for the pipeline.
dec_tree_hyperparameters = {
    'decision_tree__criterion': criterion,
    'decision_tree__splitter': splitter,
    'decision_tree__min_samples_split': min_samples_split,
    'decision_tree__max_features': max_features,
}
cross_validation_folds = 5

grid_hyperparameter_search(dec_tree_trained, dec_tree_hyperparameters,
                           cross_validation_folds,
                           X_train, y_train, X_test, y_test)

### kNN

In [None]:
class KNN:
    """Pipeline wrapper: standard scaling followed by k-nearest neighbours.

    The pipeline steps are named 'scalar' and 'kNN'. The training data is
    stored on the instance and used by ``train``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y
        scaling_step = ('scalar', StandardScaler())
        neighbours_step = ('kNN', KNeighborsClassifier())
        self.kNN_steps = [scaling_step, neighbours_step]

    def train(self):
        """Fit the pipeline on the stored data, keep it as ``self.model`` and return it."""
        pipeline = Pipeline(self.kNN_steps)
        self.model = pipeline.fit(self.X, self.y)
        return self.model

    def score(self, X_test, y_test):
        """Return the fitted pipeline's score; ``train`` must have been called first."""
        fitted = self.model
        return fitted.score(X_test, y_test)

In [None]:
# Show the names of the parameters a default KNeighborsClassifier exposes.
KNeighborsClassifier().get_params().keys()

In [None]:
# Show the parameters of a default KNeighborsClassifier with their values.
KNeighborsClassifier().get_params()

In [None]:
# Random hyperparameter search for kNN.

# Candidate values for each KNeighborsClassifier setting; the dictionary
# keys address the pipeline step named 'kNN'.
algorithms = ['ball_tree', 'kd_tree', 'brute']
dist_metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski']
n_neighbors = [1, 2, 3, 5, 7, 9]
weights = ['uniform', 'distance']
kNN_hyperparameters = dict([
    ('kNN__algorithm', algorithms),
    ('kNN__metric', dist_metric),
    ('kNN__n_neighbors', n_neighbors),
    ('kNN__weights', weights),
])
iterations = 100
cross_validation_folds = 5

kNN_model = KNN(X_train, y_train)
kNN_trained = kNN_model.train()

random_hyperparameter_search(
    kNN_trained, kNN_hyperparameters, iterations, cross_validation_folds,
    X_train, y_train, X_test, y_test)

In [None]:
# Grid hyperparameter search for kNN.

# Candidate values for each KNeighborsClassifier setting; the dictionary
# keys address the pipeline step named 'kNN'.
algorithms = ['ball_tree', 'kd_tree', 'brute']
dist_metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski']
n_neighbors = [1, 2, 3, 5, 7, 9]
weights = ['uniform', 'distance']
kNN_hyperparameters = dict([
    ('kNN__algorithm', algorithms),
    ('kNN__metric', dist_metric),
    ('kNN__n_neighbors', n_neighbors),
    ('kNN__weights', weights),
])
cross_validation_folds = 5

kNN_model = KNN(X_train, y_train)
kNN_trained = kNN_model.train()

grid_hyperparameter_search(
    kNN_trained, kNN_hyperparameters, cross_validation_folds,
    X_train, y_train, X_test, y_test)

### Logistic Regression

In [None]:
class LogisticRegressionClassifier:
    """Pipeline wrapper: standard scaling followed by logistic regression.

    The pipeline steps are named 'scalar' and 'log_reg'. The training data
    is stored on the instance and used by ``train``.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y
        scaling_step = ('scalar', StandardScaler())
        regression_step = ('log_reg', LogisticRegression())
        self.log_reg_steps = [scaling_step, regression_step]

    def train(self):
        """Fit the pipeline on the stored data, keep it as ``self.model`` and return it."""
        pipeline = Pipeline(self.log_reg_steps)
        self.model = pipeline.fit(self.X, self.y)
        return self.model

    def score(self, X_test, y_test):
        """Return the fitted pipeline's score; ``train`` must have been called first."""
        fitted = self.model
        return fitted.score(X_test, y_test)

In [None]:
# Show the names of the parameters a default LogisticRegression exposes.
LogisticRegression().get_params().keys()

In [None]:
# Show the parameters of a default LogisticRegression with their values.
LogisticRegression().get_params()

In [None]:
# Random hyperparameter search for logistic regression.

log_reg_model = LogisticRegressionClassifier(X_train, y_train)
log_reg_trained = log_reg_model.train()

penalty = ['l1', 'l2']
# LogisticRegression's default 'lbfgs' solver does not support penalty='l1',
# so those candidates would fail to fit. 'saga' supports both penalties and
# suits this pipeline because the features are standardized first
# ('liblinear' is an alternative for binary problems).
solver = ['saga']
log_reg_hyperparameters = {'log_reg__penalty': penalty,
                           'log_reg__solver': solver}
# Only len(penalty) * len(solver) distinct settings exist; requesting more
# iterations than that cannot sample anything new.
iterations = len(penalty) * len(solver)
cross_validation_folds = 5

random_hyperparameter_search(log_reg_trained, log_reg_hyperparameters,
                             iterations, cross_validation_folds,
                             X_train, y_train, X_test, y_test)

In [None]:
# Grid hyperparameter search for logistic regression.

log_reg_model = LogisticRegressionClassifier(X_train, y_train)
log_reg_trained = log_reg_model.train()

penalty = ['l1', 'l2']
# LogisticRegression's default 'lbfgs' solver does not support penalty='l1',
# so those candidates would fail to fit. 'saga' supports both penalties and
# suits this pipeline because the features are standardized first
# ('liblinear' is an alternative for binary problems).
solver = ['saga']
log_reg_hyperparameters = {'log_reg__penalty': penalty,
                           'log_reg__solver': solver}
cross_validation_folds = 5

grid_hyperparameter_search(log_reg_trained, log_reg_hyperparameters,
                           cross_validation_folds,
                           X_train, y_train, X_test, y_test)