In [1]:
import os

# Move the process working directory to the parent of the current one, so
# relative paths used later resolve from there.
# NOTE(review): the path is relative to the *current* working directory, so
# running this cell again without restarting the kernel climbs one more
# level each time -- confirm this cell is only ever executed once per session.
os.chdir('../.')

# Show the resulting working directory as the cell output.
%pwd

'd:\\work\\loan-approval-prediction'

In [None]:
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score


In [None]:
def Hyperparameter_LogisticRegression(X_train, y_train):
    """logistic regression - hyperparameter tuning

    Runs a 5-fold cross-validated grid search over the inverse
    regularisation strength ``C`` (15 values spaced logarithmically from
    1e-5 to 1e8), prints the best parameters and their mean
    cross-validation score, and returns the fitted search.

    Parameters
    ----------
    X_train : training features, handed unchanged to ``GridSearchCV.fit``.
    y_train : training labels, handed unchanged to ``GridSearchCV.fit``.

    Returns
    -------
    The fitted ``GridSearchCV`` object, so callers can read
    ``best_params_`` / ``best_score_`` / ``cv_results_`` programmatically
    instead of copying values from the printed output. Previously the
    function returned ``None`` and the search was lost.
    """
    # Creating the hyperparameter grid
    c_space = np.logspace(-5, 8, 15)
    param_grid = {'C': c_space}
    # Instantiating logistic regression classifier.
    # NOTE(review): the classifier is built with library defaults only; if
    # convergence warnings show up for the larger C values, scaling the
    # features or raising max_iter may be needed -- confirm against the data.
    logreg = LogisticRegression()
    # Instantiating the GridSearchCV object
    logreg_cv = GridSearchCV(logreg, param_grid, cv = 5)
    logreg_cv.fit(X_train, y_train)
    # Print the tuned parameters and score
    print("Tuned Logistic Regression Parameters: {}".format(logreg_cv.best_params_))
    print("Best score is {}".format(logreg_cv.best_score_))
    # Hand the fitted search back so the tuned model is not thrown away.
    return logreg_cv

In [None]:
def Hyperparameter_SVC(X_train, y_train):
    """support vector machine classifier - hyperparameter tuning

    Grid-searches an RBF-kernel ``SVC`` over 5 values of ``C`` and 5
    values of ``gamma`` (25 candidates), prints the best parameters, the
    refitted best estimator and the best cross-validation score, and
    returns the fitted search.

    Parameters
    ----------
    X_train : training features, handed unchanged to ``GridSearchCV.fit``.
    y_train : training labels, handed unchanged to ``GridSearchCV.fit``.

    Returns
    -------
    The fitted ``GridSearchCV`` object. Previously the function returned
    ``None``, so the refitted ``best_estimator_`` could not be reused by
    the caller.
    """
    # defining parameter range
    param_grid = {'C': [0.1, 1, 10, 100, 1000],
                  'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                  'kernel': ['rbf']}
    # No cv argument is given, so the library's default splitting is used.
    grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)
    # fitting the model for grid search
    grid.fit(X_train, y_train)
    # print best parameter after tuning
    print(grid.best_params_)
    # print how our model looks after hyper-parameter tuning
    print(grid.best_estimator_)
    print(grid.best_score_)
    # Hand the fitted search back so the tuned model is not thrown away.
    return grid
    

In [None]:
def Hyperparameter_KNN(X_train, y_train):
    """k-nn - hyperparameter tuning

    Runs a 7-fold cross-validated grid search over the number of
    neighbours, the vote weighting and the Minkowski power ``p``, prints
    the best score and parameters, and returns the fitted search.

    The grid only lists ``metric='minkowski'``. ``p`` is the power of the
    Minkowski metric: ``p=1`` is the Manhattan distance and ``p=2`` the
    Euclidean distance, and ``p`` has no effect on the 'euclidean' and
    'manhattan' metrics. Crossing those two metrics with five ``p``
    values therefore only re-fitted models already covered by the
    Minkowski candidates (210 candidates for 70 distinct models).

    Parameters
    ----------
    X_train : training features, handed unchanged to ``GridSearchCV.fit``.
    y_train : training labels, handed unchanged to ``GridSearchCV.fit``.

    Returns
    -------
    The fitted ``GridSearchCV`` object. Previously the function returned
    ``None``.
    """
    grid_params = { 'n_neighbors' : [3,5,7,9,11,13,15],
                   'weights' : ['uniform','distance'],
                   # p=1 -> manhattan, p=2 -> euclidean, so both are still searched
                   'metric' : ['minkowski'],
                   'p' : [1,2,3,4,5]}
    gs = GridSearchCV(KNeighborsClassifier(), grid_params, verbose = 1, cv=7, n_jobs = -1)
    # fit the model on our train set
    g_res = gs.fit(X_train, y_train)
    # find the best score
    print(g_res.best_score_)
    # get the hyperparameters with the best score
    print(g_res.best_params_)
    # Hand the fitted search back so the tuned model is not thrown away.
    return g_res

In [None]:
def Hyperparameter_RF(X_train, y_train):
    """random forest - hyperparameter tuning

    Runs an exhaustive 3-fold cross-validated grid search over the number
    of trees, the maximum depth and the split/leaf minimum sample counts
    of a ``RandomForestClassifier`` seeded with ``random_state=0``, prints
    the best parameters and score, and returns the fitted search.

    The grid holds 7 * 5 * 5 * 4 = 700 candidates, i.e. 2100 forest fits
    with up to 1200 trees each, so expect a long run even with
    ``n_jobs=-1``.

    Parameters
    ----------
    X_train : training features, handed unchanged to ``GridSearchCV.fit``.
    y_train : training labels, handed unchanged to ``GridSearchCV.fit``.

    Returns
    -------
    The fitted ``GridSearchCV`` object. Previously the function returned
    ``None``, so the tuned values had to be copied by hand from the
    printed output.
    """
    # search RF best parameters
    forest = RandomForestClassifier(random_state = 0)
    n_estimators = [50, 80, 100, 300, 500, 800, 1200]
    max_depth = [5, 8, 15, 25, 30]
    min_samples_split = [2, 5, 10, 15, 100]
    min_samples_leaf = [1, 2, 5, 10]
    hyperF = dict(n_estimators = n_estimators, max_depth = max_depth,
                  min_samples_split = min_samples_split,
                  min_samples_leaf = min_samples_leaf)
    # grid-search
    gridF = GridSearchCV(forest, hyperF, cv = 3, verbose = 1, n_jobs = -1)
    bestF = gridF.fit(X_train, y_train)
    print(bestF.best_params_)
    print(bestF.best_score_)
    # Hand the fitted search back so the tuned model is not thrown away.
    return bestF

In [None]:
def Hyperparameter_RF2(X_train, X_test, y_train, y_test, seeds = range(1, 500), threshold = 0.75):
    """random forest - best accuracy finding with random_state changing after hyperparameter tuning

    Fits one fixed-configuration ``RandomForestClassifier`` per seed,
    scores it on the held-out set and reports every seed whose accuracy,
    rounded to two decimals, is strictly above ``threshold``.

    NOTE(review): seeds are ranked by accuracy on ``X_test`` / ``y_test``,
    so the best reported figure is an optimistic estimate; picking a seed
    this way tunes the model to the test set. Consider scoring on a
    separate validation split.

    Parameters
    ----------
    X_train, y_train : data each forest is fitted on.
    X_test, y_test : data each forest is scored on.
    seeds : iterable of ``random_state`` values to try. Defaults to
        ``range(1, 500)``, the range that used to be hard-coded.
    threshold : a seed is reported only when its rounded accuracy is
        strictly greater than this value. Defaults to 0.75, the value
        that used to be hard-coded.

    Returns
    -------
    list of ``(seed, accuracy)`` tuples, in the order tried, for every
    reported seed. Previously the function returned ``None`` and the
    results were only printed.
    """
    hits = []
    for i in seeds:
        rfc = RandomForestClassifier(random_state = i, max_depth = 8, n_estimators = 10, min_samples_split = 2, min_samples_leaf = 6)
        rfc.fit(X_train, y_train)
        y_pred_rfc = rfc.predict(X_test)
        # accuracy_score takes (y_true, y_pred); the value is the same
        # either way round, but the conventional order keeps this call
        # safe to swap for an asymmetric metric later.
        acc = round(accuracy_score(y_test, y_pred_rfc), 2)
        if acc > threshold:
            print(i, acc)
            hits.append((i, acc))
    return hits