In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

In [2]:
data = pd.read_csv('data/credit-card-full.csv', index_col='ID')
X = data.drop(axis=1, columns='default payment next month')
y = data['default payment next month']
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=.75)

In [3]:
# Create the function
def gbm_grid_search(learning_rate, max_depth, subsample):

	# Create the model
    model = GradientBoostingClassifier(
        learning_rate=learning_rate, 
        max_depth=max_depth, 
        subsample = subsample
    )
    
    # Use the model to make predictions
    predictions = model.fit(X_train, y_train).predict(X_test)
    
    # Return the hyperparameters and score
    return([learn_rate, max_depth, subsample, accuracy_score(y_test, predictions)])

In [4]:
# Create the relevant lists
results_list = []
learn_rate_list = [.01, .1, .5]
max_depth_list = [2, 4, 6]

# Create the for loop
for learn_rate in learn_rate_list:
    for max_depth in max_depth_list:
        results_list.append(gbm_grid_search(learn_rate,max_depth))

# Print the results
print(results_list)   

[[0.01, 2, 0.8133333333333334], [0.01, 4, 0.8154666666666667], [0.01, 6, 0.8132], [0.1, 2, 0.8149333333333333], [0.1, 4, 0.8148], [0.1, 6, 0.8138666666666666], [0.5, 2, 0.8102666666666667], [0.5, 4, 0.8021333333333334], [0.5, 6, 0.7878666666666667]]


In [None]:
# Create the relevant lists
results_list = []
learn_rate_list = [.01, .1, .5]
max_depth_list = [2, 4, 6]
subsamples = [.4, .6]

# Create the for loop
for learn_rate in learn_rate_list:
    for max_depth in max_depth_list:
        for subsample in subsamples:
            results_list.append(gbm_grid_search(learn_rate, max_depth, subsample))

# Print the results
print(results_list)   

In [6]:
# Create a Random Forest Classifier with specified criterion
rf_class = RandomForestClassifier(criterion='entropy')

# Create the parameter grid
param_grid = {
    'max_depth': [2, 4, 8, 15], 
    'max_features': ['auto','sqrt']
} 

# Create a GridSearchCV object
grid_rf_class = GridSearchCV(
    estimator=rf_class,
    param_grid=param_grid,
    scoring='roc_auc',
    n_jobs=4,
    cv=5,
    refit=True, return_train_score=True)
print(grid_rf_class)

GridSearchCV(cv=5, estimator=RandomForestClassifier(criterion='entropy'),
             n_jobs=4,
             param_grid={'max_depth': [2, 4, 8, 15],
                         'max_features': ['auto', 'sqrt']},
             return_train_score=True, scoring='roc_auc')
