In [5]:
%load_ext autoreload
%autoreload 2
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit,KFold, GridSearchCV
from sklearn.metrics import accuracy_score,fbeta_score,f1_score
import numpy as np
import matplotlib.pyplot as plt
from helpers import make_complexity_curve, make_learning_curve
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


**Load Cleaned Adult Data**

In [3]:
# Load the cleaned Adult dataset and one-hot encode its non-numeric feature columns.
adult_df = pd.read_csv(filepath_or_buffer="adult_cleaned.csv")
# Use `columns=` rather than a positional axis: pandas 2.0 made every argument of
# DataFrame.drop except `labels` keyword-only, so `drop('target', 1)` raises TypeError.
adult_df_dummies = pd.get_dummies(adult_df.drop(columns="target"))
adult_x = adult_df_dummies.copy().values
adult_y = adult_df["target"].copy().values
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_adult_train, X_adult_test, y_adult_train, y_adult_test = train_test_split(
    adult_x, adult_y, test_size=0.3, random_state=0
)

**Load Cleaned Titanic Data**

In [4]:
# Load the cleaned Titanic train/test files; only the training file is used in this cell.
titanic_train = pd.read_csv(filepath_or_buffer="titanic_train_cleaned.csv")
titanic_test = pd.read_csv(filepath_or_buffer="titanic_test_cleaned.csv")
# Use `columns=` rather than a positional axis: pandas 2.0 made every argument of
# DataFrame.drop except `labels` keyword-only, so `drop("Survived", 1)` raises TypeError.
titanic_df_dummies = pd.get_dummies(titanic_train.drop(columns="Survived"))
X_titanic_train = titanic_df_dummies.copy().values
# NOTE: the labels stay a pandas Series here (the features above are a NumPy array).
y_titanic_train = titanic_train["Survived"]

**Load Spam Data**

In [None]:
# Load the spam dataset and split it into train/test sets.
spam_df = pd.read_csv('spambase2.csv', header=0)
# The split previously reused adult_x / adult_y, so the "spam" arrays were really the
# Adult data and spam_df was never used. Build the spam features/labels explicitly.
# NOTE(review): assumes the class label is the LAST column of spambase2.csv and every
# other column is a feature — confirm against the file before trusting results.
spam_x = spam_df.iloc[:, :-1].copy().values
spam_y = spam_df.iloc[:, -1].copy().values
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_spam_train, X_spam_test, y_spam_train, y_spam_test = train_test_split(
    spam_x, spam_y, test_size=0.3, random_state=0
)
print(f"X_spam_train: {X_spam_train.shape}")
print(f"X_spam_test: {X_spam_test.shape}")
print(f"y_spam_train: {y_spam_train.shape}")
print(f"y_spam_test: {y_spam_test.shape}")

In [10]:
def getParams(nFeatures):
    """Build the hyper-parameter grid for a pipeline whose MLP step is named ``'MLP'``.

    Parameters
    ----------
    nFeatures : int
        Number of input features; hidden-layer widths are derived from it.

    Returns
    -------
    dict
        Maps ``'MLP__activation'``, ``'MLP__alpha'`` and
        ``'MLP__hidden_layer_sizes'`` to the lists of candidate values.
        Hidden-layer candidates use 1, 2 or 3 equally sized layers whose width
        is ``nFeatures``, ``nFeatures // 2`` or ``nFeatures * 2``.
    """
    # Regularisation strengths from 10**1 down to 10**-5 in half-decade steps.
    alphas = [10**-x for x in np.arange(-1,5.01,1/2)]

    # Clamp widths to at least one unit: nFeatures // 2 is 0 when nFeatures == 1,
    # and a zero-unit hidden layer is not a valid network. Clamping can create
    # duplicates, so keep only the first occurrence (order preserved). For
    # nFeatures >= 2 the three widths are already distinct and positive.
    widths = []
    for width in (nFeatures, nFeatures // 2, nFeatures * 2):
        width = max(1, width)
        if width not in widths:
            widths.append(width)

    # Depth varies in the outer loop, width in the inner loop.
    hiddens = [(width,) * depth for depth in (1, 2, 3) for width in widths]

    params = {
        'MLP__activation': ['relu', 'logistic'],
        'MLP__alpha': alphas,
        'MLP__hidden_layer_sizes': hiddens,
    }
    return params

In [12]:
# Standardise the features, then fit an MLP. The step names ('Scale', 'MLP') must
# match the 'MLP__' prefixes used by the parameter grid from getParams.
pipeAdult = Pipeline(
    steps=[
        ('Scale', StandardScaler()),
        ('MLP', MLPClassifier(random_state=55, early_stopping=True, max_iter=2000)),
    ]
)

# Candidate hyper-parameters, sized from the number of encoded Adult feature columns.
adult_param_grid = getParams(adult_x.shape[1])

# Exhaustive grid search with 5-fold cross-validation over the whole pipeline.
adult_cv = GridSearchCV(estimator=pipeAdult, param_grid=adult_param_grid, cv=5)
adult_cv.fit(
    X_adult_train.astype(float),
    y_adult_train.astype(float),
)
print(f"best params adult: {adult_cv.best_params_}")

best params adult: {'MLP__activation': 'relu', 'MLP__alpha': 0.0001, 'MLP__hidden_layer_sizes': (104,)}
