In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

In [9]:
# Load the sonar dataset. The file is read with header=None, so pandas
# assigns integer column labels instead of taking names from the first row.
csv_path = './sonar_all_data_2.csv'
data = pd.read_csv(csv_path, header=None)

# Display the generated column labels as the cell output.
data.columns

Int64Index([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
            17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
            34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
            51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61],
           dtype='int64')

In [10]:
# Column 61 is the target; columns 60 and 61 are both left out of the features.
# NOTE(review): presumably 60 and 61 are two encodings of the class label -
# confirm against the dataset description.
non_feature_columns = [60, 61]
y = data[61]
X = data.drop(columns=non_feature_columns)

In [11]:
# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle, and therefore the split, repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [12]:
# Hyper-parameter search space for the MLP classifier.
# Candidate architectures: three hidden layers of 10 units each, or a single
# hidden layer of 100 or 500 units.
hidden_layer_sizes_opt = [(10, 10, 10, ), (100, ), (500, )]
start_val = 1e-5
end_val = 1e-4
num_val = 10

# num_val evenly spaced regularisation strengths in [start_val, end_val).
# np.linspace takes the number of samples explicitly; np.arange with a
# fractional step can return one element more or fewer than intended because
# of floating-point rounding in the length computation.
alpha = np.linspace(start_val, end_val, num=num_val, endpoint=False)

param_grid = dict(hidden_layer_sizes=hidden_layer_sizes_opt,
                  alpha=alpha,
                  )

# Base estimator; hidden_layer_sizes and alpha are overridden by the grid search.
model = MLPClassifier(activation='logistic', solver='adam', tol=0.0001, random_state=1, max_iter=2000 )

# Exhaustive search over param_grid with 10-fold cross-validation, scored by accuracy.
grid = GridSearchCV(model, param_grid, cv=10, scoring='accuracy', n_jobs=-1, verbose=True)

print(param_grid)

{'hidden_layer_sizes': [(10, 10, 10), (100,), (500,)], 'alpha': array([1.0e-05, 1.9e-05, 2.8e-05, 3.7e-05, 4.6e-05, 5.5e-05, 6.4e-05,
       7.3e-05, 8.2e-05, 9.1e-05])}


In [13]:
# Per-sweep results, kept as parallel lists in iteration order.
best_scores = []
model_params = []

# Try 1, 4, 7, ..., 58 retained principal components.
# NOTE(review): the PCA below is fitted on all of X_train before the
# cross-validated search runs, so every validation fold has already influenced
# the projection. Putting PCA and the classifier in a single Pipeline would
# keep the folds independent.
for i in np.arange(1, 61, 3):

    print("\n\nIteration: " + str(i))

    # Reduce the training features to i principal components
    pca = PCA(n_components=i, random_state=1)
    X_pca = pca.fit_transform(X_train)

    # Re-run the full hyper-parameter search on the reduced features
    grid.fit(X_pca, y_train)

    # Record the winning score and parameters for this component count
    best_scores.append(grid.best_score_)
    model_params.append(grid.best_params_)

    # Echo the winner for this iteration
    print(best_scores[-1])
    print(grid.best_params_)
    print(grid.best_estimator_)



Iteration: 1
Fitting 10 folds for each of 30 candidates, totalling 300 fits
0.5514285714285715
{'alpha': 1e-05, 'hidden_layer_sizes': (500,)}
MLPClassifier(activation='logistic', alpha=1e-05, hidden_layer_sizes=(500,),
              max_iter=2000, random_state=1)


Iteration: 4
Fitting 10 folds for each of 30 candidates, totalling 300 fits
0.6547619047619048
{'alpha': 1e-05, 'hidden_layer_sizes': (100,)}
MLPClassifier(activation='logistic', alpha=1e-05, max_iter=2000, random_state=1)


Iteration: 7
Fitting 10 folds for each of 30 candidates, totalling 300 fits
0.7642857142857142
{'alpha': 1e-05, 'hidden_layer_sizes': (100,)}
MLPClassifier(activation='logistic', alpha=1e-05, max_iter=2000, random_state=1)


Iteration: 10
Fitting 10 folds for each of 30 candidates, totalling 300 fits
0.7985714285714286
{'alpha': 1e-05, 'hidden_layer_sizes': (100,)}
MLPClassifier(activation='logistic', alpha=1e-05, max_iter=2000, random_state=1)


Iteration: 13
Fitting 10 folds for each of 30 candidates

In [14]:
# Report the best cross-validated result over all PCA settings evaluated so far.
# best_scores and model_params are parallel lists with one entry per completed
# sweep iteration, so a single index selects the matching score, component
# count and parameters.
pca_components_tried = np.arange(1, 61, 3)  # must mirror the range used by the PCA sweep loop
best_idx = int(np.argmax(best_scores))  # first occurrence of the maximum, like list.index(max(...))

# The original message labelled the accuracy value as "the PCA component";
# the accuracy and the component count are now reported separately.
print("Max cross-validation accuracy: " + str(best_scores[best_idx]))
print("Number of PCA components at the max accuracy: " + str(pca_components_tried[best_idx]))
print(
    "Parameters corresponding to the max accuracy for cross validation: ",
    model_params[best_idx])

Max accuracy occured at the PCA component: 0.8338095238095239
Parameters corresponding to the max accuracy for cross validation:  {'alpha': 1e-05, 'hidden_layer_sizes': (100,)}
