In [1]:
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
spam = pd.read_csv("spambase.data", header=None)

In [3]:
spam.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,48,49,50,51,52,53,54,55,56,57
0,0.0,0.64,0.64,0.0,0.32,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.778,0.0,0.0,3.756,61,278,1
1,0.21,0.28,0.5,0.0,0.14,0.28,0.21,0.07,0.0,0.94,...,0.0,0.132,0.0,0.372,0.18,0.048,5.114,101,1028,1
2,0.06,0.0,0.71,0.0,1.23,0.19,0.19,0.12,0.64,0.25,...,0.01,0.143,0.0,0.276,0.184,0.01,9.821,485,2259,1
3,0.0,0.0,0.0,0.0,0.63,0.0,0.31,0.63,0.31,0.63,...,0.0,0.137,0.0,0.137,0.0,0.0,3.537,40,191,1
4,0.0,0.0,0.0,0.0,0.63,0.0,0.31,0.63,0.31,0.63,...,0.0,0.135,0.0,0.135,0.0,0.0,3.537,40,191,1


In [4]:
spam.shape

(4601, 58)

In [5]:
from sklearn.model_selection import train_test_split
X = spam.loc[:,0:56]
y = spam.loc[:,57]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

In [6]:
print(X_train.shape)
print(X_test.shape)

(3220, 57)
(1381, 57)


In [7]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
# Fit only to the training data
scaler.fit(X_train)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [8]:
# Now apply the transformations to the data:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(hidden_layer_sizes=(30,30,30))

In [10]:
mlp.fit(X_train,y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(30, 30, 30), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

In [11]:
predictions = mlp.predict(X_test)

In [12]:
predictions

array([1, 1, 0, ..., 0, 1, 1])

In [13]:
probs = mlp.predict_proba(X_test)

In [14]:
probs

array([[4.98181148e-01, 5.01818852e-01],
       [8.57347771e-03, 9.91426522e-01],
       [9.99999004e-01, 9.95631346e-07],
       ...,
       [1.00000000e+00, 4.24741807e-10],
       [8.54566298e-02, 9.14543370e-01],
       [4.64271834e-04, 9.99535728e-01]])

In [15]:
from sklearn.metrics import roc_auc_score
auc_ = roc_auc_score(y_test, probs[:,1])
print("AUC: %.4f" % auc_)

AUC: 0.9796


In [16]:
from sklearn.metrics import accuracy_score
print("acurácia: %.4f" % accuracy_score(y_test, predictions))

acurácia: 0.9385


In [17]:
from sklearn.metrics import classification_report,confusion_matrix
print(confusion_matrix(y_test,predictions))
print(classification_report(y_test,predictions))

[[800  30]
 [ 55 496]]
             precision    recall  f1-score   support

          0       0.94      0.96      0.95       830
          1       0.94      0.90      0.92       551

avg / total       0.94      0.94      0.94      1381



In [18]:
from sklearn import ensemble
from sklearn import datasets
from sklearn.utils import shuffle
from sklearn.model_selection import GridSearchCV

  from numpy.core.umath_tests import inner1d


In [19]:
tuned_parameters = [{'hidden_layer_sizes': [(1,), (5,), (10,), (5,5,)],
                     'activation' : ['identity', 'logistic', 'tanh', 'relu'],
                     'learning_rate': ['constant', 'adaptive'],
                     'alpha': [0.0001, 0.001, 0.01, 0.1, 1]}]

In [None]:
clf = GridSearchCV(MLPClassifier(), tuned_parameters, cv=3)
clf.fit(X_train, y_train)



In [None]:
print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on development set:")
print()
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
        % (mean, std * 2, params))
print()

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
print(classification_report(y_test, clf.predict(X_test)))
print()