## Wczytanie danych z pliku

In [8]:
import pandas as pd

# Read the dataset from the CSV file (path is relative to the working
# directory) and report its (rows, columns) shape as a quick sanity check.
csv_path = 'spam.csv'
data = pd.read_csv(csv_path)
print(data.shape)

(5572, 2)


## Podział danych na zbiory treningowy i testowy

In [9]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
# NOTE(review): presumably "v2" holds the message text and "v1" the class
# label -- confirm against the CSV header.
features = data["v2"]
target = data["v1"]
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    target,
    test_size=0.3,
    random_state=20,
)

## Definicja pipeline'u z MLP

In [10]:
from sklearn.pipeline import make_pipeline
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix
from sklearn.feature_extraction.text import TfidfVectorizer

# Text-classification pipeline: TF-IDF features followed by an MLP classifier.
# "english" has to be passed as `stop_words=`: the first constructor parameter
# of TfidfVectorizer is `input`, so a bare positional "english" never enabled
# stop-word filtering (and scikit-learn releases with a keyword-only
# constructor reject the positional form with a TypeError).
pipeline = make_pipeline(TfidfVectorizer(stop_words="english"), MLPClassifier())

## Definicja parametrów do przeszukania

In [16]:
# Hyper-parameter search space for the MLP step of the pipeline.
# Keys use the "<step name>__<parameter>" form; make_pipeline names each step
# after its lowercased class name, hence the "mlpclassifier__" prefix.
# NOTE: per the scikit-learn docs, `learning_rate` only takes effect with
# solver='sgd', so for 'lbfgs' and 'adam' the three learning_rate values
# repeat the same configuration.
param_grid = dict(
    mlpclassifier__hidden_layer_sizes=[1, 10, 64, 128, 256],
    mlpclassifier__activation=['identity', 'logistic', 'tanh', 'relu'],
    mlpclassifier__solver=['lbfgs', 'sgd', 'adam'],
    mlpclassifier__learning_rate=['constant', 'invscaling', 'adaptive'],
)

## Wyszukiwanie parametrów

In [17]:
from sklearn.feature_selection import SelectKBest
from sklearn.model_selection import GridSearchCV

# Exhaustive cross-validated search over every combination in param_grid,
# evaluated on the training split only; n_jobs=-1 runs the fits on all
# available CPU cores. fit() returns the fitted search object itself.
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, n_jobs=-1)
search = grid_search.fit(X_train, Y_train)



## Najlepiej dopasowane parametry

In [18]:
# Show the parameter combination that the grid search selected as best.
best_params = search.best_params_
print(best_params)

{'mlpclassifier__activation': 'logistic', 'mlpclassifier__hidden_layer_sizes': 10, 'mlpclassifier__learning_rate': 'constant', 'mlpclassifier__solver': 'adam'}
