In [1]:
import pandas as pd

In [40]:
X = pd.read_csv('extracted_features.csv')
y = pd.read_csv('timeseries_classification.csv')

In [41]:
X.index = X.pop(X.columns[0]).values
y.drop('index',axis=1,inplace=True)
y.index = y.pop('id').values

In [42]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [46]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

normalized_X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns = X_train.columns
)

normalized_X_test = pd.DataFrame(
    scaler.transform(X_test),
    columns = X_test.columns
)

In [64]:
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import cross_val_score
from sklearn.naive_bayes import GaussianNB

cv = 10

nb_clf = make_pipeline(StandardScaler(), GaussianNB())
nb_score = cross_val_score(nb_clf, X, y.values.ravel(), scoring='accuracy', cv=cv)
print(f'Naive Bayes mean score: {nb_score.mean():.2f} \nStandard deviation: {nb_score.std():.2f}')

Naive Bayes mean score: 0.42 
Standard deviation: 0.08


In [63]:
from sklearn.ensemble import RandomForestClassifier

rf_clf = make_pipeline(StandardScaler(), RandomForestClassifier(random_state=42))
rf_score = cross_val_score(rf_clf, X, y.values.ravel(), scoring='accuracy', cv=cv)
print(f'Random Forest mean score: {rf_score.mean():.2f} \nStandard deviation: {rf_score.std():.2f}')

Random Forest mean score: 0.42 
Standard deviation: 0.14


In [78]:
from sklearn.svm import SVC

svc_clf = make_pipeline(StandardScaler(), SVC(random_state=42))
svc_score = cross_val_score(svc_clf, X, y.values.ravel(), scoring='accuracy', cv=cv)
print(f'Suport Vector Classifier mean score: {svc_score.mean():.2f} \nStandard deviation: {svc_score.std():.2f}')

Suport Vector Classifier mean score: 0.34 
Standard deviation: 0.10


In [80]:
from sklearn.neural_network import MLPClassifier

nn_clf = make_pipeline(StandardScaler(), MLPClassifier(random_state=42, max_iter=500))
nn_score = cross_val_score(nn_clf, X, y.values.ravel(), scoring='accuracy', cv=cv)
print(f'MLP Classifier mean score: {nn_score.mean():.2f} \nStandard deviation: {nn_score.std():.2f}')

MLP Classifier mean score: 0.40 
Standard deviation: 0.10


In [74]:
param_grid = [
        {
            'nn__activation' : ['identity', 'logistic', 'tanh', 'relu'],
            'nn__solver' : ['lbfgs', 'sgd', 'adam'],
            'nn__hidden_layer_sizes': [
             (1,),(2,),(3,),(4,),(5,),(6,),(7,),(8,),(9,),(10,),(11,), (12,),(13,),(14,),(15,),(16,),(17,),(18,),(19,),(20,),(21,)
             ]
        }
]

In [75]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier

pipe = Pipeline(steps=[("scaler", scaler), ("nn", MLPClassifier(random_state=42, max_iter=300))])

search = GridSearchCV(pipe, param_grid, cv=10, n_jobs=-1)
search.fit(X, y)
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)

  y = column_or_1d(y, warn=True)


Best parameter (CV score=0.506):
{'nn__activation': 'relu', 'nn__hidden_layer_sizes': (6,), 'nn__solver': 'sgd'}




In [77]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(random_state=42, max_iter=300, hidden_layer_sizes=(6,), solver='sgd')
nn_clf = make_pipeline(StandardScaler(), mlp)
nn_score = cross_val_score(nn_clf, X, y.values.ravel(), scoring='accuracy', cv=cv)
print(f'MLP Classifier mean score: {nn_score.mean():.2f} \nStandard deviation: {nn_score.std():.2f}')



MLP Classifier mean score: 0.51 
Standard deviation: 0.16




In [83]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.neural_network import MLPClassifier

# increasing max_iter
pipe = Pipeline(steps=[("scaler", scaler), ("nn", MLPClassifier(random_state=42, max_iter=500))])

search = GridSearchCV(pipe, param_grid, cv=10, n_jobs=-1, verbose=2)
search.fit(X, y)
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)

Fitting 10 folds for each of 252 candidates, totalling 2520 fits
