In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import GridSearchCV
from sklearn.neural_network import MLPClassifier

In [3]:
import warnings
warnings.filterwarnings("ignore")

# Data

In [4]:
train = pd.read_csv("location_train.csv")
test = pd.read_csv("location_test.csv")

In [5]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4000 entries, 0 to 3999
Columns: 448 entries, ID to 446
dtypes: int64(448)
memory usage: 13.7 MB


In [6]:
train.head()

Unnamed: 0,ID,class,1,2,3,4,5,6,7,8,...,437,438,439,440,441,442,443,444,445,446
0,0,11,0,0,0,1,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0
1,1,3,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2,9,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,3,8,0,0,0,1,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
4,4,3,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [7]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1009 entries, 0 to 1008
Columns: 447 entries, ID to 446
dtypes: int64(447)
memory usage: 3.4 MB


In [8]:
test.head()

Unnamed: 0,ID,1,2,3,4,5,6,7,8,9,...,437,438,439,440,441,442,443,444,445,446
0,4000,0,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0
1,4001,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,1,0,0,0
2,4002,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4003,0,0,0,1,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,4004,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0


In [9]:
X_train = train.drop(["ID", "class"], axis=1)
y_train = train["class"]

X_test = test.drop(["ID"], axis=1)

# Model selection

In [10]:
# Grid Search

cv = 5          # number of folds
verbose = 3     # information shown during training

## Multi-layer Perceptron

In [14]:
parameters = {
    "hidden_layer_sizes":[(300), (400), (500), (600), (700), (800)],
    "solver":["lbfgs"],
    "activation":["logistic"],
    "alpha":[0.001, 0.01]}
model = GridSearchCV(MLPClassifier(), parameters, cv=cv, verbose=verbose, scoring="f1_weighted")
model.fit(X_train, y_train)

results = pd.DataFrame(model.cv_results_)
results = results[["param_hidden_layer_sizes", "param_alpha", "mean_test_score", "std_test_score"]]
results.sort_values(["mean_test_score"], ascending=False).head(10)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5] END activation=logistic, alpha=0.001, hidden_layer_sizes=300, solver=lbfgs;, score=0.743 total time=   2.4s
[CV 2/5] END activation=logistic, alpha=0.001, hidden_layer_sizes=300, solver=lbfgs;, score=0.754 total time=   2.0s
[CV 3/5] END activation=logistic, alpha=0.001, hidden_layer_sizes=300, solver=lbfgs;, score=0.752 total time=   2.1s
[CV 4/5] END activation=logistic, alpha=0.001, hidden_layer_sizes=300, solver=lbfgs;, score=0.735 total time=   2.2s
[CV 5/5] END activation=logistic, alpha=0.001, hidden_layer_sizes=300, solver=lbfgs;, score=0.726 total time=   2.0s
[CV 1/5] END activation=logistic, alpha=0.001, hidden_layer_sizes=400, solver=lbfgs;, score=0.757 total time=   2.6s
[CV 2/5] END activation=logistic, alpha=0.001, hidden_layer_sizes=400, solver=lbfgs;, score=0.751 total time=   2.9s
[CV 3/5] END activation=logistic, alpha=0.001, hidden_layer_sizes=400, solver=lbfgs;, score=0.747 total time=   2.8s
[CV

Unnamed: 0,param_hidden_layer_sizes,param_alpha,mean_test_score,std_test_score
9,600,0.01,0.762831,0.012084
7,400,0.01,0.762132,0.018097
5,800,0.001,0.756177,0.014636
6,300,0.01,0.755463,0.005216
8,500,0.01,0.754147,0.019523
10,700,0.01,0.753833,0.008995
11,800,0.01,0.751587,0.020267
3,600,0.001,0.74782,0.016568
1,400,0.001,0.746097,0.007435
2,500,0.001,0.745789,0.015776


# Final model

In [11]:
best_model = model.best_estimator_
best_model.fit(X_train, y_train)

MLPClassifier(alpha=0.01, hidden_layer_sizes=(400,), solver='lbfgs')

In [12]:
predictions = pd.DataFrame(test["ID"])
predictions["class"] = best_model.predict(X_test)

In [13]:
predictions.to_csv("submission.csv", index=False)