In [42]:
import numpy as np
import pandas as pd
from sknn.mlp import Classifier, Layer
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import log_loss
import matplotlib.pyplot as plt
import sys
import csv

In [43]:
## # Collect data
# pd.read_csv replaces DataFrame.from_csv, which was deprecated and later
# removed from pandas; with no index column requested the two read the file
# the same way (comma separated, first row as header, default integer index).
df = pd.read_csv("data_train.csv")

## # Separate labels from data
# X holds every column except 'Category'; Y keeps 'Category' as a one-column
# DataFrame (not a Series), so arrays derived from it are 2-D.
X = df.drop(['Category'], axis = 1)
Y = df[['Category']]
# Sorted distinct class labels found in the full data set.
labels = np.unique(Y)

## # Scale Data
# MinMaxScaler with default settings; the result is wrapped back into a
# DataFrame so the original column names are preserved.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# further down, so the test rows influence the scaling -- confirm this is
# intended.
scaler = MinMaxScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns = X.columns)

In [44]:
## # Define Global Parameters

# Which parameter set to load: `index` is the zero-based row of the
# parameters file. Both names must be defined before they are used below;
# they were previously only present as commented-out lines, so this cell
# raised NameError on a fresh kernel.
# When running as a script they could instead come from the command line:
#   parameters_filename = sys.argv[1]
#   index = int(sys.argv[2])
index = 3
parameters_filename = "parameters.csv"

# pd.read_csv replaces the removed DataFrame.from_csv.
df_parameters = pd.read_csv(parameters_filename)
# Select the row as a one-row frame, then take that row as a flat array.
parameters = np.array(df_parameters.iloc[[index]])[0]

# The row must contain exactly these nine values, in this order.
(prop_train, method1, neurons1, method2, neurons2,
 decay, learning_rate, n_iter, random_state) = parameters

# Manual overrides. These assignments replace every value unpacked from the
# file above, so the selected row currently has no effect on the run.
# Remove (or comment out) the lines below to use the values from the file.
prop_train    = 0.10     # probability threshold used to assign a row to training
method1       = "Tanh"   # type of the first hidden layer
neurons1      = 1        # units in the first hidden layer
method2       = None     # type of the second hidden layer (None = no second layer)
neurons2      = None     # units in the second hidden layer
decay         = 0.0001   # weight_decay passed to each hidden Layer
learning_rate = 0.001    # learning_rate passed to the Classifier
n_iter        = 25       # n_iter passed to the Classifier
random_state  = 1        # random_state passed to the Classifier

In [45]:
## # Split data set into train/test
# Fix the NumPy seed so the random assignment below is repeatable.
np.random.seed(seed=1)

# One uniform draw in [0, 1) per row; a row goes to the training set when its
# draw is below prop_train, otherwise to the test set.
msk = np.random.rand(len(X)) < prop_train

# Materialise both partitions as plain NumPy arrays.
X_train, Y_train = X[msk].values, Y[msk].values
X_test,  Y_test  = X[~msk].values, Y[~msk].values

In [46]:
# Layers
# Hidden layers: the first is always present; the second is added only when
# a second layer type is configured. `is not None` is the identity test for
# None (an `==` comparison can be overridden by the compared object).
hidden_layers = [Layer(method1, weight_decay = decay, units = neurons1)]
if method2 is not None:
    hidden_layers.append(Layer(method2, weight_decay = decay, units = neurons2))

# The network always ends with a Softmax output layer.
layers = hidden_layers + [Layer("Softmax")]

In [47]:
## # Run nnet
# Training options taken from the global parameters.
nn_options = {
    "learning_rate": learning_rate,
    "random_state": random_state,
    "n_iter": n_iter,
}

# Build the classifier from the layer list and fit it on the training split.
nn = Classifier(layers, **nn_options)
nn.fit(X_train, Y_train)

# Score the held-out rows: predicted labels, then predicted probabilities.
Y_hat = nn.predict(X_test)
Y_probs = nn.predict_proba(X_test)

In [48]:
## # Misclassification error rate
# Fraction of test rows whose predicted label differs from the true label.
# Both arrays are 2-D with the label in column 0; the mean of the boolean
# mismatch vector is (number of mismatches) / (number of test rows).
miss_err = float(np.mean(Y_test[:, 0] != Y_hat[:, 0]))

# Clipping epsilon for the log loss. This must be written as a float literal:
# in Python `^` is bitwise XOR, so the former `10^(-15)` evaluated to -5
# rather than 1e-15.
eps = 1e-15
logloss = log_loss(Y_test, Y_probs, eps = eps)

# Summarized results
# One record per run: the two metrics followed by the hyper-parameters that
# produced them, in the column order written to the results file.
result = np.array([logloss,
                   miss_err,
                   method1,
                   neurons1,
                   method2,
                   neurons2,
                   decay,
                   learning_rate,
                   n_iter,
                   random_state,
                   prop_train])

In [50]:
## # Write result into csv file
# csv.writer needs a binary-mode file on Python 2, but on Python 3 it needs a
# text-mode file opened with newline='' (writing to a 'wb' file there raises
# TypeError). Choose the mode from the running interpreter so the cell works
# on both.
if sys.version_info[0] >= 3:
    open_kwargs = {'mode': 'w', 'newline': ''}
else:
    open_kwargs = {'mode': 'wb'}

# The file is overwritten on every run and receives a single row: `result`.
with open('results.csv', **open_kwargs) as f:
    writer = csv.writer(f)
    writer.writerow(result)