# MXNET - MLP MODEL

In [1]:
import pandas as pd
import mxnet as mx
import numpy as np
import logging

# Allow up to 500 columns when a DataFrame is displayed, so wide frames are not elided.
pd.options.display.max_columns = 500

from sklearn import preprocessing
from sklearn.utils import shuffle

## Parameters

In [2]:
# --- Data split ---
# Number of leading rows (after shuffling) held out as the validation/test set.
LIMIT = 200

# --- Batching ---
# Rows per batch for the training iterator and for the final CSV prediction iterator.
BATCH_SIZE = 2000
# Rows per batch for the validation and test iterators.
BATCH_SIZE_TEST = 200

# --- Model ---
# Width of the output layer (one unit per class).
NUM_CLASSES = 2
# Dropout probability; in this notebook it is only referenced by a commented-out Dropout layer.
DROPOUT = 0.0

# --- Optimisation ---
# Learning rate handed to the Adam optimizer.
LEARNING_RATE = 0.01
# Number of epochs passed to Module.fit.
NUM_EPOCHS = 1000

## Load Data

In [3]:
# Load the preprocessed training table; a later cell expects it to contain a 'label' column.
df = pd.read_csv('./data/data_processed.csv')

In [4]:
# Shuffle the rows with a fixed seed so the positional train/test split taken later is repeatable.
# NOTE: this rebinds `df`, so re-running this cell on its own shuffles the already-shuffled frame.
df = shuffle(df, random_state=0)

In [5]:
# Feature rows to predict on; they are fed to the trained model in the "Predict To CSV" section.
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which looks like a saved
# row index being read back as a feature -- confirm the training CSV has the same column layout.
test_to_csv = pd.read_csv('./data/data_test_processed.csv')
# Player pairs that the predicted winner is appended to later (presumably row-aligned with
# test_to_csv -- confirm). Column names are supplied here rather than taken from the file.
final_to_append = pd.read_csv('./data/tests.csv', names=['player1', 'player2'])

In [6]:
# Peek at the first row to check the column layout of the prediction features.
test_to_csv.head(1)

Unnamed: 0,Name_x,Type 1_x,Type 2_x,HP_x,Attack_x,Defense_x,Sp. Atk_x,Sp. Def_x,Speed_x,Generation_x,Legendary_x,Name_y,Type 1_y,Type 2_y,HP_y,Attack_y,Defense_y,Sp. Atk_y,Sp. Def_y,Speed_y,Generation_y,Legendary_y
0,0.757196,1.0,0.944444,0.311024,0.47027,0.266667,0.298913,0.285714,0.36,0.0,0.0,0.445557,0.705882,0.944444,0.350394,0.27027,0.311111,0.271739,0.261905,0.142857,0.0,0.0


### Split Data & Label

In [7]:
# Separate the feature columns from the target column.
df_data = df.drop('label', axis=1)
df_label = df.loc[:, 'label']

### Split Train & Test

In [8]:
# Hold out the first LIMIT rows (by position) of the shuffled data as the test set.
df_test_data = df_data.iloc[:LIMIT]
df_test_label = df_label.iloc[:LIMIT]
# Parenthesised form: prints the same text under Python 2 and is also valid Python 3,
# where the bare `print "..."` statement is a syntax error.
print("Test size: " + str(df_test_data.shape[0]))

Test size: 200


In [9]:
# Everything after the first LIMIT rows (by position) is used for training.
df_train_data = df_data.iloc[LIMIT:]
df_train_label = df_label.iloc[LIMIT:]
# Parenthesised form: prints the same text under Python 2 and is also valid Python 3,
# where the bare `print "..."` statement is a syntax error.
print("Train size: " + str(df_train_data.shape[0]))

Train size: 49800


### Initialize data iterators

In [10]:
# Training batches: BATCH_SIZE rows each, with shuffling requested from the iterator.
train_iter = mx.io.NDArrayIter(
    data=np.array(df_train_data),
    label=np.array(df_train_label),
    batch_size=BATCH_SIZE,
    shuffle=True,
)
# Validation batches built from the held-out rows; no shuffling is requested.
val_iter = mx.io.NDArrayIter(
    data=np.array(df_test_data),
    label=np.array(df_test_label),
    batch_size=BATCH_SIZE_TEST,
)

### Variables

In [11]:
# Symbolic placeholder named 'data' that the network layers below are built on.
data = mx.sym.var('data')
# Optional dropout on the input, left disabled; as far as this notebook shows, it is the only
# place the DROPOUT parameter would be used.
# data = mx.sym.Dropout(data=data, p=DROPOUT)

### Network

In [12]:
# Two-hidden-layer MLP built on the `data` symbol: 128 -> 64 -> NUM_CLASSES units.

# First fully-connected layer (128 units) followed by a ReLU activation
fc1  = mx.sym.FullyConnected(data=data, num_hidden=128)
act1 = mx.sym.Activation(data=fc1, act_type="relu")

# Second fully-connected layer (64 units) followed by a ReLU activation
fc2  = mx.sym.FullyConnected(data=act1, num_hidden = 64)
act2 = mx.sym.Activation(data=fc2, act_type="relu")

# Output layer: one unit per class (the Pokemon task has NUM_CLASSES = 2 classes)
fc3  = mx.sym.FullyConnected(data=act2, num_hidden=NUM_CLASSES)
# Softmax output with cross-entropy loss; `mlp` is the symbol handed to the Module for training
mlp  = mx.sym.SoftmaxOutput(data=fc3, name='softmax')

### Train

In [13]:
# Logging is left disabled; enabling the line below presumably surfaces fit's progress output
# (TODO confirm the level needed).
#logging.getLogger().setLevel(logging.DEBUG)

# Bind the `mlp` symbol to a CPU module and train it on train_iter, evaluating on val_iter.
mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())
mlp_model.fit(train_iter,  # train data
              eval_data=val_iter,  # validation data
              optimizer='Adam',  # use Adam to train
              optimizer_params={'learning_rate': LEARNING_RATE},  # use fixed learning rate
              eval_metric='acc',  # report accuracy during training
              #batch_end_callback = mx.callback.Speedometer(BATCH_SIZE, 400), # optional (disabled): progress report, presumably every 400 batches
              num_epoch=NUM_EPOCHS)  # train for NUM_EPOCHS passes over the training data

### Prediction

In [14]:
# Accuracy of the trained MLP on the held-out rows, using a fresh iterator over that data.
test_iter = mx.io.NDArrayIter(
    data=np.array(df_test_data),
    label=np.array(df_test_label),
    batch_size=BATCH_SIZE_TEST,
)
acc = mx.metric.Accuracy()
mlp_model.score(eval_data=test_iter, eval_metric=acc)

[('accuracy', 0.93)]

In [15]:
# Accuracy of the trained MLP on the training rows, for comparison with the held-out accuracy.
# NOTE(review): the 49800 training rows are not a multiple of BATCH_SIZE (2000), so the last
# batch is presumably padded -- confirm whether padded rows are counted in this figure.
acc = mx.metric.Accuracy()
mlp_model.score(eval_data=train_iter, eval_metric=acc)

[('accuracy', 0.9678)]

## Predict To CSV

In [16]:
# Iterator over the prediction features; no labels are supplied.
test_to_csv_iter = mx.io.NDArrayIter(data=np.array(test_to_csv), label=None, batch_size=BATCH_SIZE)

# Per-row class scores from the trained model, reduced to the index of the highest-scoring class.
prediction = mlp_model.predict(test_to_csv_iter).asnumpy().argmax(axis=1)

In [17]:
# Wrap the predicted class indices in a single-column frame.
df_predictions = pd.DataFrame(prediction)
# Assigning a DataFrame to a column aligns on the index, so a row-count mismatch would
# silently leave NaN winners instead of raising. Check the lengths and fail loudly.
if len(df_predictions) != len(final_to_append):
    raise ValueError(
        "prediction count (%d) does not match number of pairings (%d)"
        % (len(df_predictions), len(final_to_append))
    )
# Position-based assignment of the one prediction column (row i -> pairing i).
final_to_append['winner'] = df_predictions.iloc[:, 0].values

In [18]:
# Map each predicted class to a player id: class 0 -> player1, anything else -> player2.
# The mask is taken from `prediction` rather than from the 'winner' column that this cell
# overwrites, so re-running the cell gives the same result; reading the already-replaced
# column would flip every row whose stored winner id is not 0 to player2.
final_to_append['winner'] = np.where(
    prediction == 0,
    final_to_append['player1'],
    final_to_append['player2'],
)

In [20]:
# Write the pairings with their 'winner' column to disk, without an index column or header line.
final_to_append.to_csv('./data/final_result.csv', index=False, header=False)