# MXNET - MLP MODEL

In [1]:
import pandas as pd
import mxnet as mx
import numpy as np
import logging

# Allow up to 500 columns to be rendered when a DataFrame is displayed.
pd.options.display.max_columns = 500

## Parameters

In [2]:
# --- Data split / batching ---
LIMIT = 200            # number of leading rows held out as the test split
BATCH_SIZE = 1000      # batch size of the training iterator
BATCH_SIZE_TEST = 200  # batch size of the validation / test iterators

# --- Network ---
NUM_CLASSES = 2        # width of the final fully-connected layer
DROPOUT = 0.2          # drop probability of the dropout applied to the input

# --- Optimisation ---
LEARNING_RATE = 0.01   # learning rate handed to the optimizer
NUM_EPOCHS = 1000      # number of epochs passed to fit()

## Load Data

In [3]:
# Read the processed CSV; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='./data/data_processed.csv')

In [4]:
# Display the column labels of the loaded frame (per-player features plus 'label').
df.columns

Index([u'Name_p1', u'Type_1_p1', u'Type_2_p1', u'HP_p1', u'Attack_p1',
       u'Defense_p1', u'SpAtk_p1', u'SpDef_p1', u'Speed_p1', u'Generation_p1',
       u'Legendary_p1', u'Name_p2', u'Type_1_p2', u'Type_2_p2', u'HP_p2',
       u'Attack_p2', u'Defense_p2', u'SpAtk_p2', u'SpDef_p2', u'Speed_p2',
       u'Generation_p2', u'Legendary_p2', u'label'],
      dtype='object')

### Split Data & Label

In [5]:
# Target: the 'label' column on its own.
df_label = df.loc[:, 'label']
# Features: every remaining column (the original frame is left untouched).
df_data = df.drop('label', axis=1)

### Split Train & Test

In [6]:
# Hold out the first LIMIT rows (features and their matching labels) as the
# test split. `.iloc` makes it explicit that the slice is positional.
# NOTE(review): rows are taken in file order — confirm the CSV is already shuffled.
df_test_data = df_data.iloc[:LIMIT]
df_test_label = df_label.iloc[:LIMIT]
# A parenthesised single-argument print emits the same text on Python 2 and
# is valid syntax on Python 3 (the bare print statement is not).
print("Test size: " + str(df_test_data.shape[0]))

Test size: 200


In [7]:
# Everything after the first LIMIT rows becomes the training split, so it does
# not overlap with the rows held out for testing. `.iloc` makes it explicit
# that the slice is positional.
df_train_data = df_data.iloc[LIMIT:]
df_train_label = df_label.iloc[LIMIT:]
# A parenthesised single-argument print emits the same text on Python 2 and
# is valid syntax on Python 3 (the bare print statement is not).
print("Train size: " + str(df_train_data.shape[0]))

Train size: 49800


### Initialize data iterators

In [8]:
# Training batches: BATCH_SIZE rows each, shuffled.
train_iter = mx.io.NDArrayIter(
    data=np.array(df_train_data),
    label=np.array(df_train_label),
    batch_size=BATCH_SIZE,
    shuffle=True
)

# Validation batches: built from the held-out test rows, not shuffled.
val_iter = mx.io.NDArrayIter(
    data=np.array(df_test_data),
    label=np.array(df_test_label),
    batch_size=BATCH_SIZE_TEST
)

### Variables

In [9]:
# Network input: a symbolic variable named 'data' with dropout (probability
# DROPOUT) applied directly to it before the first layer.
data = mx.sym.Dropout(data=mx.sym.var('data'), p=DROPOUT)

### Network

In [10]:
# Hidden layer 1: 128 units followed by ReLU
fc1 = mx.sym.FullyConnected(data=data, num_hidden=128)
act1 = mx.sym.Activation(data=fc1, act_type='relu')

# Hidden layer 2: 64 units followed by ReLU
fc2 = mx.sym.FullyConnected(data=act1, num_hidden=64)
act2 = mx.sym.Activation(data=fc2, act_type='relu')

# Hidden layer 3: 32 units followed by ReLU
fc3 = mx.sym.FullyConnected(data=act2, num_hidden=32)
act3 = mx.sym.Activation(data=fc3, act_type='relu')

# Output layer: one unit per class (NUM_CLASSES)
fc4 = mx.sym.FullyConnected(data=act3, num_hidden=NUM_CLASSES)
# Softmax output layer on top of the class scores; this symbol is the full network
mlp = mx.sym.SoftmaxOutput(data=fc4, name='softmax')

### Train

In [11]:
# Uncomment to raise the root logger to DEBUG while training:
# logging.getLogger().setLevel(logging.DEBUG)

# Wrap the network symbol in a Module that runs on the CPU, then train it.
mlp_model = mx.mod.Module(symbol=mlp, context=mx.cpu())
mlp_model.fit(train_iter,  # training batches
              eval_data=val_iter,  # batches evaluated during training (built from the held-out test rows)
              optimizer='Adam',  # train with the Adam optimizer
              optimizer_params={'learning_rate': LEARNING_RATE},  # learning rate handed to the optimizer
              eval_metric='acc',  # use accuracy as the evaluation metric
#              batch_end_callback = mx.callback.Speedometer(BATCH_SIZE, 400), # disabled: Speedometer callback with a frequency of 400 batches
              num_epoch=NUM_EPOCHS)  # number of training epochs (NUM_EPOCHS, currently 1000)

### Prediction

In [12]:
# Accuracy on the held-out test rows, using a freshly built iterator.
test_iter = mx.io.NDArrayIter(
    data=np.array(df_test_data),
    label=np.array(df_test_label),
    batch_size=BATCH_SIZE_TEST
)
acc = mx.metric.Accuracy()
# Last expression of the cell, so the (name, value) pairs are displayed.
mlp_model.score(eval_data=test_iter, eval_metric=acc)

[('accuracy', 0.97)]

In [13]:
# Accuracy on the training rows, for comparison with the test-set score.
# NOTE: despite its name, `test_iter` is re-bound to the *training* data here.
test_iter = mx.io.NDArrayIter(
    data=np.array(df_train_data),
    label=np.array(df_train_label),
    batch_size=BATCH_SIZE
)
acc = mx.metric.Accuracy()
# Last expression of the cell, so the (name, value) pairs are displayed.
mlp_model.score(eval_data=test_iter, eval_metric=acc)

[('accuracy', 0.94456)]