In [181]:
import tensorflow as tf
import numpy as np
import pandas as pd
import sklearn.utils as utils
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import sklearn.metrics as metrics
from random import shuffle
import itertools
from sklearn.model_selection import KFold

In [335]:
class SimpleNeuralNetwork(object):
    
    def __init__(self, param_dict, num_epochs):
        self.build_graph(param_dict)
        self.num_epochs = num_epochs
     

    def prep_data(self, train_file, test_file):
        data = pd.read_csv(train_file, index_col = 0)
        labels = data['labels']
        labels = pd.get_dummies(labels)
        data = data.drop(['labels'], axis = 1)
        
        
        testx = pd.read_csv(test_file, index_col = 0)
        
        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(data, labels)
        self.X_test = testx
        return data, labels
    
    def prep_3fold(self, train_file):
        data = pd.read_csv(train_file, index_col = 0)
        labels = data['labels']
        labels = pd.get_dummies(labels)
        data = data.drop(['labels'], axis = 1)



    def batch_generator(self, data, labels):

        data = data.sample(frac=1)
        labels = labels.reindex(data.index)
        prev = 0
        for i in range(self.batch_size, len(data), self.batch_size):
            yield (data[prev:i], labels[prev:i])
            prev = i


    def get_batch(self, data, labels):

        data = data.sample(frac=1)
        labels = labels.reindex(data.index)
        return(data[0:self.batch_size], labels[0:self.batch_size])
    
    def get_test_batch(self, data):
        data = data.sample(frac=1)
        prev = 0
        for i in range(self.batch_size, len(data), self.batch_size):
            yield data[prev:i]
            prev = i
        yield pd.concat([data[prev:], data[:self.batch_size-len(data[i:])]], axis = 0)


    def build_graph(self, param_dict):
        self.batch_size = param_dict['batch_size']
        lr = param_dict['lr']
        hidden_size = param_dict['hidden_size']
        activation = param_dict['activation']
        initializer = param_dict['initializer']
        scale = param_dict['scale']

        #graph = tf.Graph()
        
            
        with tf.name_scope('placeholders'):
            self.labels = tf.placeholder(tf.float32, shape = (self.batch_size, 10))
            self.features = tf.placeholder(tf.float32, shape = (self.batch_size, 64))

        with tf.name_scope('dense_layers'):
            fc_layer_1  = tf.layers.dense(self.features, hidden_size, activation = activation, kernel_initializer = initializer, kernel_regularizer = 
tf.contrib.layers.l2_regularizer(scale))

        with tf.name_scope('loss'):

            logits =  tf.layers.dense(fc_layer_1, 10, activation = activation)
            self.pred = tf.nn.softmax(logits)
            self.loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits,labels=self.labels))
            #loss = tf.reduce_sum(ce)

        with tf.name_scope('optim'):
            self.optimizer = tf.train.AdamOptimizer(lr).minimize(self.loss)


        with tf.name_scope('accuracy'):
            correct_pred = tf.equal(tf.argmax(self.pred, 1) ,tf.argmax(self.labels, 1))
            self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))



    def run(self, session):
        #session.as_default()
        session.run(tf.global_variables_initializer())

        cur_epoch = 0
        avg_loss = 0
        num_epochs = self.num_epochs
        acc_test = 0
        acc_train = 0
        self.max_acc = 0
        worse = 0
        step = 0

        while cur_epoch < num_epochs and worse < 80:

            for batch in self.batch_generator(self.X_train, self.y_train):
                
                ft_batch, lb_batch = batch         
                feed_dict = {self.features : ft_batch, self.labels : lb_batch}
                _, curloss, atrain = sess.run([self.optimizer, self.loss, self.accuracy], 
                                                 feed_dict = feed_dict)

                acc_train += atrain

                fv, lv = self.get_batch(self.X_val, self.y_val)
                val_dict = {self.features: fv, self.labels:lv}
                atest = sess.run(self.accuracy, feed_dict = val_dict)
                acc_test += atest


                avg_loss += curloss
                step += 1

            self.average_loss = avg_loss/step
            self.accuracy_test= acc_test/step
            self.accuracy_train = acc_train/step
            
            if self.accuracy_test > self.max_acc:
                self.max_acc = self.accuracy_test
                worse = 0
            else:
                worse += 1
            if cur_epoch % 100 == 0:
                print self.report()

            cur_epoch += 1

            
    def get_pred(self, mode = 'test'):
        print "getting predictions"
        self.pred_dict = {}
        if mode == 'test':
            data = self.X_test
        else:
            data = self.X_val
            
        count = 0
        for batch in self.get_test_batch(data):
            count+=1
                
            feed_dict = {self.features : batch}
            pred = sess.run(self.pred, feed_dict)
            
            for i, ind in enumerate(batch.index):
                if np.isnan(pred).any():
                    print count
                    continue
                self.pred_dict[ind] = np.argmax(pred[i])
                          
        return self.pred_dict
            
    def get_predictions(self):
        return self.pred
            
            
            
    def report(self):
        return [('loss',self.average_loss), ('test_acc',self.accuracy_test), ('train_acc',self.accuracy_train), ('max_acc', self.max_acc)]

    def get_dataset(self, ident):
        if ident == 'y_train':
            return self.y_val

In [347]:
#Ran a hyperparameter search and replaced possible values with tuned parameters
    
epochs = 800
results = []

blist = [32]
lr = [0.0007]
hidden_size = [200]
actiavtion = [tf.nn.tanh]
initializer = [tf.contrib.layers.xavier_initializer()]
scale = [0.3]

hyperparameters = [i for i in itertools.product(blist, lr, hidden_size, actiavtion, initializer, scale)]
shuffle(hyperparameters)

total = len(blist) * len(lr) * len(hidden_size) * len(actiavtion) * len(initializer) * len(scale)
step = 0

#for hyp in hyperparameters:
curavg_loss = 0
curtest_acc = 0
curtrain_acc = 0
max_acc = 0
bs, l, hs, a, i, s = hyperparameters[0]
print ('---------------------------------------------------------')
print ('currently running experiment %d out of %d' % (step, total))
print 'batch size: %d, learning_rate: %f, hidden_size: %d, activation: %s, scale : %f, initializer:' % (bs, l, hs, a.__name__, s)
print i 

# for k in range(3):
 #   print 'running run %d' % (k+1)
param_dict = {'batch_size' : bs, 'lr' : l, 'hidden_size' : hs, 'activation' : a, 'initializer' : i, 'scale' : s}
model = SimpleNeuralNetwork(param_dict, epochs)
data, labels = model.prep_data('data/digits_train.csv', 'data/digits_test.csv')

with tf.Session() as sess:
    sess.as_default()

    model.run(sess)
    report = model.report()
    results.append(report)
    curavg_loss += report[0][1]
    curtest_acc += report[1][1]
    curtrain_acc += report[2][1]
    max_acc += report[3][1]
    print "loss: %f, test_acc: %f, train_acc: %f, max_acc: %f" % (curavg_loss, curtest_acc, curtrain_acc, max_acc)
    preds = model.get_pred('test')

    

---------------------------------------------------------
currently running experiment 0 out of 1
batch size: 32, learning_rate: 0.000700, hidden_size: 200, activation: tanh, scale : 0.300000, initializer:
<function _initializer at 0x1c2878e668>
[('loss', 1.3518297486407782), ('test_acc', 0.7681451612903226), ('train_acc', 0.7684811827956989), ('max_acc', 0.7681451612903226)]
[('loss', 0.8135342737502163), ('test_acc', 0.9788273182156926), ('train_acc', 0.9951193708080486), ('max_acc', 0.9788273182156926)]
[('loss', 0.8051411117602075), ('test_acc', 0.9812513373990264), ('train_acc', 0.9975475445353876), ('max_acc', 0.9812550403225806)]
[('loss', 0.802308305762092), ('test_acc', 0.9821439734933733), ('train_acc', 0.9983623137927339), ('max_acc', 0.9821439734933733)]
[('loss', 0.8009855866550463), ('test_acc', 0.9827087187944118), ('train_acc', 0.9987480894537849), ('max_acc', 0.9827087187944118)]
loss: 0.800259, test_acc: 0.982579, train_acc: 0.998956, max_acc: 0.982709
getting predict

In [350]:
preds2 = pd.DataFrame.from_dict(preds, orient='index')
preds2.index.name = 'id'
preds2.columns = ['pred']
preds2.to_csv('submissions/digit_test_labelled.csv')

In [351]:
print preds2

      pred
id        
0        6
1        3
2        9
3        2
4        5
5        7
6        6
7        8
8        1
9        0
10       2
11       2
12       9
13       2
14       3
15       0
16       9
17       9
18       5
19       2
20       7
21       2
22       4
23       1
24       0
25       4
26       6
27       1
28       7
29       7
...    ...
1590     1
1591     9
1592     6
1593     1
1594     9
1595     7
1596     0
1597     7
1598     0
1599     7
1600     3
1601     0
1602     5
1603     2
1604     0
1605     8
1606     0
1607     5
1608     5
1609     9
1610     5
1611     0
1612     4
1613     3
1614     1
1615     3
1616     0
1617     0
1618     6
1619     2

[1620 rows x 1 columns]


In [345]:
#print preds
df = pd.read_csv('data/digits_train.csv', index_col = 0)

correct = df['labels']
ytrain = model.get_dataset('y_train')
correct = correct.loc[ytrain.index]
correct = correct.sort_index()
preds2 = preds2.sort_index()

In [346]:
print metrics.classification_report(preds2, correct)
print metrics.confusion_matrix(preds2, correct)

             precision    recall  f1-score   support

          0       1.00      0.99      1.00       101
          1       0.99      0.94      0.96       102
          2       0.99      0.99      0.99       107
          3       0.97      1.00      0.99       109
          4       1.00      0.98      0.99       100
          5       0.98      0.98      0.98        90
          6       0.99      1.00      1.00       102
          7       1.00      1.00      1.00        89
          8       0.96      0.97      0.96        93
          9       0.95      0.97      0.96       107

avg / total       0.98      0.98      0.98      1000

[[100   0   0   0   0   0   0   0   0   1]
 [  0  96   1   1   0   0   0   0   4   0]
 [  0   0 106   0   0   0   1   0   0   0]
 [  0   0   0 109   0   0   0   0   0   0]
 [  0   0   0   0  98   0   0   0   0   2]
 [  0   0   0   1   0  88   0   0   0   1]
 [  0   0   0   0   0   0 102   0   0   0]
 [  0   0   0   0   0   0   0  89   0   0]
 [  0   0   0   1

In [267]:
training = df.drop(correct.index)
training_labels = training['labels']
training = training.drop(['labels'], axis = 1)

testing = df.loc[correct.index]
testing = testing.drop(['labels'], axis = 1)

classifier = RandomForestClassifier()
classifier.fit(training, training_labels)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [271]:
predrf = classifier.predict(testing)

In [272]:
print metrics.classification_report(thing, correct)
print metrics.confusion_matrix(thing, correct)

             precision    recall  f1-score   support

          0       1.00      0.98      0.99       102
          1       0.97      0.89      0.93       108
          2       0.96      0.97      0.97       106
          3       0.95      0.95      0.95       112
          4       0.96      0.99      0.98       110
          5       0.99      0.98      0.99       102
          6       0.97      0.98      0.97        92
          7       0.97      0.94      0.95        98
          8       0.88      0.96      0.92        80
          9       0.88      0.90      0.89        90

avg / total       0.96      0.95      0.95      1000

[[100   0   0   0   1   0   0   0   1   0]
 [  0  96   1   1   1   0   1   1   7   0]
 [  0   0 103   1   0   0   0   0   2   0]
 [  0   0   0 106   0   0   0   1   0   5]
 [  0   0   0   0 109   0   1   0   0   0]
 [  0   0   0   0   0 100   0   0   0   2]
 [  0   0   1   0   0   0  90   0   1   0]
 [  0   1   0   1   0   1   0  92   0   3]
 [  0   0   1   0

In [273]:
probrf = classifier.get_prob(testing)
print probrf

AttributeError: 'RandomForestClassifier' object has no attribute 'get_prob'

In [105]:
rank = sorted(results, key = lambda x: x[1][2][1], reverse = True)

IndexError: tuple index out of range

In [90]:
with tf.Session() as sess:
    param_dict = {'batch_size' : 32, 
                  'lr' : 0.001, 
                  'hidden_size' : 200, 
                  'activation' : tf.nn.tanh, 
                  'initializer' : tf.contrib.layers.xavier_initializer(), 
                  'scale': 0.5}
    model = SimpleNeuralNetwork(param_dict, 1000)
    data, labels = model.prep_data('data/digits_train.csv')
    model.run(sess)


[('loss', 1.4426476135048816), ('test_acc', 0.6760752688172043), ('train_acc', 0.6935483870967742)]
[('loss', 1.2658486645067892), ('test_acc', 0.7642809139784946), ('train_acc', 0.777385752688172)]
[('loss', 1.1915267906735876), ('test_acc', 0.7973790322580645), ('train_acc', 0.8103718637992832)]
[('loss', 1.1487423399443268), ('test_acc', 0.8146001344086021), ('train_acc', 0.828125)]
[('loss', 1.1197157759820262), ('test_acc', 0.8247983870967742), ('train_acc', 0.8398521505376344)]
[('loss', 1.0980663271787774), ('test_acc', 0.832269265232975), ('train_acc', 0.8484543010752689)]
[('loss', 1.08151652836763), ('test_acc', 0.8370295698924731), ('train_acc', 0.8547907066052227)]
[('loss', 1.0685598836630903), ('test_acc', 0.8424899193548387), ('train_acc', 0.8595010080645161)]
[('loss', 1.0577744553878174), ('test_acc', 0.8452060931899642), ('train_acc', 0.8635379330943848)]
[('loss', 1.0486052549013527), ('test_acc', 0.8482190860215054), ('train_acc', 0.8668346774193548)]
[('loss', 1.04

[('loss', 0.9504641301575161), ('test_acc', 0.8811883960573477), ('train_acc', 0.9035578277009728)]
[('loss', 0.9486798769382643), ('test_acc', 0.8823924731182796), ('train_acc', 0.9046845351043643)]
[('loss', 0.9469358805836245), ('test_acc', 0.883591991747937), ('train_acc', 0.9057850400100025)]
[('loss', 0.9452306787349756), ('test_acc', 0.8847639352366827), ('train_acc', 0.9068602459522926)]
[('loss', 0.9435634605317055), ('test_acc', 0.8858481488269795), ('train_acc', 0.9079110153958945)]
[('loss', 0.9419327730485548), ('test_acc', 0.8868966714993355), ('train_acc', 0.9089381720430108)]
[('loss', 0.9403379372869626), ('test_acc', 0.8878882915173237), ('train_acc', 0.9099425029868579)]
[('loss', 0.9387775090770999), ('test_acc', 0.8889061207609594), ('train_acc', 0.9109247607231479)]
[('loss', 0.9372504745871735), ('test_acc', 0.8899273901355774), ('train_acc', 0.911885665030388)]
[('loss', 0.9357558306703293), ('test_acc', 0.8908797259798821), ('train_acc', 0.9128259047288704)]
[(

[('loss', 0.8744110330343476), ('test_acc', 0.9303248342025626), ('train_acc', 0.9513010269783014)]
[('loss', 0.8739510939112701), ('test_acc', 0.9306455613159242), ('train_acc', 0.9515889016897081)]
[('loss', 0.8734965927577955), ('test_acc', 0.9309505630845581), ('train_acc', 0.9518733695997964)]
[('loss', 0.8730474313250981), ('test_acc', 0.9312519765970905), ('train_acc', 0.9521544908285895)]
[('loss', 0.8726035168746173), ('test_acc', 0.9315616550336414), ('train_acc', 0.9524323240897944)]
[('loss', 0.8721618189211636), ('test_acc', 0.9318560108777194), ('train_acc', 0.9527088803450863)]
[('loss', 0.8717281506566724), ('test_acc', 0.9321353098390205), ('train_acc', 0.9529802970973957)]
[('loss', 0.8712994591361173), ('test_acc', 0.9323940180447411), ('train_acc', 0.9532485941169201)]
[('loss', 0.8708756627172186), ('test_acc', 0.9326612903225806), ('train_acc', 0.9535138248847926)]
[('loss', 0.8704566770540013), ('test_acc', 0.9329102517106549), ('train_acc', 0.9537760416666666)]


[('loss', 0.8487501557924414), ('test_acc', 0.9467916666666667), ('train_acc', 0.967358870967742)]
[('loss', 0.8485444590767873), ('test_acc', 0.9469032472261492), ('train_acc', 0.9674875765754187)]
[('loss', 0.8483403948060806), ('test_acc', 0.9470379437617341), ('train_acc', 0.9676152607100188)]
[('loss', 0.8481379436435325), ('test_acc', 0.9471609503166305), ('train_acc', 0.967741935483871)]
[('loss', 0.847937086458463), ('test_acc', 0.9472909258318517), ('train_acc', 0.9678676128185589)]
[('loss', 0.8477378045358965), ('test_acc', 0.9474264705882353), ('train_acc', 0.9679923044486611)]
[('loss', 0.847540079448272), ('test_acc', 0.9475543934811828), ('train_acc', 0.9681160219254032)]
[('loss', 0.8473438930348878), ('test_acc', 0.9476760909585373), ('train_acc', 0.9682387766202251)]
[('loss', 0.847149227386794), ('test_acc', 0.9478033570892723), ('train_acc', 0.9683605797282654)]
[('loss', 0.846956064881525), ('test_acc', 0.9479218561879852), ('train_acc', 0.9684814422717648)]
[('los

[('loss', 0.8361212720538084), ('test_acc', 0.9547581856049598), ('train_acc', 0.9753382414672738)]
[('loss', 0.836004606427186), ('test_acc', 0.9548292527847531), ('train_acc', 0.9754110730152598)]
[('loss', 0.8358886319147298), ('test_acc', 0.9549049109292248), ('train_acc', 0.975483469748034)]
[('loss', 0.8357733432022322), ('test_acc', 0.9549761184715821), ('train_acc', 0.975555435547875)]
[('loss', 0.8356587341466124), ('test_acc', 0.9550618598640758), ('train_acc', 0.9756269742509811)]
[('loss', 0.8355447997715877), ('test_acc', 0.9551252226888083), ('train_acc', 0.9756980896481516)]
[('loss', 0.8354315336792598), ('test_acc', 0.955206053541409), ('train_acc', 0.9757687854854569)]
[('loss', 0.8353189311172298), ('test_acc', 0.9552893738140418), ('train_acc', 0.9758390654648956)]
[('loss', 0.8352069861934821), ('test_acc', 0.9553485557973072), ('train_acc', 0.9759089332450415)]
[('loss', 0.8350956928825211), ('test_acc', 0.9554181993963403), ('train_acc', 0.9759783924416777)]
[('l

[('loss', 0.8283419840583213), ('test_acc', 0.9595425261062862), ('train_acc', 0.9801916938068652)]
[('loss', 0.8282671165498278), ('test_acc', 0.9595920038163018), ('train_acc', 0.9802383899332148)]
[('loss', 0.8281926070628186), ('test_acc', 0.9596404409116633), ('train_acc', 0.9802848626331224)]
[('loss', 0.8281184531195529), ('test_acc', 0.9596854389611723), ('train_acc', 0.9803311135063002)]
[('loss', 0.8280446521316012), ('test_acc', 0.9597350230414746), ('train_acc', 0.9803771441372248)]
[('loss', 0.8279712015579624), ('test_acc', 0.9597827752662631), ('train_acc', 0.9804229560953184)]
[('loss', 0.8278980989698083), ('test_acc', 0.9598326899556643), ('train_acc', 0.9804685509351272)]
[('loss', 0.8278253418946109), ('test_acc', 0.9598815742647245), ('train_acc', 0.9805139301964971)]
[('loss', 0.8277529278378212), ('test_acc', 0.9599215104483668), ('train_acc', 0.9805590954047474)]
[('loss', 0.8276808544309134), ('test_acc', 0.959977862112587), ('train_acc', 0.9806040480708412)]
[

[('loss', 0.8231488619053744), ('test_acc', 0.9630160859353115), ('train_acc', 0.9834305708190575)]
[('loss', 0.8230968040996982), ('test_acc', 0.9630530913978494), ('train_acc', 0.9834630376344086)]
[('loss', 0.8230449541096135), ('test_acc', 0.9630765351018393), ('train_acc', 0.9834953748417145)]
[('loss', 0.8229933106791507), ('test_acc', 0.9631179582744291), ('train_acc', 0.983527583215525)]
[('loss', 0.822941872585263), ('test_acc', 0.9631532044293379), ('train_acc', 0.983559663524231)]
[('loss', 0.822890638598079), ('test_acc', 0.9631936443932412), ('train_acc', 0.9835916165301246)]
[('loss', 0.8228396075139675), ('test_acc', 0.9632259395294368), ('train_acc', 0.9836234429894603)]
[('loss', 0.8227887781248794), ('test_acc', 0.9632600992392367), ('train_acc', 0.9836551436525139)]
[('loss', 0.8227381492411455), ('test_acc', 0.963291473139488), ('train_acc', 0.9836867192636424)]
[('loss', 0.8226877196698476), ('test_acc', 0.9633280151976971), ('train_acc', 0.9837181705613411)]
[('lo

[('loss', 0.8194359543207844), ('test_acc', 0.9655462485681557), ('train_acc', 0.9857462217788124)]
[('loss', 0.819397676271168), ('test_acc', 0.9655684584739667), ('train_acc', 0.9857700944318413)]
[('loss', 0.8193595293107625), ('test_acc', 0.9655911676977463), ('train_acc', 0.985793885329209)]
[('loss', 0.8193215127673157), ('test_acc', 0.9656160968660968), ('train_acc', 0.9858175948901755)]
[('loss', 0.8192836259731638), ('test_acc', 0.9656438080296524), ('train_acc', 0.9858412235311388)]
[('loss', 0.8192458682651932), ('test_acc', 0.965668562583576), ('train_acc', 0.9858647716656592)]
[('loss', 0.8192082389848007), ('test_acc', 0.965691518542901), ('train_acc', 0.9858882397044839)]
[('loss', 0.8191707374767677), ('test_acc', 0.9657218129872026), ('train_acc', 0.9859116280555708)]
[('loss', 0.8191333630935772), ('test_acc', 0.9657480180426462), ('train_acc', 0.9859349371241115)]
[('loss', 0.8190961151888408), ('test_acc', 0.9657781143678474), ('train_acc', 0.9859581673125557)]
[('l

[('loss', 0.8167597644725568), ('test_acc', 0.9671416444336648), ('train_acc', 0.987460586951249)]
[('loss', 0.8167295250504552), ('test_acc', 0.9671612136329878), ('train_acc', 0.9874794148987698)]
[('loss', 0.8166993759332645), ('test_acc', 0.9671827392755236), ('train_acc', 0.9874981863906757)]
[('loss', 0.816669316793139), ('test_acc', 0.9672011823127937), ('train_acc', 0.98751690168051)]
[('loss', 0.8166393472179719), ('test_acc', 0.9672235884083128), ('train_acc', 0.9875355610202999)]
[('loss', 0.8166094668134222), ('test_acc', 0.9672369001765366), ('train_acc', 0.9875541646605681)]
[('loss', 0.8165796752123337), ('test_acc', 0.9672521753761838), ('train_acc', 0.9875727128503438)]
[('loss', 0.8165499719629457), ('test_acc', 0.9672669050819253), ('train_acc', 0.9875912058371735)]
[('loss', 0.8165203567961706), ('test_acc', 0.9672850860374826), ('train_acc', 0.9876096438671332)]
[('loss', 0.8164908292524352), ('test_acc', 0.9672957348202036), ('train_acc', 0.9876280271848378)]
[('l

KeyboardInterrupt: 

In [62]:
for ind, i in enumerate(rank): 
    print i[0]
    print i[1]
    print '------------------------------'
    if ind > 20:
        break

{'activation': <function tanh at 0x10ec8b2a8>, 'hidden_size': 200, 'lr': 0.001, 'batch_size': 32, 'initializer': <function _initializer at 0x1c25600848>}
[('loss', 0.8120406067884096), ('test_acc', 0.9744623655913979), ('train_acc', 0.9944422043010752)]
------------------------------
{'activation': <function tanh at 0x10ec8b2a8>, 'hidden_size': 200, 'lr': 0.001, 'batch_size': 64, 'initializer': <function _initializer at 0x1c25600848>}
[('loss', 0.8157388887975527), ('test_acc', 0.9818614130434783), ('train_acc', 0.9935258152173913)]
------------------------------
{'activation': <function tanh at 0x10ec8b2a8>, 'hidden_size': 100, 'lr': 0.001, 'batch_size': 32, 'initializer': <function _initializer at 0x1c25600848>}
[('loss', 0.8228619711822079), ('test_acc', 0.9734677419354839), ('train_acc', 0.9909711021505376)]
------------------------------
{'activation': <function tanh at 0x10ec8b2a8>, 'hidden_size': 50, 'lr': 0.001, 'batch_size': 32, 'initializer': <function _initializer at 0x1c256