## 利用CNN對名字進行簡單男女的分類
CNN只是一種提取特徵的工具，只要讓資料整理成類似於影像的格式，照樣可以丟進CNN<br />
這支程式只是紀錄CNN處理文本資料的流程，提供RNN以外處理文本資料的方式<br />
僅在training data上觀察cross entropy是否有下降，並沒有在testing data上測試performance

In [1]:
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
import numpy as np
import csv
import math

### 讀取數據

In [2]:
name_dataset = 'name.csv'

# Load the (name, gender) rows of the CSV file.
#   train_x : list of name strings
#   train_y : list of one-hot labels, [0, 1] for male ('男') and [1, 0] for female ('女')
# A row is kept only when it has exactly two columns AND a recognised gender
# label, so train_x and train_y always stay aligned index by index.
train_x , train_y = [] , []
label_to_onehot = {'男' : [0 , 1] , '女' : [1 , 0]}

# The context manager guarantees the file handle is closed; newline = '' is
# what the csv module expects for files it reads.
with open(name_dataset , 'r' , encoding = 'utf-8' , newline = '') as csvfile:
    read = csv.reader(csvfile)
    # The first row is the header ['姓名' , '性別'], so it is skipped.
    next(read , None)
    for sample in read:
        if len(sample) != 2:
            continue
        onehot = label_to_onehot.get(sample[1])
        if onehot is None:
            # Unknown label: drop the whole row instead of keeping a name without a label.
            continue
        train_x.append(sample[0])
        train_y.append(list(onehot))

# Report the longest name actually present in the data (0 when no rows were kept).
longest_name_length = max((len(name) for name in train_x) , default = 0)
print('最長名字的字符數: ' , longest_name_length)
# Every name is later zero-padded to max_name_length. It is kept at 8 (at least
# as large as the biggest convolution filter) and only grows if the data
# contains a longer name.
max_name_length = max(8 , longest_name_length)

最長名字的字符數:  3


### 數據預處理

In [3]:
from collections import Counter

# Count how often every single character occurs (per character, not per name).
# counter maps each character to its frequency.
counter = Counter(char for name in train_x for char in name)
# Characters ordered from most to least frequent; ties keep first-seen order.
count_pairs = counter.most_common()

# Vocabulary: index 0 is reserved for the padding token, followed by every
# character seen in the data. Slice count_pairs here to keep only the most
# frequent characters if a smaller vocabulary is wanted.
words = ['pad'] + [char for char , _ in count_pairs]

# Map every vocabulary entry to an integer ID.
word_to_int = {word : idx for idx , word in enumerate(words)}

train_x_vec = []
for name in train_x:
    # Replace each character of the name by its vocabulary ID. Anything beyond
    # max_name_length is cut off so that all rows end up with the same length.
    name_vec = [word_to_int[char] for char in name][:max_name_length]
    # Names shorter than max_name_length are right-padded with 0 (the 'pad' ID).
    name_vec += [0] * (max_name_length - len(name_vec))
    train_x_vec.append(name_vec)

train_x_vec = np.array(train_x_vec)
train_y = np.array(train_y)

### 建立神經網路

In [4]:
# Hyperparameters
input_size = max_name_length
num_classes = 2  # two classes in total: male and female
batch_size = 64
num_batch = len(train_x_vec) // batch_size

# Graph inputs: padded character IDs, one-hot labels and the dropout keep probability.
X = tf.placeholder(dtype = tf.int32 , shape = [None , input_size])
Y = tf.placeholder(dtype = tf.float32 , shape = [None , num_classes])
dropout_keep_prob = tf.placeholder(dtype = tf.float32)

# Embedding layer
vocabulary_size = len(words)
embedding_size = 128
num_filters = 130
with tf.variable_scope('embedding'):
    # Embedding table initialised uniformly in [-1, 1), one row per vocabulary entry.
    initial_embedding = tf.random_uniform(shape = [vocabulary_size , embedding_size] ,
                                          minval = -1.0 ,
                                          maxval = 1.0)
    W = tf.Variable(initial_embedding , name = 'embedding')
    embedded_chars = tf.nn.embedding_lookup(params = W , ids = X)
    # A trailing axis is appended so the tensor has the image-like layout
    # that the 2-D convolution expects.
    embedded_chars_expanded = tf.expand_dims(embedded_chars , axis = 3)

![](工作流程.png)

依據考慮上下文的長度的不同，可以用不同長度的filter<br />
但最後的output的維度會不同，此時可以透過max pooling做調整，將輸出統一成同一個維度

<img src="cnn_filter.png" style="width:750px;height:400px;float:middle">

In [5]:
print('step_0 → 輸入CNN之前的維度 : {}\n'.format(embedded_chars_expanded.shape))


def _conv_maxpool_branch(branch_no , filter_size):
    """Build one convolution + max-pooling branch on top of the embedded input.

    branch_no   : 1-based branch number, used only in the printed messages.
    filter_size : number of consecutive characters covered by the filter.

    Returns the tuple (weight, bias, conv, activation, pooled) of the branch.
    """
    with tf.variable_scope('conv_layer_{}'.format(filter_size)):
        # The filter is as wide as the embedding, so with 'VALID' padding it
        # can only slide along the character axis of the name.
        filter_shape = [filter_size , embedding_size , 1 , num_filters]
        weight = tf.Variable(tf.truncated_normal(filter_shape , stddev = 0.1) , name = 'weight')
        bias = tf.Variable(tf.constant(0.1 , shape = [num_filters]) , name = 'bias')
        conv = tf.nn.conv2d(embedded_chars_expanded ,
                            weight ,
                            strides = [1 , 1 , 1 , 1] ,
                            padding = 'VALID')
        activation = tf.nn.relu(conv + bias)
        print('step_{0}-1 → 以第{0}種filtert處理之後的維度 : {1}\n'.format(branch_no , activation.shape))

        # Pool over every remaining position so each branch yields one value
        # per filter, whatever its filter size.
        pooled = tf.nn.max_pool(activation ,
                                ksize = [1 , input_size - filter_size + 1 , 1 , 1] ,
                                strides = [1 , 1 , 1 , 1] ,
                                padding = 'VALID')
        print('step_{0}-2 → 以第{0}次pooling之後的維度 : {1}\n'.format(branch_no , pooled.shape))
    return weight , bias , conv , activation , pooled


# convolution + maxpool layer
pooled_outputs = []
filter_sizes = [3 , 4 , 5]  # three filter sizes in total

W_conv1 , b_conv1 , conv_1 , h_1 , pooled_1 = _conv_maxpool_branch(1 , filter_sizes[0])
pooled_outputs.append(pooled_1)

W_conv2 , b_conv2 , conv_2 , h_2 , pooled_2 = _conv_maxpool_branch(2 , filter_sizes[1])
pooled_outputs.append(pooled_2)

W_conv3 , b_conv3 , conv_3 , h_3 , pooled_3 = _conv_maxpool_branch(3 , filter_sizes[2])
pooled_outputs.append(pooled_3)

# Concatenate the pooled features of all branches along the channel axis and
# flatten them into one feature vector per sample.
num_filters_total = num_filters * len(filter_sizes)
h_pool = tf.concat(pooled_outputs , axis = 3)
print('step_4 → 將(pooled_1 , pooled_2 , pooled_3)拼接之後的維度 : {}'.format(h_pool.shape))
h_pool_flat = tf.reshape(h_pool , [-1 , num_filters_total])

# dropout
with tf.variable_scope('dropout'):
    h_drop = tf.nn.dropout(h_pool_flat , dropout_keep_prob)

# output: fully connected layer followed by softmax, so `output` holds class probabilities
with tf.variable_scope('output'):
    W_flat = tf.get_variable(shape = [num_filters_total, num_classes] ,
                             initializer = tf.contrib.layers.xavier_initializer() ,
                             name = 'weight')
    b_flat = tf.Variable(tf.constant(0.1 , shape = [num_classes]) , name = 'bias')
    output = tf.nn.xw_plus_b(h_drop , W_flat , b_flat)
    output = tf.nn.softmax(output)

step_0 → 輸入CNN之前的維度 : (?, 8, 128, 1)

step_1-1 → 以第1種filtert處理之後的維度 : (?, 6, 1, 130)

step_1-2 → 以第1次pooling之後的維度 : (?, 1, 1, 130)

step_2-1 → 以第2種filtert處理之後的維度 : (?, 5, 1, 130)

step_2-2 → 以第2次pooling之後的維度 : (?, 1, 1, 130)

step_3-1 → 以第3種filtert處理之後的維度 : (?, 4, 1, 130)

step_3-2 → 以第3次pooling之後的維度 : (?, 1, 1, 130)

step_4 → 將(pooled_1 , pooled_2 , pooled_3)拼接之後的維度 : (?, 1, 1, 390)


In [6]:
# Accuracy: threshold the class probabilities at 0.5 and compare them
# element-wise with the one-hot label. A sample counts as correct only when
# every class entry matches, hence the minimum over the class axis.
predicted_onehot = tf.cast(tf.greater_equal(output , 0.5) , tf.int32)
target_onehot = tf.cast(Y , tf.int32)
correct = tf.equal(predicted_onehot , target_onehot)
per_sample_hit = tf.reduce_min(tf.cast(correct , tf.float32) , axis = 1)
accuracy = tf.reduce_mean(per_sample_hit)

# Loss: mean cross entropy over the batch. The small constant keeps the
# argument of the logarithm away from zero.
optimizer = tf.train.AdamOptimizer(learning_rate = 1e-3)
log_probability = tf.log(output + 1e-9)
cross_entropy_temp = -tf.reduce_sum(Y * log_probability , axis = 1)
cross_entropy = tf.reduce_mean(cross_entropy_temp)
grads_and_vars = optimizer.compute_gradients(cross_entropy)
train_op = optimizer.apply_gradients(grads_and_vars)

# Saver over all global variables, then a session with every variable initialised.
saver = tf.train.Saver(tf.global_variables())
sess = tf.Session()
sess.run(tf.global_variables_initializer())

### 開始訓練神經網路

In [7]:
# Mini-batch index table.
# The sample indices 0 .. n-1 are laid out in consecutive rows of batch_size
# entries. When n is not a multiple of batch_size, the last row wraps around
# to the first samples, so every batch is full and every sample is used.
epochs = 200
num_samples = len(train_x)
step = (math.ceil(num_samples / batch_size)) * batch_size
# max(num_samples , 1) only avoids a modulo by zero when there is no data
# (step is 0 in that case, so the table is simply empty).
wrapped_indices = np.arange(step) % max(num_samples , 1)
index = list(wrapped_indices.reshape(step // batch_size , batch_size))

for epoch_i in range(epochs):
    # Iterate over every row of the table, including the wrap-around batch
    # that holds the samples left over after the last complete batch.
    for batch_i , batch_index in enumerate(index):
        batch_x = train_x_vec[batch_index , :]
        batch_y = train_y[batch_index , :]

        # Dropout keeps 90% of the pooled features during training.
        feed_dict = {X : batch_x , Y : batch_y , dropout_keep_prob : 0.9}
        _ , train_loss , train_acc = sess.run([train_op , cross_entropy , accuracy] , feed_dict)

        # Report loss and accuracy of the current batch every 1000 batches.
        if batch_i % 1000 == 0:
            print('=' * 30)
            print('epoch_i : {}'.format(epoch_i))
            print('batch_i : {}'.format(batch_i))
            print('train_loss : {:.2f}'.format(train_loss))
            print('train_accuracy : {:.2%}\n'.format(train_acc))

epoch_i : 0
batch_i : 0
train_loss : 0.83
train_accuracy : 51.56%

epoch_i : 0
batch_i : 1000
train_loss : 0.38
train_accuracy : 85.94%

epoch_i : 0
batch_i : 2000
train_loss : 0.37
train_accuracy : 85.94%

epoch_i : 0
batch_i : 3000
train_loss : 0.49
train_accuracy : 76.56%

epoch_i : 0
batch_i : 4000
train_loss : 0.34
train_accuracy : 87.50%

epoch_i : 0
batch_i : 5000
train_loss : 0.40
train_accuracy : 82.81%

epoch_i : 1
batch_i : 0
train_loss : 0.33
train_accuracy : 87.50%

epoch_i : 1
batch_i : 1000
train_loss : 0.26
train_accuracy : 90.62%

epoch_i : 1
batch_i : 2000
train_loss : 0.29
train_accuracy : 89.06%

epoch_i : 1
batch_i : 3000
train_loss : 0.47
train_accuracy : 79.69%

epoch_i : 1
batch_i : 4000
train_loss : 0.31
train_accuracy : 87.50%

epoch_i : 1
batch_i : 5000
train_loss : 0.34
train_accuracy : 84.38%

epoch_i : 2
batch_i : 0
train_loss : 0.33
train_accuracy : 89.06%

epoch_i : 2
batch_i : 1000
train_loss : 0.23
train_accuracy : 90.62%

epoch_i : 2
batch_i : 2000
tr

epoch_i : 13
batch_i : 4000
train_loss : 0.19
train_accuracy : 92.19%

epoch_i : 13
batch_i : 5000
train_loss : 0.18
train_accuracy : 93.75%

epoch_i : 14
batch_i : 0
train_loss : 0.07
train_accuracy : 98.44%

epoch_i : 14
batch_i : 1000
train_loss : 0.16
train_accuracy : 92.19%

epoch_i : 14
batch_i : 2000
train_loss : 0.07
train_accuracy : 98.44%

epoch_i : 14
batch_i : 3000
train_loss : 0.22
train_accuracy : 85.94%

epoch_i : 14
batch_i : 4000
train_loss : 0.14
train_accuracy : 93.75%

epoch_i : 14
batch_i : 5000
train_loss : 0.14
train_accuracy : 93.75%

epoch_i : 15
batch_i : 0
train_loss : 0.12
train_accuracy : 93.75%

epoch_i : 15
batch_i : 1000
train_loss : 0.08
train_accuracy : 98.44%

epoch_i : 15
batch_i : 2000
train_loss : 0.09
train_accuracy : 98.44%

epoch_i : 15
batch_i : 3000
train_loss : 0.25
train_accuracy : 90.62%

epoch_i : 15
batch_i : 4000
train_loss : 0.13
train_accuracy : 92.19%

epoch_i : 15
batch_i : 5000
train_loss : 0.14
train_accuracy : 90.62%

epoch_i : 16

epoch_i : 27
batch_i : 1000
train_loss : 0.07
train_accuracy : 96.88%

epoch_i : 27
batch_i : 2000
train_loss : 0.03
train_accuracy : 100.00%

epoch_i : 27
batch_i : 3000
train_loss : 0.19
train_accuracy : 90.62%

epoch_i : 27
batch_i : 4000
train_loss : 0.12
train_accuracy : 95.31%

epoch_i : 27
batch_i : 5000
train_loss : 0.11
train_accuracy : 95.31%

epoch_i : 28
batch_i : 0
train_loss : 0.02
train_accuracy : 98.44%

epoch_i : 28
batch_i : 1000
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 28
batch_i : 2000
train_loss : 0.15
train_accuracy : 95.31%

epoch_i : 28
batch_i : 3000
train_loss : 0.14
train_accuracy : 92.19%

epoch_i : 28
batch_i : 4000
train_loss : 0.06
train_accuracy : 98.44%

epoch_i : 28
batch_i : 5000
train_loss : 0.13
train_accuracy : 95.31%

epoch_i : 29
batch_i : 0
train_loss : 0.12
train_accuracy : 95.31%

epoch_i : 29
batch_i : 1000
train_loss : 0.28
train_accuracy : 96.88%

epoch_i : 29
batch_i : 2000
train_loss : 0.06
train_accuracy : 98.44%

epoch_i : 2

epoch_i : 40
batch_i : 4000
train_loss : 0.06
train_accuracy : 96.88%

epoch_i : 40
batch_i : 5000
train_loss : 0.10
train_accuracy : 96.88%

epoch_i : 41
batch_i : 0
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 41
batch_i : 1000
train_loss : 0.05
train_accuracy : 96.88%

epoch_i : 41
batch_i : 2000
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 41
batch_i : 3000
train_loss : 0.10
train_accuracy : 96.88%

epoch_i : 41
batch_i : 4000
train_loss : 0.06
train_accuracy : 98.44%

epoch_i : 41
batch_i : 5000
train_loss : 0.06
train_accuracy : 96.88%

epoch_i : 42
batch_i : 0
train_loss : 0.16
train_accuracy : 96.88%

epoch_i : 42
batch_i : 1000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 42
batch_i : 2000
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 42
batch_i : 3000
train_loss : 0.17
train_accuracy : 92.19%

epoch_i : 42
batch_i : 4000
train_loss : 0.07
train_accuracy : 96.88%

epoch_i : 42
batch_i : 5000
train_loss : 0.11
train_accuracy : 95.31%

epoch_i : 

epoch_i : 54
batch_i : 1000
train_loss : 0.02
train_accuracy : 98.44%

epoch_i : 54
batch_i : 2000
train_loss : 0.14
train_accuracy : 96.88%

epoch_i : 54
batch_i : 3000
train_loss : 0.13
train_accuracy : 92.19%

epoch_i : 54
batch_i : 4000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 54
batch_i : 5000
train_loss : 0.06
train_accuracy : 100.00%

epoch_i : 55
batch_i : 0
train_loss : 0.10
train_accuracy : 96.88%

epoch_i : 55
batch_i : 1000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 55
batch_i : 2000
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 55
batch_i : 3000
train_loss : 0.08
train_accuracy : 98.44%

epoch_i : 55
batch_i : 4000
train_loss : 0.14
train_accuracy : 93.75%

epoch_i : 55
batch_i : 5000
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 56
batch_i : 0
train_loss : 0.05
train_accuracy : 96.88%

epoch_i : 56
batch_i : 1000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 56
batch_i : 2000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i :

epoch_i : 67
batch_i : 4000
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 67
batch_i : 5000
train_loss : 0.06
train_accuracy : 98.44%

epoch_i : 68
batch_i : 0
train_loss : 0.05
train_accuracy : 96.88%

epoch_i : 68
batch_i : 1000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 68
batch_i : 2000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 68
batch_i : 3000
train_loss : 0.12
train_accuracy : 93.75%

epoch_i : 68
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 68
batch_i : 5000
train_loss : 0.07
train_accuracy : 96.88%

epoch_i : 69
batch_i : 0
train_loss : 0.12
train_accuracy : 96.88%

epoch_i : 69
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 69
batch_i : 2000
train_loss : 0.14
train_accuracy : 93.75%

epoch_i : 69
batch_i : 3000
train_loss : 0.12
train_accuracy : 96.88%

epoch_i : 69
batch_i : 4000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 69
batch_i : 5000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i :

epoch_i : 81
batch_i : 1000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 81
batch_i : 2000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 81
batch_i : 3000
train_loss : 0.05
train_accuracy : 100.00%

epoch_i : 81
batch_i : 4000
train_loss : 0.03
train_accuracy : 100.00%

epoch_i : 81
batch_i : 5000
train_loss : 0.04
train_accuracy : 100.00%

epoch_i : 82
batch_i : 0
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 82
batch_i : 1000
train_loss : 0.04
train_accuracy : 96.88%

epoch_i : 82
batch_i : 2000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 82
batch_i : 3000
train_loss : 0.07
train_accuracy : 96.88%

epoch_i : 82
batch_i : 4000
train_loss : 0.07
train_accuracy : 96.88%

epoch_i : 82
batch_i : 5000
train_loss : 0.03
train_accuracy : 100.00%

epoch_i : 83
batch_i : 0
train_loss : 0.09
train_accuracy : 98.44%

epoch_i : 83
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 83
batch_i : 2000
train_loss : 0.03
train_accuracy : 98.44%

epoch

epoch_i : 94
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 94
batch_i : 5000
train_loss : 0.06
train_accuracy : 98.44%

epoch_i : 95
batch_i : 0
train_loss : 0.23
train_accuracy : 96.88%

epoch_i : 95
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 95
batch_i : 2000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 95
batch_i : 3000
train_loss : 0.07
train_accuracy : 96.88%

epoch_i : 95
batch_i : 4000
train_loss : 0.08
train_accuracy : 96.88%

epoch_i : 95
batch_i : 5000
train_loss : 0.04
train_accuracy : 96.88%

epoch_i : 96
batch_i : 0
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 96
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 96
batch_i : 2000
train_loss : 0.05
train_accuracy : 96.88%

epoch_i : 96
batch_i : 3000
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 96
batch_i : 4000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 96
batch_i : 5000
train_loss : 0.03
train_accuracy : 100.00%

epoch_

epoch_i : 108
batch_i : 0
train_loss : 0.07
train_accuracy : 96.88%

epoch_i : 108
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 108
batch_i : 2000
train_loss : 0.11
train_accuracy : 98.44%

epoch_i : 108
batch_i : 3000
train_loss : 0.12
train_accuracy : 92.19%

epoch_i : 108
batch_i : 4000
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 108
batch_i : 5000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 109
batch_i : 0
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 109
batch_i : 1000
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 109
batch_i : 2000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 109
batch_i : 3000
train_loss : 0.11
train_accuracy : 95.31%

epoch_i : 109
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 109
batch_i : 5000
train_loss : 0.08
train_accuracy : 95.31%

epoch_i : 110
batch_i : 0
train_loss : 0.04
train_accuracy : 96.88%

epoch_i : 110
batch_i : 1000
train_loss : 0.14
train_accuracy : 98.44%

epoch_i : 121
batch_i : 2000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 121
batch_i : 3000
train_loss : 0.06
train_accuracy : 96.88%

epoch_i : 121
batch_i : 4000
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 121
batch_i : 5000
train_loss : 0.03
train_accuracy : 100.00%

epoch_i : 122
batch_i : 0
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 122
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 122
batch_i : 2000
train_loss : 0.11
train_accuracy : 96.88%

epoch_i : 122
batch_i : 3000
train_loss : 0.12
train_accuracy : 95.31%

epoch_i : 122
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 122
batch_i : 5000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 123
batch_i : 0
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 123
batch_i : 1000
train_loss : 0.02
train_accuracy : 98.44%

epoch_i : 123
batch_i : 2000
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 123
batch_i : 3000
train_loss : 0.04
train_accuracy : 

epoch_i : 134
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 134
batch_i : 5000
train_loss : 0.06
train_accuracy : 98.44%

epoch_i : 135
batch_i : 0
train_loss : 0.02
train_accuracy : 98.44%

epoch_i : 135
batch_i : 1000
train_loss : 0.00
train_accuracy : 100.00%

epoch_i : 135
batch_i : 2000
train_loss : 0.07
train_accuracy : 98.44%

epoch_i : 135
batch_i : 3000
train_loss : 0.10
train_accuracy : 93.75%

epoch_i : 135
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 135
batch_i : 5000
train_loss : 0.12
train_accuracy : 95.31%

epoch_i : 136
batch_i : 0
train_loss : 0.00
train_accuracy : 100.00%

epoch_i : 136
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 136
batch_i : 2000
train_loss : 0.01
train_accuracy : 98.44%

epoch_i : 136
batch_i : 3000
train_loss : 0.11
train_accuracy : 92.19%

epoch_i : 136
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 136
batch_i : 5000
train_loss : 0.02
train_accuracy : 

epoch_i : 148
batch_i : 0
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 148
batch_i : 1000
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 148
batch_i : 2000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 148
batch_i : 3000
train_loss : 0.07
train_accuracy : 96.88%

epoch_i : 148
batch_i : 4000
train_loss : 0.05
train_accuracy : 96.88%

epoch_i : 148
batch_i : 5000
train_loss : 0.03
train_accuracy : 100.00%

epoch_i : 149
batch_i : 0
train_loss : 0.15
train_accuracy : 96.88%

epoch_i : 149
batch_i : 1000
train_loss : 0.00
train_accuracy : 100.00%

epoch_i : 149
batch_i : 2000
train_loss : 0.11
train_accuracy : 96.88%

epoch_i : 149
batch_i : 3000
train_loss : 0.16
train_accuracy : 93.75%

epoch_i : 149
batch_i : 4000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 149
batch_i : 5000
train_loss : 0.03
train_accuracy : 100.00%

epoch_i : 150
batch_i : 0
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 150
batch_i : 1000
train_loss : 0.00
train_accuracy : 100.0

epoch_i : 161
batch_i : 2000
train_loss : 0.06
train_accuracy : 98.44%

epoch_i : 161
batch_i : 3000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 161
batch_i : 4000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 161
batch_i : 5000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 162
batch_i : 0
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 162
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 162
batch_i : 2000
train_loss : 0.06
train_accuracy : 96.88%

epoch_i : 162
batch_i : 3000
train_loss : 0.09
train_accuracy : 95.31%

epoch_i : 162
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 162
batch_i : 5000
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 163
batch_i : 0
train_loss : 0.06
train_accuracy : 98.44%

epoch_i : 163
batch_i : 1000
train_loss : 0.13
train_accuracy : 96.88%

epoch_i : 163
batch_i : 2000
train_loss : 0.08
train_accuracy : 98.44%

epoch_i : 163
batch_i : 3000
train_loss : 0.07
train_accuracy : 9

epoch_i : 174
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 174
batch_i : 5000
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 175
batch_i : 0
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 175
batch_i : 1000
train_loss : 0.02
train_accuracy : 98.44%

epoch_i : 175
batch_i : 2000
train_loss : 0.30
train_accuracy : 93.75%

epoch_i : 175
batch_i : 3000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 175
batch_i : 4000
train_loss : 0.00
train_accuracy : 100.00%

epoch_i : 175
batch_i : 5000
train_loss : 0.12
train_accuracy : 95.31%

epoch_i : 176
batch_i : 0
train_loss : 0.08
train_accuracy : 96.88%

epoch_i : 176
batch_i : 1000
train_loss : 0.05
train_accuracy : 98.44%

epoch_i : 176
batch_i : 2000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 176
batch_i : 3000
train_loss : 0.07
train_accuracy : 98.44%

epoch_i : 176
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 176
batch_i : 5000
train_loss : 0.05
train_accuracy : 

epoch_i : 188
batch_i : 0
train_loss : 0.04
train_accuracy : 98.44%

epoch_i : 188
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 188
batch_i : 2000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 188
batch_i : 3000
train_loss : 0.04
train_accuracy : 96.88%

epoch_i : 188
batch_i : 4000
train_loss : 0.02
train_accuracy : 98.44%

epoch_i : 188
batch_i : 5000
train_loss : 0.27
train_accuracy : 96.88%

epoch_i : 189
batch_i : 0
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 189
batch_i : 1000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 189
batch_i : 2000
train_loss : 0.02
train_accuracy : 100.00%

epoch_i : 189
batch_i : 3000
train_loss : 0.08
train_accuracy : 95.31%

epoch_i : 189
batch_i : 4000
train_loss : 0.01
train_accuracy : 100.00%

epoch_i : 189
batch_i : 5000
train_loss : 0.06
train_accuracy : 96.88%

epoch_i : 190
batch_i : 0
train_loss : 0.03
train_accuracy : 98.44%

epoch_i : 190
batch_i : 1000
train_loss : 0.02
train_accuracy : 98.