# Neural Network Model for MNIST Dataset

### 패키지

In [1]:
import numpy as np
import math

import tensorflow as tf

import keras
from keras import backend as K
from keras.utils import np_utils
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.utils import to_categorical

from sklearn.manifold import TSNE
from skimage import io

import theano

import json

Using TensorFlow backend.


In [2]:
 # Functions 
def get_hot_idx(arr):
    return arr.index(max(arr))

def save_to_json_file(data, filename):
    with open(filename, 'w') as outfile:
        json.dump(data, outfile)
    print(filename + ' 저장완료')
    
def get_round_array(array, decimal):
    return [round(e, decimal) for e in array]

def get_activations(model, layer, X_batch):
    get_activations = K.function([model.layers[0].input, K.learning_phase()], [model.layers[layer].output,])
    activations = get_activations([X_batch,0])
    return activations

def get_arr_from_json_file(filename):
    input_file = open (filename)
    return json.load(input_file)
    
current_milli_time = lambda: int(round(time.time() * 1000))

### 데이터셋 로드

In [3]:
image_width = 28
image_height = 28
num_of_features = image_width * image_height

num_of_class = 10
num_of_trainset = 60000
num_of_testset = 10000

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
X_train = X_train.reshape(num_of_trainset, num_of_features).astype('float32') / 255.0
X_test = X_test.reshape(num_of_testset, num_of_features).astype('float32') / 255.0
Y_train = np_utils.to_categorical(Y_train)
Y_test = np_utils.to_categorical(Y_test)

In [4]:
# 로컬 테스트 데이터 로드
images = np.zeros((10000, 784))
for real in range(10):
    for idx in range(1, 1001):
        file = '../../data/mnist/images/'+ str(real) + '/' + str(real) + '_' + str(idx) +'.png'
        image = np.ndarray.flatten(io.imread(file)) / 255.0
        image = np.array([1 - pixel for pixel in image])
        images[real * 1000 + idx - 1] = image

### 모델 구성

In [5]:
# 모델 구축 
model = Sequential()
model.add(Dense(units=16, input_dim=num_of_features, activation='relu'))
model.add(Dense(units=16, activation='relu'))
model.add(Dense(units=16, activation='relu'))
model.add(Dense(units=16, activation='relu'))
model.add(Dense(units=num_of_class, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

W0716 16:58:33.951407 4448605632 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0716 16:58:33.965225 4448605632 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0716 16:58:33.967465 4448605632 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0716 16:58:34.020342 4448605632 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0716 16:58:34.034548 4448605632 deprecation_wrapper.py:119] From /anaconda3

### 모델 학습

In [6]:
model.fit(X_train, Y_train, epochs=5, batch_size=32)

W0716 16:58:35.512228 4448605632 deprecation.py:323] From /anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0716 16:58:35.561773 4448605632 deprecation_wrapper.py:119] From /anaconda3/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f9b68497fd0>

### 테스트 셋 결과 확인

In [7]:
loss_and_metrics = model.evaluate(X_test, Y_test, batch_size=32)
print('loss_and_metrics : ' + str(loss_and_metrics))

loss_and_metrics : [0.22938956558704376, 0.9321]


## 내 데이터로 테스트

In [8]:
# 예측
pred_proba =  model.predict_proba(images).tolist()

In [9]:
# 성능 확인 및 저장
correct = 0
performances = {
    'accuracy': 0,
    'recall': [0] * 10,
    'precision': [0] * 10
}
truePredict = [0] * 10
numOfPredict = [0] * 10
predicts = []

for i in range(len(pred_proba)):
    prob = pred_proba[i]
    pred = get_hot_idx(prob)
    real = i // 1000
    predicts.append(dict({
        'real': real,
        'pred': pred,
        'prob': [round(e, 4) for e in prob]
    }))
    numOfPredict[pred] = numOfPredict[pred] + 1
    if pred is real:
        truePredict[real] = truePredict[real] + 1
        correct = correct + 1

performances['accuracy'] = correct / 10000
performances['recall'] = [round(truePredict[i] / 1000, 4) for i in range(10) ]
performances['precision'] = [round(truePredict[i] / numOfPredict[i], 4) for i in range(10)]

print(performances)

{'accuracy': 0.9245, 'recall': [0.954, 0.948, 0.926, 0.908, 0.943, 0.918, 0.934, 0.906, 0.906, 0.902], 'precision': [0.939, 0.9733, 0.9196, 0.9246, 0.912, 0.9089, 0.9396, 0.9527, 0.8728, 0.9074]}


### 차원축소

1) 마지막 직전의 레이어에서 activation values를 뽑는다.

2) t-SNE로 차원을 축소한다.

In [10]:
samples = []
idxs = get_arr_from_json_file('./sample_image_idxs.json')
for idx in idxs:
    samples.append(images[idx])
print(len(samples))

500


In [11]:
extracted_features = get_activations(model, -2, samples)[0]
y = TSNE(n_components=2).fit_transform(extracted_features)
sne_map = []
for e in y:
    sne_map.append({
        'x': round(e[0], 2),
        'y': round(e[1], 2)
    })

In [12]:
model_data = {
    'model_name': 'Deep Neural Network',
    'short_name': 'DNN',
    'description': '5개의 히든 레이어를 가지는 심층 신경망 모델 (10-16-16-16-16-10)이다. epochs=5, batch_size=32',
    'performance': {
        'accuracy': performances['accuracy'],
        'recall': performances['recall'],
        'precision': performances['precision']
    },
    'predict': predicts,
    't-sne': sne_map
}

In [13]:
jstring = json.dumps(model_data, indent=2)
f = open(filename, "w")
f.write(jstring)

TypeError: Object of type float32 is not JSON serializable