<a href="https://colab.research.google.com/github/brs1977/neural-university/blob/master/HW/01_mnist_gpu.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten 
from tensorflow.keras import utils
from tensorflow.keras.preprocessing import image
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
import tensorflow as tf
from sklearn.model_selection import train_test_split
from google.colab import files
import numpy as np
import random
import os
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline 

##Загрузка и подготовка данных

In [0]:
(x_train_org, y_train_org), (x_test_org, y_test_org) = mnist.load_data()

x_train = x_train_org.reshape(60000, 784)
x_test = x_test_org.reshape(10000, 784)

x_train = x_train.astype('float32') / 255
y_train = utils.to_categorical(y_train_org, 10)

#x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)

x_test = x_test.astype('float32') / 255
y_test = utils.to_categorical(y_test_org, 10)

y_train[100:110]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

#Создаем модель

In [0]:
def create_model(layers, TPU=False):
  model = Sequential()
  # Входной полносвязный слой, 800 нейронов, 784 входа в каждый нейрон
  #model.add(Dense(800, input_dim=784, activation="relu"))
  for layer in layers:
    model.add(layer)
  
  # Выходной полносвязный слой, 10 нейронов (по количеству рукописных цифр)
  model.add(Dense(10, activation="softmax"))

  if TPU:
    model = tf.contrib.tpu.keras_to_tpu_model(
        model,
        strategy=tf.contrib.tpu.TPUDistributionStrategy(
            tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
        )
    )  
  
  model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
  
  return model

def callbacks(verbose=0):
  #будем сохранять последнюю лучшую модель 
  сheckpoint = ModelCheckpoint('mnist-cnn.hdf5', 
                                monitor='acc', 
                                save_best_only=True,
                                verbose=verbose)

  #уменьшаем скорость обучения в 2 раза, если за 3 эпохи, метрика не улучшилась
  learning_rate_reduction = ReduceLROnPlateau(monitor='acc', 
                                              patience=3, 
                                              verbose=verbose, 
                                              factor=0.5, 
                                              min_lr=0.00001)

  #остановим обучение если за 5 эпох, метрика не улучшилась
  earlystop = EarlyStopping(monitor='acc', min_delta=0.001, patience=5,verbose=verbose, mode='auto')  
  return [сheckpoint,learning_rate_reduction,earlystop]

def predict(layers,verbose=0):
  model = create_model(layers)
  history = model.fit(x_train, y_train, batch_size=batch_size, epochs=100, callbacks=callbacks(),verbose=verbose)  
  
  model = create_model(layers)
  model.load_weights('mnist-cnn.hdf5')
  
  predictions = model.predict(x_test)
  train_acc = np.max(history.history['acc'])
  test_acc = accuracy_score(np.argmax(y_test,axis=1), np.argmax(predictions,axis=1))
  return (train_acc,test_acc)
  
  
def test_layers(units=800,batch_size = 256, range_len = 10, verbose = 0):  
  res = []
  
  train_acc = 0
  test_acc = 0
  for i in range(range_len):
    trn_acc,tst_acc = predict([Dense(units, input_dim=784, activation="relu")])
    train_acc += trn_acc/range_len
    test_acc += tst_acc/range_len
    
  res.append([1,units,train_acc,test_acc])
                        
  train_acc = 0
  test_acc = 0
  for i in range(range_len):
    trn_acc,tst_acc = predict([Dense(units, input_dim=784, activation="relu"),
                                  Dense(units, activation="relu")])
    train_acc += trn_acc/range_len
    test_acc += tst_acc/range_len

  res.append([2,units,train_acc,test_acc])

  train_acc = 0
  test_acc = 0
  for i in range(range_len):
    trn_acc,tst_acc = predict([Dense(units, input_dim=784, activation="relu"),
                                   Dense(units, activation="relu"),
                                   Dense(units, activation="relu")])
    train_acc += trn_acc/range_len
    test_acc += tst_acc/range_len
    
  res.append([3,units,train_acc,test_acc])
  
  return res     

In [0]:
%%time 

random.seed(666)
batch_size = 256
res = test_layers(units=800,batch_size=batch_size,range_len=10 )
print(res)


[[1, 800, 0.9999749958515165, 0.98338], [2, 800, 0.998963326215744, 0.9837999999999999], [3, 800, 0.9985133349895476, 0.98293]]
CPU times: user 26min 54s, sys: 3min 37s, total: 30min 32s
Wall time: 26min 29s


In [0]:
random.seed(666)
batch_size = 256
res = test_layers(units=500,batch_size=batch_size,range_len=10 )
print(res)


[[1, 500, 0.9999383330345153, 0.98239], [2, 500, 0.9990483283996582, 0.9830999999999999], [3, 500, 0.9988149940967559, 0.98373]]


In [0]:
%%time 

random.seed(666)
batch_size = 256
res = test_layers(units=600,batch_size=batch_size,range_len=10 )
print(res)

[[1, 600, 0.9999300003051759, 0.9824800000000001], [2, 600, 0.9994083285331725, 0.98437], [3, 600, 0.9989583373069764, 0.98325]]
CPU times: user 1h 2min 10s, sys: 2min 51s, total: 1h 5min 2s
Wall time: 1h 56s


In [0]:
%%time 

random.seed(666)
batch_size = 256
res = test_layers(units=700,batch_size=batch_size,range_len=10 )
print(res)

[[1, 700, 0.9999166667461393, 0.9827700000000001], [2, 700, 0.9993333280086516, 0.98459], [3, 700, 0.9976899981498717, 0.9819500000000001]]
CPU times: user 1h 24min 41s, sys: 3min 6s, total: 1h 27min 47s
Wall time: 1h 23min 59s


In [0]:
%%time 

random.seed(666)
batch_size = 256
res = test_layers(units=900,batch_size=batch_size,range_len=10 )
print(res)

[[1, 900, 0.9999683320522308, 0.9829299999999999], [2, 900, 0.9994483292102813, 0.98549], [3, 900, 0.9988383293151855, 0.9839800000000001]]
CPU times: user 1h 57min 14s, sys: 4min 19s, total: 2h 1min 33s
Wall time: 1h 58min 7s


In [0]:
%%time 

random.seed(666)
batch_size = 256
res = test_layers(units=1000,batch_size=batch_size,range_len=10 )
print(res)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
[[1, 1000, 0.9999783277511596, 0.98355], [2, 1000, 0.9992216706275939, 0.9851900000000001], [3, 1000, 0.9981000006198882, 0.9840899999999999]]
CPU times: user 21min 8s, sys: 3min 31s, total: 24min 40s
Wall time: 20min 47s


In [0]:
%%time 

random.seed(666)
batch_size = 256
res = test_layers(units=1100,batch_size=batch_size,range_len=10 )
print(res)

[[1, 1100, 0.999943333864212, 0.98322], [2, 1100, 0.9988299906253814, 0.98383], [3, 1100, 0.9992283284664153, 0.9847400000000001]]
CPU times: user 38min 25s, sys: 4min 20s, total: 42min 45s
Wall time: 38min 42s


In [0]:
%%time 

random.seed(666)
batch_size = 256
res = test_layers(units=1200,batch_size=batch_size,range_len=10 )
print(res)

[[1, 1200, 0.9999366641044616, 0.98373], [2, 1200, 0.9991550028324127, 0.98437], [3, 1200, 0.9987733304500579, 0.9846900000000001]]
CPU times: user 1h 1min 2s, sys: 4min 43s, total: 1h 5min 46s
Wall time: 1h 1min 52s


In [0]:
%%time 

random.seed(666)
batch_size = 256
res = test_layers(units=1300,batch_size=batch_size,range_len=10 )
print(res)

[[1, 1300, 0.9998000025749206, 0.9832599999999999], [2, 1300, 0.9991166651248932, 0.9847399999999998], [3, 1300, 0.9985633254051208, 0.9842799999999999]]
CPU times: user 1h 25min 14s, sys: 5min 10s, total: 1h 30min 25s
Wall time: 1h 26min 47s


In [0]:
res = [[1, 500, 0.9999383330345153, 0.98239], [2, 500, 0.9990483283996582, 0.9830999999999999], [3, 500, 0.9988149940967559, 0.98373]] + \
      [[1, 600, 0.9999300003051759, 0.9824800000000001], [2, 600, 0.9994083285331725, 0.98437], [3, 600, 0.9989583373069764, 0.98325]] + \
      [[1, 700, 0.9999166667461393, 0.9827700000000001], [2, 700, 0.9993333280086516, 0.98459], [3, 700, 0.9976899981498717, 0.9819500000000001]] + \
      [[1, 800, 0.9999749958515165, 0.98338], [2, 800, 0.998963326215744, 0.9837999999999999], [3, 800, 0.9985133349895476, 0.98293]] + \
      [[1, 900, 0.9999683320522308, 0.9829299999999999], [2, 900, 0.9994483292102813, 0.98549], [3, 900, 0.9988383293151855, 0.9839800000000001]] + \
      [[1, 1000, 0.9999783277511596, 0.98355], [2, 1000, 0.9992216706275939, 0.9851900000000001], [3, 1000, 0.9981000006198882, 0.9840899999999999]] + \
      [[1, 1100, 0.999943333864212, 0.98322], [2, 1100, 0.9988299906253814, 0.98383], [3, 1100, 0.9992283284664153, 0.9847400000000001]] + \
      [[1, 1200, 0.9999366641044616, 0.98373], [2, 1200, 0.9991550028324127, 0.98437], [3, 1200, 0.9987733304500579, 0.9846900000000001]] + \
      [[1, 1300, 0.9998000025749206, 0.9832599999999999], [2, 1300, 0.9991166651248932, 0.9847399999999998], [3, 1300, 0.9985633254051208, 0.9842799999999999]]

df = pd.DataFrame(res,columns=('level','units','train_acc','test_acc'))

In [0]:
def test_deep_layers(units=800,batch_size = 256, range_len = 10, verbose = 0):  
  res = []
  
  train_acc = 0
  test_acc = 0
  for i in range(range_len):
    trn_acc,tst_acc = predict([Dense(units, input_dim=784, activation="relu"),
                                   Dense(units-100, activation="relu"),
                                   Dense(units-200, activation="relu"),
                                   Dense(units-300, activation="relu")])
    train_acc += trn_acc/range_len
    test_acc += tst_acc/range_len
    
  res.append([4,units,train_acc,test_acc])

  train_acc = 0
  test_acc = 0
  for i in range(range_len):
    trn_acc,tst_acc = predict([Dense(units, input_dim=784, activation="relu"),
                                   Dense(units-100, activation="relu"),
                                   Dense(units-200, activation="relu"),
                                   Dense(units-300, activation="relu"),
                                   Dense(units-400, activation="relu")])
    train_acc += trn_acc/range_len
    test_acc += tst_acc/range_len
    
  res.append([5,units,train_acc,test_acc])
  
  train_acc = 0
  test_acc = 0
  for i in range(range_len):
    trn_acc,tst_acc = predict([Dense(units, input_dim=784, activation="relu"),
                                   Dense(units-100, activation="relu"),
                                   Dense(units-200, activation="relu"),
                                   Dense(units-300, activation="relu"),
                                   Dense(units-400, activation="relu"),
                                   Dense(units-500, activation="relu")])
    train_acc += trn_acc/range_len
    test_acc += tst_acc/range_len
    
  res.append([6,units,train_acc,test_acc])
  
  return res     

In [0]:
%%time 

random.seed(666)
batch_size = 256
res600 = test_deep_layers(units=600,batch_size=batch_size,range_len=10 )
print(res600)

random.seed(666)
batch_size = 256
res700 = test_deep_layers(units=700,batch_size=batch_size,range_len=10 )
print(res700)

random.seed(666)
batch_size = 256
res800 = test_deep_layers(units=800,batch_size=batch_size,range_len=10 )
print(res800)

random.seed(666)
batch_size = 256
res900 = test_deep_layers(units=900,batch_size=batch_size,range_len=10 )
print(res900)

random.seed(666)
batch_size = 256
res1000 = test_deep_layers(units=1000,batch_size=batch_size,range_len=10 )
print(res1000)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
[[4, 600, 0.9989766657352447, 0.98451], [5, 600, 0.9988183319568633, 0.9839], [6, 600, 0.9984150052070618, 0.9838000000000001]]
[[4, 700, 0.998971664905548, 0.98469], [5, 700, 0.999146670103073, 0.98487], [6, 700, 0.9974916696548461, 0.98174]]
[[4, 800, 0.9979350090026855, 0.98311], [5, 800, 0.9985316634178161, 0.98342], [6, 800, 0.9985399961471556, 0.98438]]
[[4, 900, 0.9986350059509278, 0.9836499999999999], [5, 900, 0.9983533322811126, 0.98356], [6, 900, 0.9982016623020172, 0.98383]]
[[4, 1000, 0.9980516731739043, 0.98347], [5, 1000, 0.9988266646862028, 0.98507], [6, 1000, 0.998230004310608, 0.98402]]
CPU times: user 8h 31min 59s, sys: 35min 28s, total: 9h 7min 27s
Wall time: 8h 44min 45s


In [0]:
df[df['level']==1].sort_values('train_acc', ascending=False)


Unnamed: 0,level,units,train_acc,test_acc
15,1,1000,0.999978,0.98355
9,1,800,0.999975,0.98338
12,1,900,0.999968,0.98293
18,1,1100,0.999943,0.98322
0,1,500,0.999938,0.98239
21,1,1200,0.999937,0.98373
3,1,600,0.99993,0.98248
6,1,700,0.999917,0.98277
24,1,1300,0.9998,0.98326


In [0]:
df[df['level']==1].sort_values('test_acc', ascending=False)

Unnamed: 0,level,units,train_acc,test_acc
0,1,500,0.999938,0.98239
3,1,600,0.99993,0.98248
6,1,700,0.999917,0.98277
12,1,900,0.999968,0.98293
18,1,1100,0.999943,0.98322
9,1,800,0.999975,0.98338
15,1,1000,0.999978,0.98355
21,1,1200,0.999937,0.98373


In [0]:
df[df['level']==2].sort_values('train_acc', ascending=False)

Unnamed: 0,level,units,train_acc,test_acc
19,2,1100,0.99883,0.98383
10,2,800,0.998963,0.9838
1,2,500,0.999048,0.9831
22,2,1200,0.999155,0.98437
16,2,1000,0.999222,0.98519
7,2,700,0.999333,0.98459
4,2,600,0.999408,0.98437
13,2,900,0.999448,0.98549


In [0]:
df[df['level']==1].sort_values('test_acc', ascending=False)

Unnamed: 0,level,units,train_acc,test_acc
0,1,500,0.999938,0.98239
3,1,600,0.99993,0.98248
6,1,700,0.999917,0.98277
12,1,900,0.999968,0.98293
18,1,1100,0.999943,0.98322
9,1,800,0.999975,0.98338
15,1,1000,0.999978,0.98355
21,1,1200,0.999937,0.98373


In [0]:
predictions = model.predict(x_test)
accuracy_score(np.argmax(y_test,axis=1), np.argmax(predictions,axis=1))

In [0]:
'''
history = model.fit(x_train,y_train, #datagen.flow(x_train,y_train, batch_size=batch_size), 
                    #aug.flow(X_train,Y_train, batch_size=batch_size), 
                    epochs=1000,
                    #validation_data=(x_val, y_val),
                    #steps_per_epoch=x_train.shape[0] // batch_size,
                    verbose=1,
                    callbacks=[сheckpoint, learning_rate_reduction, earlystop])
'''


In [0]:

model = create_model(TPU =True)
model.load_weights('mnist-cnn.hdf5')

score = model.evaluate(x_test, y_test, verbose=0)

# Print test accuracy
print('\n', 'Test accuracy:', score[1], 'Loss: ',score[0])


INFO:tensorflow:Querying Tensorflow master (grpc://10.118.0.18:8470) for TPU system metadata.
INFO:tensorflow:Found TPU system:
INFO:tensorflow:*** Num TPU Cores: 8
INFO:tensorflow:*** Num TPU Workers: 1
INFO:tensorflow:*** Num TPU Cores Per Worker: 8
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, -1, 13172324818075806562)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 14542490687634635208)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 3926342142175128996)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 7881309208942047036)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 6103072050995062287)
INFO:tensorflow:*** Available Device: _DeviceAttributes(/

In [0]:
predictions = model.predict(x_test)
accuracy_score(np.argmax(y_test,axis=1), np.argmax(predictions,axis=1))

INFO:tensorflow:New input shapes; (re-)compiling: mode=infer (# of cores 8), [TensorSpec(shape=(2, 784), dtype=tf.float32, name='dense_6_input_10')]
INFO:tensorflow:Overriding default placeholder.
INFO:tensorflow:Remapping placeholder for dense_6_input
INFO:tensorflow:Started compiling
INFO:tensorflow:Finished compiling. Time elapsed: 1.0034997463226318 secs


In [0]:
def categorical_crossentropy(predictions, targets, epsilon=1e-12):
  predictions = np.clip(predictions, epsilon, 1. - epsilon)
  ce = - np.mean(np.log(predictions) * targets) 
  return ce

In [0]:
#np.sum( (np.argmax(y_test,axis=1) - np.argmax(predictions,axis=1))^2 )
#np.mean( (np.argmax(y_test,axis=1) - np.argmax(predictions,axis=1))^2 )
from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error
#print(mean_squared_error(np.argmax(y_test,axis=1), np.argmax(predictions,axis=1)))
#print(mean_absolute_error(np.argmax(y_test,axis=1), np.argmax(predictions,axis=1)))
#print(np.sqrt(np.sum( (np.argmax(y_test,axis=1) - np.argmax(predictions,axis=1))^2 )))
#predictions = model.predict(x_test)
#categorical_crossentropy(predictions, y_test)
accuracy_score(np.argmax(y_test,axis=1), np.argmax(predictions,axis=1))
#predictions = model.predict(x_train)
#categorical_crossentropy(predictions, y_train)

0.9834

Задание Light

Проверьте, как сеть распознает разные цифры из тестового набора данных
Попробуйте сами написать цифру и распознать ее с помощью сеть
В цикле распознайте все цифры тестовой выборки и посчитайте точность распознавания на тестовой
Выберите 10 разных вариантов количества слоёв и нейронов в скрытых слоях от небольшого до большого и заполните таблицу с точностью обучения - на обучающей и на тестовой выборке
Напишите ваши выводы - как влияет количество нейронов в сети на точность на тестовой выборке
 

Задание Pro

Запустите в цикле поштучное распознавание всех цифр обучающей выборки
Выделите те, на которых была ошибка и точность распознавания была ниже некоторого порога alpha (например, 0.8)
Выбросите все ошибочные цифры из обучающей выборки
Переобучите сеть на «очищенной» выборке
Посчитайте точность на тестовой выборке и сравните с точность на тестовой до очистки обучающей и коэффициент роста точности
Составьте таблицу коэффициентов роста точности на 10 примерах в зависимости от размера сети и от разных alpha
Напишите ваши выводы - как влияет очистка данных на точность на тестовой выборке