In [1]:
from tqdm import tqdm_notebook as tqdm
from IPython.display import Markdown, display
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

import tensorflow as tf
from tensorflow import keras

In [2]:
def printmd(string):
    display(Markdown(string))

При чтении будем хранить данные в переменной `CACHE`, чтобы ускорить загрузку. В случае их обновления, достаточно вызвать `read(override=True)`

In [3]:
SMALL_DATASET_DIR = '../lab1/notMNIST_small/'
LARGE_DATASET_DIR = '../lab1/notMNIST_large/'

LABEL_MAP = {}
INV_LABEL_MAP = {}

In [4]:
def read(data_dir):
    f_v = 0
    
    X, y = [], []
    
    for f in tqdm(os.listdir(data_dir), desc='Letter'):
        
        if not f.startswith('.'):
            img_dir = os.path.join(data_dir, f)
            
            for img in os.listdir(img_dir):
                img_path = os.path.join(img_dir, img)
                data = cv2.imread(img_path, 0)
                
                if data is None:
                    continue
                    
                X.append(data * 2 / 255 - 1)
                
                if LABEL_MAP.get(f) is None:
                    LABEL_MAP[f] = f_v
                    INV_LABEL_MAP[f_v] = f
                    f_v += 1
                    
                y.append(LABEL_MAP[f])
                
    X = np.array(X)
    y = np.array(y)
    
    return X, y

In [5]:
def get_split_data(data_dir, size=(0.7, 0.2, 0.1), random_state=23):
    X, y = read(data_dir)
    assert abs(np.sum(size) - 1.0) < 0.001
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                        test_size=size[2], 
                                                        random_state=random_state, 
                                                        shuffle=True)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, 
                                                      test_size=size[1], 
                                                      random_state=random_state, 
                                                      shuffle=True)
    
    return X_train, y_train, X_val, y_val, X_test, y_test

In [6]:
X_train, y_train, X_val, y_val, X_test, y_test = get_split_data(SMALL_DATASET_DIR)

HBox(children=(IntProgress(value=0, description='Letter', max=10, style=ProgressStyle(description_width='initi…




In [7]:
X_val.shape, y_val.shape, X_test.shape, y_test.shape

((3371, 28, 28), (3371,), (1873, 28, 28), (1873,))

In [8]:
lr = 0.1
epochs = 50
batch_size = 64

In [9]:
callbacks = [
    keras.callbacks.TensorBoard(log_dir='./logs', histogram_freq=1, batch_size=32, write_graph=True,
                                write_grads=True, write_images=False, embeddings_freq=0,
                                embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None,
                                update_freq='epoch'),
    keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.000001, patience=3, verbose=2, mode='auto',
                                  baseline=None,
                                  restore_best_weights=True),
    #keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.9, patience=2, verbose=2,
    #                                  mode='auto', min_delta=0.001, cooldown=0, min_lr=0),
]

In [10]:
printmd('**Building with new architecture...**')

print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}\n')

model = keras.Sequential([
   keras.layers.Flatten(input_shape=(28, 28)),
   keras.layers.Dense(128, activation=tf.nn.relu),
   keras.layers.Dense(96, activation=tf.nn.relu),
   keras.layers.Dense(64, activation=tf.nn.relu),
   keras.layers.Dense(10, activation=tf.nn.softmax)
])

print(model.summary())

model.compile(optimizer=tf.keras.optimizers.SGD(lr=lr),
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

model.fit(X_train, y_train, 
          epochs=epochs,
          batch_size=batch_size, 
          validation_data=(X_val, y_val))
    
loss, acc = model.evaluate(X_test, y_test)

print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

Learning rate: 0.1
Epochs: 50

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
dense (Dense)                (None, 128)               100480    
_________________________________________________________________
dense_1 (Dense)              (None, 96)                12384     
_________________________________________________________________
dense_2 (Dense)              (None, 64)                6208      
_________________________________________________________________
dense_3 (Dense)              (None, 10)                650       
Total params: 119,722
Trainable params: 119,722
Non-trainable params: 0
_________________________________________________________________
None
Train on 13480 samples, vali

In [19]:
printmd('**Building with new architecture...**')

epochs = 50

print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')
print()
model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28, 1)),
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(256, activation=tf.nn.relu),
    keras.layers.Dense(96, activation=tf.nn.relu),
    keras.layers.Dropout(0.2),
    keras.layers.Dense(64, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])

print(model.summary())

model.compile(optimizer=tf.keras.optimizers.SGD(lr=lr),
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
X_val = X_val.reshape(X_val.shape[0], 28, 28, 1)

model.fit(X_train, y_train, 
          epochs=epochs, 
          batch_size=batch_size,
          validation_data=(X_val, y_val))

loss, acc = model.evaluate(X_test, y_test)

print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

Learning rate: 0.0001
Epochs: 50

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_9 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_46 (Dense)             (None, 512)               401920    
_________________________________________________________________
dropout_16 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_47 (Dense)             (None, 256)               131328    
_________________________________________________________________
dense_48 (Dense)             (None, 96)                24672     
_________________________________________________________________
dropout_17 (Dropout)         (None, 96)                0         
_________________________________________________________________
dense_49 (Dense)             (None, 64)   

Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50


Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50


Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50


Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50


Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50


Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50


Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50


Epoch 49/50
Epoch 50/50
Loss: 1.4368060074957436
Accuracy: 0.7479978799819946


In [12]:
lr = 0.0001
epochs = 30
printmd('**Building with new architecture...**')

print(f'Learning rate: {lr}')
print(f'Epochs: {epochs}')



model = keras.Sequential([
    keras.layers.Flatten(input_shape=(28, 28)),
    keras.layers.Dense(512, activation=tf.nn.tanh),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(128, activation=tf.nn.relu),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(32, activation=tf.nn.relu),
    keras.layers.Dense(10, activation=tf.nn.softmax)
])
loss_method = 'sparse_categorical_crossentropy'

model.compile(optimizer=tf.train.AdamOptimizer(lr), 
              loss=loss_method,
              metrics=['accuracy'])

model.fit(X_train, y_train, 
          epochs=epochs,
          validation_data=(X_val, y_val),
          callbacks=callbacks)

loss, acc = model.evaluate(X_test, y_test)

print(f'Loss: {loss}')
print(f'Accuracy: {acc}')
print('=======================')

**Building with new architecture...**

Learning rate: 0.0001
Epochs: 30
Train on 13480 samples, validate on 3371 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30


Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30


Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30


Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30


Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30


Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30


Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30


Epoch 30/30
Loss: 0.27060418183384766
Accuracy: 0.917245090007782


In [1]:
def viz_clf(train_sizes, val_scores, test_scores):
    plt.title('Validation and Hold out scores for N train samples')
    plt.xlabel('N')
    plt.ylabel('Score')
    plt.plot(train_sizes, val_scores, color='green', label='valid')
    plt.plot(train_sizes, test_scores, color='blue', label='hold out')
    plt.legend()
    plt.show()

In [None]:
plt.figure(figsize=(20, 10))
viz_clf(train_sizes, val_scores, test_scores)