In [4]:
import math
import os
from os.path import expanduser

import h5py
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

home = expanduser("~")


In [32]:
def load_mnist_csv(path, one_hot = False, shape = None):
    """Load the MNIST train/test CSV files stored under ``path``.

    Both files are read without a header row. Column 0 is used as the label
    and every remaining column as a pixel value, which is divided by 255.0.

    Parameters
    ----------
    path : str
        Directory containing ``mnist_train.csv`` and ``mnist_test.csv``.
        A trailing path separator is optional.
    one_hot : bool, default False
        If true, labels are returned as one-hot encoded rows instead of
        integer class ids.
    shape : str or None, default None
        ``"2D"`` reshapes every sample to 28x28; any other value leaves the
        samples as flat rows.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` as NumPy arrays.
    """
    # os.path.join gives the same result as plain concatenation when `path`
    # already ends with a separator, and a correct path when it does not.
    df_train = pd.read_csv(os.path.join(path, "mnist_train.csv"), header=None)
    df_test = pd.read_csv(os.path.join(path, "mnist_test.csv"), header=None)

    X_train = df_train.iloc[:, 1:].values/255.0
    X_test = df_test.iloc[:, 1:].values/255.0
    y_train = df_train.iloc[:, 0].values
    y_test = df_test.iloc[:, 0].values

    if shape == "2D":
        X_train = X_train.reshape(-1, 28, 28)
        X_test = X_test.reshape(-1, 28, 28)

    if one_hot:
        # Size the encoding from the largest label seen in either split.
        # Counting the unique training labels breaks as soon as the training
        # labels have a gap or the test split holds a class missing from
        # training, because the label would index past the identity matrix.
        num_classes = int(max(y_train.max(), y_test.max())) + 1
        eye = np.eye(num_classes)
        y_train, y_test = eye[y_train], eye[y_test]

    return X_train, X_test, y_train, y_test

# Read MNIST from the user's home data folder: labels one-hot encoded,
# samples reshaped to 28x28.
X_train, X_test, y_train, y_test = load_mnist_csv(
    path=home + "/data/MNIST/",
    shape="2D",
    one_hot=True,
)
# Show the four array shapes as the cell output.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((60000, 28, 28), (10000, 28, 28), (60000, 10), (10000, 10))

In [15]:
# One LSTM layer with return_sequences=True: the layer emits an output for
# every step of the (28, 28) input sequence.
model = keras.Sequential()
model.add(keras.layers.Input(shape=(28, 28)))
model.add(keras.layers.LSTM(units=128, activation="tanh", return_sequences=True))
model.summary()

# Push a single sample through the network to inspect the output shape.
model.predict(X_train[:1]).shape

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_4 (LSTM)                (None, 28, 128)           80384     
Total params: 80,384
Trainable params: 80,384
Non-trainable params: 0
_________________________________________________________________


(1, 28, 128)

In [17]:
# return_sequences=False is the default, spelled out here for contrast with
# the previous cell: only one output vector per sample is returned.
model = keras.Sequential()
model.add(keras.layers.Input(shape=(28, 28)))
model.add(keras.layers.LSTM(units=128, activation="tanh", return_sequences=False))
model.summary()

# Push a single sample through the network to inspect the output shape.
model.predict(X_train[:1]).shape

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 128)               80384     
Total params: 80,384
Trainable params: 80,384
Non-trainable params: 0
_________________________________________________________________


(1, 128)

In [28]:
# Stacked LSTM classifier. The first LSTM must use return_sequences=True so
# that the second LSTM receives a full sequence; the second one returns a
# single vector per sample, which feeds the dense classification head.
model = keras.Sequential()
model.add(keras.layers.Input(shape=(28, 28)))
model.add(keras.layers.LSTM(units=128, activation="tanh", return_sequences=True))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.LSTM(units=128, activation="tanh", return_sequences=False))
model.add(keras.layers.Dropout(rate=0.2))
model.add(keras.layers.Dense(units=128, activation="relu"))
model.add(keras.layers.Dense(units=10, activation="softmax"))
model.summary()

# Push a single sample through the network to inspect the output shape.
model.predict(X_train[:1]).shape

Model: "sequential_17"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_27 (LSTM)               (None, 28, 128)           80384     
_________________________________________________________________
dropout_12 (Dropout)         (None, 28, 128)           0         
_________________________________________________________________
lstm_28 (LSTM)               (None, 128)               131584    
_________________________________________________________________
dropout_13 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 128)               16512     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                1290      
Total params: 229,770
Trainable params: 229,770
Non-trainable params: 0
_______________________________________________

(1, 10)

In [36]:

# Stacked LSTM classifier: two LSTM layers (the first returns the full
# sequence so the second can consume it), each followed by dropout, then a
# dense head ending in a 10-way softmax.
model = keras.Sequential([
    keras.layers.Input(shape = (28, 28)),
    keras.layers.LSTM(units = 128, activation = "tanh", return_sequences=True),
    keras.layers.Dropout(rate=0.2),
    keras.layers.LSTM(units = 128, activation = "tanh", return_sequences=False),
    keras.layers.Dropout(rate=0.2),
    keras.layers.Dense(units = 128, activation = "relu"),
    keras.layers.Dense(units = 10, activation = "softmax")
])
model.summary()

# The optimizer-level `decay` keyword is a legacy argument that newer Keras
# optimizers no longer accept. An InverseTimeDecay schedule with
# decay_steps=1 yields the same per-step rate,
# lr = 0.001 / (1 + 1e-5 * step), and works across Keras versions.
lr_schedule = keras.optimizers.schedules.InverseTimeDecay(
    initial_learning_rate=0.001,
    decay_steps=1,
    decay_rate=1e-5,
)
op = keras.optimizers.Adam(learning_rate=lr_schedule)

# Labels are one-hot encoded, hence categorical (not sparse) cross-entropy.
model.compile(optimizer = op,
              loss = keras.losses.categorical_crossentropy,
              metrics = ["accuracy"])

# Keep the History object so the per-epoch metrics can be inspected later
# instead of being discarded as a bare cell output.
history = model.fit(X_train, y_train, validation_data = (X_test, y_test), epochs = 10, batch_size = 64)

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_41 (LSTM)               (None, 28, 128)           80384     
_________________________________________________________________
dropout_26 (Dropout)         (None, 28, 128)           0         
_________________________________________________________________
lstm_42 (LSTM)               (None, 128)               131584    
_________________________________________________________________
dropout_27 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 128)               16512     
_________________________________________________________________
dense_19 (Dense)             (None, 10)                1290      
Total params: 229,770
Trainable params: 229,770
Non-trainable params: 0
_______________________________________________

<tensorflow.python.keras.callbacks.History at 0x7febb607ef50>