In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, BatchNormalization, TimeDistributed,LSTM,Dropout
from tensorflow.keras import Model
from tensorflow.keras import Model
import numpy as np 
import pandas as pd
import os
from data import preproc as pp
from matplotlib import pyplot as plt
from PIL import Image
import random

In [2]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
tf.keras.backend.set_floatx('float64')

In [3]:
data_dir = '/home/austin/Documents/Github/handwritingnotebook/data/'

In [4]:
data_csv = pd.read_csv(data_dir+'words_csv/2020-06-03 11:39:42.000901.csv')

In [5]:
BATCH_SIZE = 100
IMG_HEIGHT = data_csv['height'].max()
IMG_WIDTH = data_csv['width'].max()
DATASET_SIZE = data_csv.shape[0]

https://www.tensorflow.org/tutorials/load_data/images

In [6]:
dataset = tf.data.TextLineDataset(data_dir+'words_csv/2020-06-03 11:39:42.000901.csv').skip(1)

In [7]:
def parse_line(line):
    img_filename = tf.strings.split(line,',')[0]
    img_label = tf.strings.split(line,',')[1]
    img_label = tf.io.decode_raw(tf.strings.bytes_split(img_label),tf.uint8)
    img = tf.image.decode_png(tf.io.read_file(data_dir + 'words_screenshot_labeled/'+img_filename),channels = 3)
    img = tf.image.rgb_to_grayscale(img)
    img = tf.image.resize_with_pad(img,IMG_HEIGHT,IMG_WIDTH)
    img = tf.image.transpose(img)
    img = img/255
    return(img,img_label)

In [8]:
labeled_ds = dataset.map(parse_line, num_parallel_calls=AUTOTUNE)

In [9]:
labeled_ds

<ParallelMapDataset shapes: ((783, 464, 1), (None, None)), types: (tf.float32, tf.uint8)>

In [10]:
train_size = int(0.90 * DATASET_SIZE)
test_size = int(0.10 * DATASET_SIZE)

full_dataset = labeled_ds.shuffle(DATASET_SIZE)
train_ds = full_dataset.take(train_size)
train_ds = train_ds.batch(BATCH_SIZE)
test_ds = full_dataset.skip(train_size)
test_ds = test_ds.batch(BATCH_SIZE)

In [11]:
class MyModel(Model):
    def __init__(self,
                 loss_object,
                 optimizer,
                 train_loss,
                 train_metric,
                 test_loss,
                 test_metric):
        '''
            Setting all the variables for our model.
        '''
        super(MyModel, self).__init__()
        self.cnn1 = Conv2D(32, (7, 7), padding="same", activation="relu", input_shape = (IMG_WIDTH,IMG_HEIGHT))
        self.maxp1 = MaxPooling2D(pool_size=(2, 2))
        self.norm1 = BatchNormalization()
        self.cnn2 = Conv2D(64, (3, 3), padding="same", activation="relu")
        self.maxp1 = MaxPooling2D(pool_size=(2, 2))
        self.norm1 = BatchNormalization()
        self.cnn3 = Conv2D(128, (3, 3), padding="same", activation="relu")
        self.maxp1 = MaxPooling2D(pool_size=(2, 2))
        self.norm1 = BatchNormalization()
        self.cnn4 = Conv2D(256, (3, 3), padding="same", activation="relu")
        self.maxp1 = MaxPooling2D(pool_size=(2, 2))
        self.norm1 = BatchNormalization()
        self.flat = Flatten()
        self.rnn = (TimeDistributed(self.flat))
        self.lstm = LSTM(1024, activation = 'relu')
        self.dropout = Dropout(0.5)
        self.optlyr = Dense(95, activation = 'sigmoid')
        
        self.loss_object = loss_object
        self.optimizer = optimizer
        self.train_loss = train_loss
        self.train_metric = train_metric
        self.test_loss = test_loss
        self.test_metric = test_metric
    def cnn_model(self, x):
        '''
            Defining the architecture of our model. This is where we run 
            through our whole dataset and return it, when training and 
            testing.
        '''
        x = self.cnn1(x) 
        x = self.maxp1(x) 
        x = self.norm1(x) 
        x = self.cnn2(x)
        x = self.maxp1(x) 
        x = self.norm1(x)
        x =  self.cnn3(x) 
        x = self.maxp1(x) 
        x = self.norm1(x) 
        x = self.cnn4(x)
        x = self.maxp1(x) 
        x = self.norm1(x) 
        x = self.flat(x)
        x = self.rnn 
        x = self.lstm(x) 
        x = self.dropout(x) 
        return self.optlyr(x)
        
        
        
        
        
    @tf.function
    def train_step(self, images, labels):
        '''
            This is a TensorFlow function, run once for each epoch for the
            whole input. We move forward first, then calculate gradients 
            with Gradient Tape to move backwards.
        '''
        with tf.GradientTape() as tape:
            predictions = self.cnn_model(images)
            loss = self.loss_object(labels, predictions)
        gradients = tape.gradient(loss, self.trainable_variables)
        optimizer.apply_gradients(zip(
                                  gradients, self.trainable_variables))

        self.train_loss(loss)
        self.train_metric(labels, predictions)
        
    def fit(self, train, test, epochs):
        '''
            This fit function runs training and testing.
        '''
        for epoch in range(epochs):
            for images, labels in train:
                self.train_step(images, labels)

            for test_images, test_labels in test:
                self.test_step(test_images, test_labels)

            template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
            print(template.format(epoch+1,
                                  self.train_loss.result(),
                                  self.train_metric.result()*100,
                                  self.test_loss.result(),
                                  self.test_metric.result()*100))

            # Reset the metrics for the next epoch
            self.train_loss.reset_states()
            self.train_metric.reset_states()
            self.test_loss.reset_states()
            self.test_metric.reset_states()

In [12]:
# Make a loss object
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()

# Select the optimizer
optimizer = tf.keras.optimizers.Adam()

# Specify metrics for training
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

# Specify metrics for testing
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_metric = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')


In [13]:
# Create an instance of the model
model = MyModel(loss_object = loss_object,
                optimizer = optimizer,
                train_loss = train_loss,
                train_metric = train_metric,
                test_loss = test_loss,
                test_metric = test_metric)

EPOCHS = 5

W0614 14:36:15.109354 139650346469184 recurrent_v2.py:1102] Layer lstm will not use cuDNN kernel since it doesn't meet the cuDNN kernel criteria. It will use generic GPU kernel as fallback when running on GPU


In [14]:
model.fit(train = train_ds,
          test = test_ds,
          epochs = EPOCHS)

InvalidArgumentError: Cannot batch tensors with different shapes in component 1. First element had shape [3,1] and element 1 had shape [4,1].