In [1]:
# Use the %pip magic (not !pip) so the package is installed into the
# environment of the running kernel rather than whichever pip is first on PATH.
%pip install comet-ml



In [2]:
# Clone the project only when it is not already present, so re-running this
# cell does not fail with "destination path already exists".
![ -d TextRecognizer ] || git clone https://github.com/dudeperf3ct/TextRecognizer.git

Cloning into 'TextRecognizer'...
remote: Enumerating objects: 229, done.[K
remote: Counting objects: 100% (229/229), done.[K
remote: Compressing objects: 100% (115/115), done.[K
remote: Total 229 (delta 125), reused 209 (delta 107), pack-reused 0[K
Receiving objects: 100% (229/229), 21.26 MiB | 1.56 MiB/s, done.
Resolving deltas: 100% (125/125), done.


In [3]:
# Work from the cloned repo's notebooks directory: later cells rely on
# relative paths such as '..' and '../src/training/train_model.py'.
%cd TextRecognizer/Experiment-1/notebooks

/home/dudeperf3ct/Downloads/TextRecognizer/Experiment-1/notebooks/TextRecognizer/Experiment-1/notebooks


In [4]:
# Load the autoreload extension and enable mode 2 so edits to imported
# modules are picked up without restarting the kernel; render matplotlib
# figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [5]:
import tensorflow as tf

# Report the default GPU only when TensorFlow returns a non-empty device name.
gpu_name = tf.test.gpu_device_name()
if gpu_name:
    print('Default GPU Device: {}'.format(gpu_name))

In [6]:
import sys

# Make the parent directory importable (it holds the `src` package used below).
# Guarded so that re-running this cell does not keep appending duplicates.
if '..' not in sys.path:
    sys.path.append('..')

In [7]:
from src.data.emnist_dataset import EMNIST

In [13]:
# Instantiate the EMNIST dataset wrapper and load its train/test splits.
# Each split is an (inputs, targets) pair; shapes are printed in the next cell.
dataset = EMNIST()
(x_train, y_train), (x_test, y_test) = dataset.load_data()

In [14]:
# Show the dataset's own text summary, then the array shapes of both splits
# as a quick sanity check that inputs and targets line up.
print(dataset)
print('Training shape:', x_train.shape, y_train.shape)
print('Test shape:', x_test.shape, y_test.shape)

EMNIST Dataset
Num classes: 62
Mapping: {0: '0', 1: '1', 2: '2', 3: '3', 4: '4', 5: '5', 6: '6', 7: '7', 8: '8', 9: '9', 10: 'A', 11: 'B', 12: 'C', 13: 'D', 14: 'E', 15: 'F', 16: 'G', 17: 'H', 18: 'I', 19: 'J', 20: 'K', 21: 'L', 22: 'M', 23: 'N', 24: 'O', 25: 'P', 26: 'Q', 27: 'R', 28: 'S', 29: 'T', 30: 'U', 31: 'V', 32: 'W', 33: 'X', 34: 'Y', 35: 'Z', 36: 'a', 37: 'b', 38: 'c', 39: 'd', 40: 'e', 41: 'f', 42: 'g', 43: 'h', 44: 'i', 45: 'j', 46: 'k', 47: 'l', 48: 'm', 49: 'n', 50: 'o', 51: 'p', 52: 'q', 53: 'r', 54: 's', 55: 't', 56: 'u', 57: 'v', 58: 'w', 59: 'x', 60: 'y', 61: 'z'}
Input shape: [28, 28]

Training shape: (697932, 28, 28) (697932, 62)
Test shape: (116323, 28, 28) (116323, 62)


## EDA

In [None]:
%%writefile ../src/training/train_model.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from comet_ml import Experiment

from pathlib import Path
import sys
sys.path.append(str(Path(__file__).resolve().parents[2]))
from sklearn.model_selection import train_test_split
from src.training.util import train_model
from src.data.emnist_dataset import EMNIST
from src.models.character_model import Character_Model
from src.networks.lenet import lenet
import argparse

def _parse_args():
    """Parse command-line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-s", "--save-model", type=int, default=False,
        help="whether or not model should be saved")
    parser.add_argument("-w", "--weights", type=str, default=True,
        help="whether or not weights should be saved")
    parser.add_argument("-m", '--model', type=str, default="Character_Model",
        help="which model to use")
    parser.add_argument("-n", '--network', type=str, default="lenet",
        help="which network architecture to use")
    parser.add_argument("-d", '--dataset', type=str, default="EMNIST",
        help="which dataset to use")
    parser.add_argument("-e", '--epochs', type=int, default=10,
        help="Number of epochs")
    parser.add_argument("-b", '--batch_size', type=int, default=32,
        help="Batch size")        
    args = vars(parser.parse_args())

    return args


# Registry mapping the names accepted on the command line to the dataset,
# network and model objects they select.
funcs = {
    'EMNIST': EMNIST,
    'lenet': lenet,
    'Character_Model': Character_Model,
}


def _fit(model, dataset, args):
    """Train ``model`` on ``dataset`` using the batch size and epochs in ``args``."""
    print('[INFO] Starting Training...')
    return train_model(
        model,
        dataset,
        batch_size=args['batch_size'],
        epochs=args['epochs']
    )


def _evaluate(model, dataset, args):
    """Run ``model.evaluate`` on ``dataset`` and return its ``(loss, score)`` pair."""
    print('[INFO] Starting Testing...')
    loss, score = model.evaluate(dataset, args['batch_size'])
    print(f'[INFO] Test evaluation: {score}')
    return loss, score


def train(args, use_comet : bool = True, max_samples=100):
    """Load the data, train the selected model, evaluate it and optionally save weights.

    Args:
        args: dict of options as produced by ``_parse_args``. The keys read
            here are ``dataset``, ``model``, ``network``, ``batch_size``,
            ``epochs`` and ``weights``.
        use_comet: when True, the run is tracked in a Comet ``Experiment``
            (training and test phases are wrapped in ``experiment.train()`` /
            ``experiment.test()``, and metrics, parameters and a dataset hash
            are logged). When False, training and evaluation run untracked.
        max_samples: keep only the first ``max_samples`` rows of the training
            and test arrays. The default of 100 preserves the previous
            hard-coded smoke-test behaviour; pass ``None`` to use all the data.
    """
    import os  # local import: only needed to read the Comet API key

    data_cls = funcs[args['dataset']]
    model_cls = funcs[args['model']]
    network = funcs[args['network']]

    print('[INFO] Getting dataset...')
    data = data_cls()
    (x_train, y_train), (x_test, y_test) = data.load_data()

    if max_samples is not None:
        # Quick-run mode: train and test on a small prefix of the data.
        x_train, y_train = x_train[:max_samples], y_train[:max_samples]
        x_test, y_test = x_test[:max_samples], y_test[:max_samples]
        print('[INFO] Training shape: ', x_train.shape, y_train.shape)
        print('[INFO] Test shape: ', x_test.shape, y_test.shape)

    # TODO: add stratify=y_train after verifying the distribution of classes.
    (x_train, x_valid, y_train, y_valid) = train_test_split(x_train, y_train, test_size=0.2,
                                                 random_state=42)

    print('[INFO] Training shape: ', x_train.shape, y_train.shape)
    print('[INFO] Validation shape: ', x_valid.shape, y_valid.shape)
    print('[INFO] Test shape: ', x_test.shape, y_test.shape)

    print('[INFO] Setting up the model..')
    model = model_cls(network, data_cls)
    print(model)

    dataset = {
        'x_train' : x_train,
        'y_train' : y_train,
        'x_valid' : x_valid,
        'y_valid' : y_valid,
        'x_test' : x_test,
        'y_test' : y_test
    }

    if use_comet:
        # Never embed the API key in source: read it from the environment.
        # When COMET_API_KEY is unset this passes None, in which case Comet is
        # expected to fall back to its own configuration -- confirm against
        # the Comet docs. The key that used to be hard-coded here should be
        # treated as leaked and rotated.
        experiment = Experiment(api_key=os.environ.get('COMET_API_KEY'),
                                project_name='emnist',
                                auto_param_logging=False)

        # Metrics logged inside this context get the prefix 'train_'.
        with experiment.train():
            _fit(model, dataset, args)

        # Metrics logged inside this context get the prefix 'test_'.
        with experiment.test():
            loss, score = _evaluate(model, dataset, args)
            experiment.log_metrics({
                'loss': loss,
                'accuracy': score
            })

        experiment.log_parameters(args)
        experiment.log_dataset_hash(x_train)  # creates and logs a hash of the data
    else:
        _fit(model, dataset, args)
        _evaluate(model, dataset, args)

    if args['weights']:
        model.save_weights()


def main():
    """Parse the command-line options and hand them to ``train``."""
    train(_parse_args())


if __name__ == '__main__':
    main()

## Training

In [None]:
# Run the training script written above as a separate process:
# -b 16 sets the batch size to 16 and -e 1 trains for a single epoch.
! python ../src/training/train_model.py -b 16 -e 1

[INFO] Getting dataset...


## Testing