In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import shutil
import cv2
from random import shuffle
from Utils import display, save, makeDir, is_binary

In [2]:
!pwd

/home/aidenchia/Documents/char-recognizer/src


In [None]:
def binarizeAllFiles(src, dst='../out/', method='normal', classLabelPos=2):
    """Threshold every PNG found under ``src`` and save the result under ``dst``.

    Args:
        src: Directory that is walked recursively with ``os.walk``.
        dst: Output root. Each image is saved with the prefix ``dst/<classLabel>``.
        method: ``'normal'`` thresholds with ``cv2.THRESH_BINARY``,
            ``'inverse'`` with ``cv2.THRESH_BINARY_INV``.
        classLabelPos: Index of the class label in ``root.split('/')``, where
            ``root`` is the folder containing the image. For example
            ``'../imgs/0'`` splits into ``['..', 'imgs', '0']``, so the label
            is at index 2.

    Raises:
        ValueError: If ``method`` is neither ``'normal'`` nor ``'inverse'``.
            Previously an unknown method silently saved un-thresholded images.
    """
    if method not in ('normal', 'inverse'):
        raise ValueError(
            "method must be 'normal' or 'inverse', got {!r}".format(method))

    # The threshold type does not change per file, so resolve it once.
    thresholdType = cv2.THRESH_BINARY if method == 'normal' else cv2.THRESH_BINARY_INV

    for root, dirs, files in os.walk(src):
        for file in files:
            if '.png' not in file:
                continue

            filepath = os.path.join(root, file)
            print('[INFO] Loading file: {}'.format(filepath))
            img = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None; passing that on to cv2.threshold would raise.
                print('[WARN] Could not read image, skipping: {}'.format(filepath))
                continue

            # cv2.threshold returns (retval, image); [-1] keeps the image.
            img = cv2.threshold(img, 127, 255, thresholdType)[-1]

            classLabel = root.split('/')[classLabelPos]
            # os.path.join gives the same path as ``dst + classLabel`` when dst
            # ends with '/', and also works when it does not.
            # presumably save() writes img as <prefix>/<name> -- confirm in Utils.
            save(img, name=file, prefix=os.path.join(dst, classLabel), suffix=None)

In [None]:
def countNumImgFiles(directory):
    """Count the files under ``directory`` whose name contains '.png'.

    The directory is walked recursively. The total is printed and returned.
    """
    numFiles = sum(
        1
        for _root, _dirs, names in os.walk(directory)
        for name in names
        if '.png' in name
    )
    print('No. of Files in {}: {}'.format(directory, numFiles))
    return numFiles

In [None]:
def renameAllFiles(src, newName, classLabelPos=2):
    """Prefix the name of every PNG under ``src`` with ``newName``.

    Files are renamed in place, inside the folder they were found in.
    ``classLabelPos`` is accepted but not used by this function.
    The prefix is added unconditionally, so calling this twice on the same
    folder prefixes the files twice.
    """
    for folder, _dirs, names in os.walk(src):
        for name in (n for n in names if '.png' in n):
            old = os.path.join(folder, name)
            new = os.path.join(folder, newName + name)
            print('[INFO] Renaming {} to {}'.format(old, new))
            os.rename(old, new)

In [22]:
def copyOverIfExists(src, dst, imgpath):
    """Move ``imgpath`` from folder ``src`` into folder ``dst`` if ``dst`` lacks it.

    Despite the name, the file is moved with ``shutil.move`` (it no longer
    exists in ``src`` afterwards), and the move happens only when the
    destination path does NOT exist yet. An existing destination file is left
    untouched and the source file stays where it is.
    """
    # presumably creates dst when it is missing -- confirm in Utils.makeDir
    makeDir(dst)

    target = os.path.join(dst, imgpath)
    if os.path.exists(target):
        return

    origin = os.path.join(src, imgpath)
    print('[INFO] Moving {} to {}'.format(origin, target))
    shutil.move(origin, target)

In [None]:
def mergeIntoFolder(src, dst, classLabelPos=2):
    """Move every file under ``src`` into ``dst/<classLabel>``.

    Args:
        src: Directory that is walked recursively with ``os.walk``.
        dst: Destination root; a trailing '/' is optional.
        classLabelPos: Index of the class label in ``root.split('/')``, where
            ``root`` is the folder that contains the file.

    Files are transferred with ``copyOverIfExists``, which moves them and
    skips any file name already present in the destination folder.
    """
    for root, dirs, files in os.walk(src):
        if not files:
            # Nothing to move here; also avoids indexing the label of folders
            # (such as src itself) that sit above the class-label level.
            continue

        # The label and destination are the same for every file in this folder.
        classLabel = root.split('/')[classLabelPos]
        # os.path.join matches ``dst + classLabel`` when dst ends with '/',
        # and no longer glues the label onto the folder name when it does not.
        dstPath = os.path.join(dst, classLabel)
        for file in files:
            copyOverIfExists(root, dstPath, file)

## Prepare EMNIST Data

In [None]:
# Load the EMNIST "balanced" test CSV. header=None: the file has no header
# row, so the columns are addressed by integer position.
df = pd.read_csv('../EMNIST/emnist-balanced-test.csv', header=None)
df.head()

In [None]:
# Each CSV row is treated as one image, so the row count is the image count.
print('Total No. of Images:', df.shape[0])

In [None]:
# Column 0 is the class id of each row; every remaining column is kept as the
# row's flattened image data.
classes = df[0].values
flattened = df.drop(0, axis=1).values
print('No. of Classes:', len(np.unique(classes)))

In [None]:
# Build the class-id -> character lookup from the EMNIST mapping file.
# Each line is read as "<class id> <character code>", both as integers.
mapping = {}
with open('../EMNIST/emnist-balanced-mapping.txt') as f:  # closed on exit
    for line in f:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        classNum = int(fields[0])
        gt = chr(int(fields[1]))
        mapping[classNum] = gt

print(mapping)

In [None]:
from random import randint

# Preview one randomly chosen sample. `pick` is a ROW index into `flattened`,
# so it must range over all rows; the previous fixed bound of 47 only ever
# sampled the first 48 rows of the file.
pick = randint(0, len(flattened) - 1)
# Rows are reshaped to 28x28 and transposed before display.
img = flattened[pick].reshape((28,28)).transpose()
print('Class:', mapping[classes[pick]])
print('Unique values: ', np.unique(img))
display(img)

In [None]:
# Write every EMNIST row to disk as a PNG, grouped by its character label.
# The row index is used as the file name.
for idx, (pixels, label) in enumerate(zip(flattened, classes)):
    img = pixels.reshape((28,28)).transpose()
    gt = mapping[label]
    save(img, name=str(idx), prefix=os.path.join('../imgs/test', gt), suffix='.png')

In [None]:
# Try the thresholding on a single image: print the distinct pixel values
# before and after, then show the result.
# NOTE(review): this reads from '../imgs/0/', while the export loop in this
# notebook saves under '../imgs/test/<label>' -- confirm the folder layout.
img = cv2.imread('../imgs/0/156.png', cv2.IMREAD_GRAYSCALE)
print(np.unique(img))
# cv2.threshold returns (retval, image); [-1] keeps the image.
img = cv2.threshold(img, 127, 255, cv2.THRESH_BINARY)[-1]
print(np.unique(img))
display(img)

In [None]:
# Threshold every PNG under ../imgs (non-inverted) and save the results under
# ../out/<label>. The default classLabelPos=2 fits '../imgs/<label>'.
binarizeAllFiles(src='../imgs', dst='../out/', method='normal')

In [None]:
# Spot-check a single image with Utils.is_binary.
# NOTE(review): this loads the image from ../imgs (the binarization input),
# not from ../out (where the thresholded copies were saved) -- confirm which
# file was meant to be checked.
is_binary(cv2.imread('../imgs/0/156.png'))

In [None]:
# Sanity check: the input tree and the binarized output tree must hold the
# same number of PNG files.
assert countNumImgFiles('../imgs') == countNumImgFiles('../out')

In [None]:
# Prefix every PNG under ../imgs/ with 'EMNIST_' (presumably so the names
# cannot clash with the Hnd files merged in later -- confirm).
# Not idempotent: running this cell again adds the prefix a second time.
renameAllFiles('../imgs/', 'EMNIST_')

## Combine Hnd Data

In [None]:
# Entries of ../Hnd/Img in sorted order, leaving out any name containing '.txt'.
files = sorted(x for x in os.listdir('../Hnd/Img') if '.txt' not in x)
print(files)

In [None]:
# Ground-truth characters: digits, upper-case and lower-case letters, 62 in
# total. The original string skipped lower-case 'j' ("...ghik..."), which left
# 61 entries and shifted every label after 'i' by one position.
# presumably entry k corresponds to the k-th sorted Hnd sample folder -- confirm.
gt = list('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz')
print(gt)

In [None]:
# Threshold the Hnd images with the inverted threshold and save them under
# ../Hnd/Out/<label>. classLabelPos=3 because '../Hnd/Img/<label>' splits on
# '/' into ['..', 'Hnd', 'Img', '<label>'].
# presumably Hnd uses the opposite foreground/background polarity to EMNIST,
# hence 'inverse' -- confirm.
binarizeAllFiles(src='../Hnd/Img', dst='../Hnd/Out/', method='inverse', classLabelPos=3)

In [None]:
# Sanity check: every Hnd input PNG must have a binarized counterpart.
assert countNumImgFiles('../Hnd/Img/') == countNumImgFiles('../Hnd/Out')

In [None]:
# Prefix every binarized Hnd PNG with 'Hnd_'. The third argument
# (classLabelPos) is accepted by renameAllFiles but not used by it.
# Not idempotent: running this cell again adds the prefix a second time.
renameAllFiles('../Hnd/Out', 'Hnd_', 3)

In [None]:
# One-off copy of a single Hnd image into ../imgs/L ahead of the bulk merge.
# NOTE(review): the reason for handling this file separately is not evident
# from the notebook -- confirm whether this cell is still needed.
shutil.copy('../Hnd/Out/L/Hnd_img022-027.png', '../imgs/L/Hnd_img022-027.png')

In [None]:
# Merge the Hnd images into the per-class folders under ../imgs/.
# Files are MOVED out of ../Hnd/Out; a file whose name already exists in the
# destination folder is skipped and stays in ../Hnd/Out.
mergeIntoFolder('../Hnd/Out', '../imgs/', classLabelPos=3)

In [None]:
# Total number of PNGs under ../imgs/ after the merge (printed and returned).
countNumImgFiles('../imgs/')

## Balance Classes

In [None]:
!pwd

In [None]:
# Hold out a random 10% of the 'slash' training PNGs as test images.
# copyOverIfExists moves the files, so they leave the training folder.
src = '../imgs/train/slash'
dst = '../imgs/test/slash'
makeDir(dst)

files = [name for name in os.listdir(src) if '.png' in name]
shuffle(files)
files = files[:int(0.1 * len(files))]
for file in files:
    copyOverIfExists(src, dst, file)

## Create Validation Set

In [27]:
def makeValSet(src, valDir='../imgs/validation', fraction=0.1):
    """Move a random share of one class folder's PNGs into the validation set.

    Args:
        src: Class folder, e.g. ``'../imgs/train/A'``. Its last path component
            is used as the class label; a trailing '/' is tolerated.
        valDir: Validation root. Files end up in ``valDir/<classLabel>``.
        fraction: Share of the PNGs in ``src`` to move, rounded down.

    The number of selected files is printed. Files are moved (not copied) via
    ``copyOverIfExists``, so each call removes another share from ``src``.
    The selection uses ``random.shuffle`` and is not seeded here.
    """
    # normpath drops a trailing separator, which would otherwise make the
    # label empty and send the files into the validation root itself.
    classLabel = os.path.basename(os.path.normpath(src))

    files = [x for x in os.listdir(src) if '.png' in x]
    shuffle(files)
    files = files[0: int(fraction * len(files))]
    print(len(files))

    # The destination is the same for every file, so build it once.
    dst = os.path.join(valDir, classLabel)
    for file in files:
        copyOverIfExists(src, dst, file)

In [28]:
# One source path per entry of ../imgs/train/. os.listdir returns every
# entry, so a stray non-directory in that folder would be listed here too.
srcs = ['../imgs/train/' + x for x in os.listdir('../imgs/train/')]
print(srcs)

['../imgs/train/$', '../imgs/train/)', '../imgs/train/L', '../imgs/train/H', '../imgs/train/N', '../imgs/train/2', '../imgs/train/I', '../imgs/train/D', '../imgs/train/b', '../imgs/train/1', '../imgs/train/f', '../imgs/train/J', '../imgs/train/E', '../imgs/train/Y', '../imgs/train/h', '../imgs/train/e', '../imgs/train/7', '../imgs/train/@', '../imgs/train/C', '../imgs/train/r', '../imgs/train/#', '../imgs/train/F', '../imgs/train/q', '../imgs/train/X', '../imgs/train/P', '../imgs/train/(', '../imgs/train/0', '../imgs/train/A', '../imgs/train/6', '../imgs/train/O', '../imgs/train/W', '../imgs/train/U', '../imgs/train/T', '../imgs/train/9', '../imgs/train/M', '../imgs/train/R', '../imgs/train/V', '../imgs/train/G', '../imgs/train/B', '../imgs/train/5', '../imgs/train/t', '../imgs/train/3', '../imgs/train/8', '../imgs/train/a', '../imgs/train/4', '../imgs/train/slash', '../imgs/train/Z', '../imgs/train/Q', '../imgs/train/n', '../imgs/train/K', '../imgs/train/g', '../imgs/train/d', '../img

In [29]:
# Split a validation set off every class folder.
# Not idempotent: the files are moved, so running this cell again moves a
# further 10% of whatever is left in each training folder.
for src in srcs:
    makeValSet(src)

240
[INFO] Moving ../imgs/train/$/__6_634336.png to ../imgs/validation/$/__6_634336.png
[INFO] Moving ../imgs/train/$/__16_1717897.png to ../imgs/validation/$/__16_1717897.png
[INFO] Moving ../imgs/train/$/__21_1361250.png to ../imgs/validation/$/__21_1361250.png
[INFO] Moving ../imgs/train/$/__20_6946443.png to ../imgs/validation/$/__20_6946443.png
[INFO] Moving ../imgs/train/$/__16_7960528.png to ../imgs/validation/$/__16_7960528.png
[INFO] Moving ../imgs/train/$/__15_6527856.png to ../imgs/validation/$/__15_6527856.png
[INFO] Moving ../imgs/train/$/__6_8485190.png to ../imgs/validation/$/__6_8485190.png
[INFO] Moving ../imgs/train/$/__24_5591369.png to ../imgs/validation/$/__24_5591369.png
[INFO] Moving ../imgs/train/$/__23_8302420.png to ../imgs/validation/$/__23_8302420.png
[INFO] Moving ../imgs/train/$/__20_8357698.png to ../imgs/validation/$/__20_8357698.png
[INFO] Moving ../imgs/train/$/__0_3385325.png to ../imgs/validation/$/__0_3385325.png
[INFO] Moving ../imgs/train/$/__24_5

[INFO] Moving ../imgs/train/I/EMNIST_43365.png to ../imgs/validation/I/EMNIST_43365.png
[INFO] Moving ../imgs/train/I/EMNIST_33211.png to ../imgs/validation/I/EMNIST_33211.png
[INFO] Moving ../imgs/train/I/EMNIST_18928.png to ../imgs/validation/I/EMNIST_18928.png
[INFO] Moving ../imgs/train/I/EMNIST_8662.png to ../imgs/validation/I/EMNIST_8662.png
[INFO] Moving ../imgs/train/I/EMNIST_89816.png to ../imgs/validation/I/EMNIST_89816.png
[INFO] Moving ../imgs/train/I/EMNIST_100223.png to ../imgs/validation/I/EMNIST_100223.png
[INFO] Moving ../imgs/train/I/EMNIST_39516.png to ../imgs/validation/I/EMNIST_39516.png
[INFO] Moving ../imgs/train/I/EMNIST_57507.png to ../imgs/validation/I/EMNIST_57507.png
[INFO] Moving ../imgs/train/I/EMNIST_27288.png to ../imgs/validation/I/EMNIST_27288.png
[INFO] Moving ../imgs/train/I/EMNIST_63483.png to ../imgs/validation/I/EMNIST_63483.png
[INFO] Moving ../imgs/train/I/EMNIST_68660.png to ../imgs/validation/I/EMNIST_68660.png
[INFO] Moving ../imgs/train/I/EM

245
[INFO] Moving ../imgs/train/E/EMNIST_86570.png to ../imgs/validation/E/EMNIST_86570.png
[INFO] Moving ../imgs/train/E/EMNIST_62077.png to ../imgs/validation/E/EMNIST_62077.png
[INFO] Moving ../imgs/train/E/EMNIST_43357.png to ../imgs/validation/E/EMNIST_43357.png
[INFO] Moving ../imgs/train/E/EMNIST_16086.png to ../imgs/validation/E/EMNIST_16086.png
[INFO] Moving ../imgs/train/E/EMNIST_70847.png to ../imgs/validation/E/EMNIST_70847.png
[INFO] Moving ../imgs/train/E/EMNIST_83520.png to ../imgs/validation/E/EMNIST_83520.png
[INFO] Moving ../imgs/train/E/EMNIST_75990.png to ../imgs/validation/E/EMNIST_75990.png
[INFO] Moving ../imgs/train/E/EMNIST_90094.png to ../imgs/validation/E/EMNIST_90094.png
[INFO] Moving ../imgs/train/E/EMNIST_26722.png to ../imgs/validation/E/EMNIST_26722.png
[INFO] Moving ../imgs/train/E/EMNIST_50416.png to ../imgs/validation/E/EMNIST_50416.png
[INFO] Moving ../imgs/train/E/EMNIST_8273.png to ../imgs/validation/E/EMNIST_8273.png
[INFO] Moving ../imgs/train/E/

[INFO] Moving ../imgs/train/C/EMNIST_31676.png to ../imgs/validation/C/EMNIST_31676.png
[INFO] Moving ../imgs/train/C/EMNIST_109654.png to ../imgs/validation/C/EMNIST_109654.png
[INFO] Moving ../imgs/train/C/EMNIST_6582.png to ../imgs/validation/C/EMNIST_6582.png
[INFO] Moving ../imgs/train/C/EMNIST_91219.png to ../imgs/validation/C/EMNIST_91219.png
[INFO] Moving ../imgs/train/C/EMNIST_53619.png to ../imgs/validation/C/EMNIST_53619.png
[INFO] Moving ../imgs/train/C/EMNIST_104462.png to ../imgs/validation/C/EMNIST_104462.png
[INFO] Moving ../imgs/train/C/EMNIST_49507.png to ../imgs/validation/C/EMNIST_49507.png
[INFO] Moving ../imgs/train/C/EMNIST_100133.png to ../imgs/validation/C/EMNIST_100133.png
[INFO] Moving ../imgs/train/C/EMNIST_60074.png to ../imgs/validation/C/EMNIST_60074.png
[INFO] Moving ../imgs/train/C/EMNIST_111074.png to ../imgs/validation/C/EMNIST_111074.png
[INFO] Moving ../imgs/train/C/EMNIST_69019.png to ../imgs/validation/C/EMNIST_69019.png
[INFO] Moving ../imgs/trai

[INFO] Moving ../imgs/train/0/EMNIST_1844.png to ../imgs/validation/0/EMNIST_1844.png
[INFO] Moving ../imgs/train/0/EMNIST_15401.png to ../imgs/validation/0/EMNIST_15401.png
[INFO] Moving ../imgs/train/0/EMNIST_65841.png to ../imgs/validation/0/EMNIST_65841.png
[INFO] Moving ../imgs/train/0/EMNIST_58598.png to ../imgs/validation/0/EMNIST_58598.png
[INFO] Moving ../imgs/train/0/EMNIST_44624.png to ../imgs/validation/0/EMNIST_44624.png
[INFO] Moving ../imgs/train/0/EMNIST_25786.png to ../imgs/validation/0/EMNIST_25786.png
[INFO] Moving ../imgs/train/0/EMNIST_17298.png to ../imgs/validation/0/EMNIST_17298.png
[INFO] Moving ../imgs/train/0/EMNIST_41471.png to ../imgs/validation/0/EMNIST_41471.png
[INFO] Moving ../imgs/train/0/EMNIST_7233.png to ../imgs/validation/0/EMNIST_7233.png
[INFO] Moving ../imgs/train/0/EMNIST_108828.png to ../imgs/validation/0/EMNIST_108828.png
[INFO] Moving ../imgs/train/0/EMNIST_111363.png to ../imgs/validation/0/EMNIST_111363.png
[INFO] Moving ../imgs/train/0/EM

[INFO] Moving ../imgs/train/T/EMNIST_47431.png to ../imgs/validation/T/EMNIST_47431.png
[INFO] Moving ../imgs/train/T/EMNIST_97314.png to ../imgs/validation/T/EMNIST_97314.png
[INFO] Moving ../imgs/train/T/EMNIST_97640.png to ../imgs/validation/T/EMNIST_97640.png
[INFO] Moving ../imgs/train/T/EMNIST_49492.png to ../imgs/validation/T/EMNIST_49492.png
[INFO] Moving ../imgs/train/T/EMNIST_60275.png to ../imgs/validation/T/EMNIST_60275.png
[INFO] Moving ../imgs/train/T/EMNIST_42031.png to ../imgs/validation/T/EMNIST_42031.png
[INFO] Moving ../imgs/train/T/EMNIST_100276.png to ../imgs/validation/T/EMNIST_100276.png
[INFO] Moving ../imgs/train/T/EMNIST_5801.png to ../imgs/validation/T/EMNIST_5801.png
[INFO] Moving ../imgs/train/T/EMNIST_15357.png to ../imgs/validation/T/EMNIST_15357.png
[INFO] Moving ../imgs/train/T/EMNIST_90777.png to ../imgs/validation/T/EMNIST_90777.png
[INFO] Moving ../imgs/train/T/EMNIST_79850.png to ../imgs/validation/T/EMNIST_79850.png
[INFO] Moving ../imgs/train/T/EM

[INFO] Moving ../imgs/train/G/EMNIST_48983.png to ../imgs/validation/G/EMNIST_48983.png
[INFO] Moving ../imgs/train/G/EMNIST_95090.png to ../imgs/validation/G/EMNIST_95090.png
[INFO] Moving ../imgs/train/G/EMNIST_26169.png to ../imgs/validation/G/EMNIST_26169.png
[INFO] Moving ../imgs/train/G/EMNIST_65901.png to ../imgs/validation/G/EMNIST_65901.png
[INFO] Moving ../imgs/train/G/EMNIST_32496.png to ../imgs/validation/G/EMNIST_32496.png
[INFO] Moving ../imgs/train/G/EMNIST_3843.png to ../imgs/validation/G/EMNIST_3843.png
[INFO] Moving ../imgs/train/G/EMNIST_15842.png to ../imgs/validation/G/EMNIST_15842.png
[INFO] Moving ../imgs/train/G/EMNIST_104756.png to ../imgs/validation/G/EMNIST_104756.png
[INFO] Moving ../imgs/train/G/EMNIST_8412.png to ../imgs/validation/G/EMNIST_8412.png
[INFO] Moving ../imgs/train/G/EMNIST_84808.png to ../imgs/validation/G/EMNIST_84808.png
[INFO] Moving ../imgs/train/G/EMNIST_90223.png to ../imgs/validation/G/EMNIST_90223.png
[INFO] Moving ../imgs/train/G/EMNI

[INFO] Moving ../imgs/train/t/EMNIST_41005.png to ../imgs/validation/t/EMNIST_41005.png
[INFO] Moving ../imgs/train/t/EMNIST_108312.png to ../imgs/validation/t/EMNIST_108312.png
[INFO] Moving ../imgs/train/t/EMNIST_80553.png to ../imgs/validation/t/EMNIST_80553.png
[INFO] Moving ../imgs/train/t/EMNIST_1099.png to ../imgs/validation/t/EMNIST_1099.png
[INFO] Moving ../imgs/train/t/EMNIST_18023.png to ../imgs/validation/t/EMNIST_18023.png
[INFO] Moving ../imgs/train/t/EMNIST_84219.png to ../imgs/validation/t/EMNIST_84219.png
[INFO] Moving ../imgs/train/t/EMNIST_98024.png to ../imgs/validation/t/EMNIST_98024.png
[INFO] Moving ../imgs/train/t/EMNIST_55330.png to ../imgs/validation/t/EMNIST_55330.png
[INFO] Moving ../imgs/train/t/EMNIST_110246.png to ../imgs/validation/t/EMNIST_110246.png
[INFO] Moving ../imgs/train/t/EMNIST_52414.png to ../imgs/validation/t/EMNIST_52414.png
[INFO] Moving ../imgs/train/t/EMNIST_111325.png to ../imgs/validation/t/EMNIST_111325.png
[INFO] Moving ../imgs/train/

[INFO] Moving ../imgs/train/Q/EMNIST_26173.png to ../imgs/validation/Q/EMNIST_26173.png
[INFO] Moving ../imgs/train/Q/EMNIST_80931.png to ../imgs/validation/Q/EMNIST_80931.png
[INFO] Moving ../imgs/train/Q/EMNIST_71681.png to ../imgs/validation/Q/EMNIST_71681.png
[INFO] Moving ../imgs/train/Q/EMNIST_2581.png to ../imgs/validation/Q/EMNIST_2581.png
[INFO] Moving ../imgs/train/Q/EMNIST_22102.png to ../imgs/validation/Q/EMNIST_22102.png
[INFO] Moving ../imgs/train/Q/EMNIST_40265.png to ../imgs/validation/Q/EMNIST_40265.png
[INFO] Moving ../imgs/train/Q/EMNIST_45029.png to ../imgs/validation/Q/EMNIST_45029.png
[INFO] Moving ../imgs/train/Q/EMNIST_30950.png to ../imgs/validation/Q/EMNIST_30950.png
[INFO] Moving ../imgs/train/Q/EMNIST_35310.png to ../imgs/validation/Q/EMNIST_35310.png
[INFO] Moving ../imgs/train/Q/EMNIST_84749.png to ../imgs/validation/Q/EMNIST_84749.png
[INFO] Moving ../imgs/train/Q/EMNIST_77669.png to ../imgs/validation/Q/EMNIST_77669.png
[INFO] Moving ../imgs/train/Q/EMNI