# Initialize a fresh test data dir

Includes horiz and vertical symmetry manipulations

In [1]:
import os
import shutil
import tensorflow as tf
from tensorflow import keras
import numpy as np
from numpy import asarray
import h5py
from glob import glob
import json
import sys
import random
from PIL import Image
import matplotlib.pyplot as plt

In [2]:
np.random.seed(123)
tf.random.set_seed(123)

In [3]:
new_data = './dcai_gcb_00/dcai_gcb_00'

In [4]:
n_val = 100

numerals = ["i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x"]

val_dir = new_data + '/val'
if os.path.exists(val_dir):
    shutil.rmtree(val_dir)

for i, num in enumerate(numerals):
    num_dir = new_data + '/train/{}/'.format(num)
    os.makedirs(num_dir.replace('/train/', '/val/'))
        
    num_pattern = new_data + '/train/{}/*.png'.format(num)
    fps = glob(num_pattern)
    random.shuffle(fps)
    choices = np.random.choice(np.arange(len(fps)), size=n_val, replace=False)
    for choice in choices:
        source = fps[choice]
        dest = source.replace('/train/', '/val/')
        shutil.move(source, dest)

In [5]:
horiz_sym = ['i', 'ii', 'iii', 'x', 'v']
vert_sym = ['i', 'ii', 'iii', 'x', 'ix']

for num in horiz_sym:
    num_dir = new_data + '/train/{}/*.png'.format(num)
    fps = [fp for fp in glob(num_dir) if '_vert' not in fp and '_horiz' not in fp]
    for fp in fps:
        image = Image.open(fp)
        arr = asarray(image)
        arr2 = arr[:, ::-1]  # horizontal sym
        image2 = Image.fromarray(arr2)
        image2.save(fp.replace('.png', '_horiz.png'))
        
for num in vert_sym:
    num_dir = new_data + '/train/{}/*.png'.format(num)
    fps = [fp for fp in glob(num_dir) if '_vert' not in fp and '_horiz' not in fp]
    for fp in fps:
        image = Image.open(fp)
        arr = asarray(image)
        arr2 = arr[::-1, :]  # vert sym
        image2 = Image.fromarray(arr2)
        image2.save(fp.replace('.png', '_vert.png'))

In [6]:
for num, target in (('vi', 'iv'), ('iv', 'vi')):
    num_dir = new_data + '/train/{}/*.png'.format(num)
    fps = glob(num_dir)
    for fp in fps:
        image = Image.open(fp)
        arr = asarray(image)
        arr2 = arr[:, ::-1]  # horizontal sym
        image2 = Image.fromarray(arr2)
        image2.save(fp.replace(num, target).replace('.png', '_horiz.png'))

In [7]:
for n in numerals:
    num_pattern = new_data + '/*/{}/*.png'.format(n)
    fps = glob(num_pattern)
    n_required = 1000 - len(fps)
    print('Need {} more files for numeral "{}"'.format(n_required, n))

Need 240 more files for numeral "i"
Need 588 more files for numeral "ii"
Need 594 more files for numeral "iii"
Need 538 more files for numeral "iv"
Need 614 more files for numeral "v"
Need 406 more files for numeral "vi"
Need 753 more files for numeral "vii"
Need 744 more files for numeral "viii"
Need 544 more files for numeral "ix"
Need 501 more files for numeral "x"
