# CREATE A VALIDATION SET AND JSON FILE

In [1]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one (the split cell below relies on this to show both set sizes).
InteractiveShell.ast_node_interactivity = "all"

In [2]:
import os
import tensorflow as tf
import numpy as np

# Fixed seed: set as TensorFlow's global seed here and also passed explicitly
# to the dataset shuffle later on.
SEED = 345
tf.random.set_seed(SEED)  

# Base directory; the input dataset and the output folders are located relative to it.
cwd = os.getcwd()

### READ THE KAGGLE DATASET AND SPLIT INTO TRAINING/VALIDATION SETS

In [3]:
## Build a single dataset of (image path, class name) pairs covering every
## class folder, so that it can be split into training and validation sets.
startDataset = os.path.join(cwd, 'startDataset')
training_dir = './startDataset/training'

# Each sub-folder of the training directory is one class. os.walk() yields
# directory names in arbitrary (filesystem) order, so sort them to keep the
# element order -- and therefore the seeded split -- reproducible.
className = sorted(next(os.walk(training_dir))[1])

dataset = None
for cl in className:
    # shuffle=False: list_files() shuffles by default (and reshuffles on every
    # iteration). The global shuffle is performed later, with an explicit seed.
    imgs = tf.data.Dataset.list_files(
        file_pattern=training_dir + '/' + cl + '/*.jpg', shuffle=False)
    numFiles = int(tf.data.experimental.cardinality(imgs))

    # One label per file: the class (folder) name repeated numFiles times.
    targets_tensor = tf.fill([numFiles], cl)
    targets = tf.data.Dataset.from_tensor_slices(targets_tensor)

    class_ds = tf.data.Dataset.zip((imgs, targets))
    # Newer classes are prepended to what has been accumulated so far.
    dataset = class_ds if dataset is None else class_ds.concatenate(dataset)
        

In [4]:
# Fraction of all samples reserved for validation; the rest is used for training.
validation_split=0.5

if dataset is None:
    raise ValueError("No class folders were found, so there is nothing to split.")

numElement = int(tf.data.experimental.cardinality(dataset)) # It contains 1554 elements (all images associated with their classes)

# Shuffle exactly ONCE and freeze the resulting order.
# A shuffled tf.data pipeline is re-shuffled every time it is iterated (the
# default is reshuffle_each_iteration=True), so taking skip() and take() from
# it directly would evaluate the two subsets on two different permutations:
# some images would end up in both sets and others in neither.
# Materialising one pass also protects against any upstream stage that
# reorders its elements between iterations.
shuffled = dataset.shuffle(buffer_size=numElement, seed=SEED,
                           reshuffle_each_iteration=False)
shuffled_pairs = [(path.numpy(), label.numpy()) for path, label in shuffled]

# Rebuild a dataset with a fixed order; elements are still
# (path, class name) pairs of string tensors.
dataset = tf.data.Dataset.from_tensor_slices((
    [path for path, _ in shuffled_pairs],
    [label for _, label in shuffled_pairs],
))

# The first numValid elements form the validation set, the remainder the training set.
numValid = int(validation_split*numElement)
valid_ds= dataset.take(numValid)
train_ds= dataset.skip(numValid)

int(tf.data.experimental.cardinality(train_ds))
int(tf.data.experimental.cardinality(valid_ds))

777

777

### CREATE A NEW DATASET DIRECTORY WITH SPLIT TRAINING AND VALIDATION SETS

In [5]:
from PIL import Image  # no longer needed by this cell; kept in case other cells rely on it
import re              # no longer needed by this cell; kept in case other cells rely on it
import shutil

# Start from a clean output directory so files from a previous split never
# get mixed with the new one.
newDataset_dir = os.path.join(cwd, 'newDataset_02')
shutil.rmtree(newDataset_dir, ignore_errors=True)
os.makedirs(newDataset_dir, exist_ok=True)

classes = [ 'owl',    # 0
            'galaxy', # 1
            'lightning', # 2
            'wine-bottle', # 3
            't-shirt', # 4
            'waterfall', # 5
            'sword', # 6
            'school-bus', # 7
            'calculator', # 8
            'sheet-music', # 9
            'airplanes', # 10
            'lightbulb', # 11
            'skyscraper', # 12
            'mountain-bike', # 13
            'fireworks', # 14
            'computer-monitor', # 15
            'bear', # 16
            'grand-piano', # 17
            'kangaroo', # 18
            'laptop']       # 19

# Index later dumped to JSON: {split name: {class name: [file names]}}.
# Every known class is pre-populated so that it appears even when empty.
file_dict = {
    "training": {cl: [] for cl in classes},
    "validation": {cl: [] for cl in classes},
}


def _export_split(split_ds, split_dir, split_index):
    """Copy every image of one split into ``split_dir/<class>/`` and index it.

    Args:
        split_ds: iterable of (path, class name) pairs of scalar string
            tensors (each must support ``.numpy()`` returning bytes).
        split_dir: root output directory of this split.
        split_index: dict mapping class name -> list of file names; the
            copied file names are appended to it in place.
    """
    for path_img, class_name in split_ds:
        src_path = path_img.numpy().decode("utf-8")
        class_name = class_name.numpy().decode("utf-8")

        class_dir = os.path.join(split_dir, class_name)
        os.makedirs(class_dir, exist_ok=True)

        file_name = os.path.basename(src_path)
        # Byte-for-byte copy: opening and re-saving the image would re-encode
        # the JPEG (lossy) and leave the source file handle open.
        shutil.copyfile(src_path, os.path.join(class_dir, file_name))

        # setdefault() tolerates a class folder that is missing from `classes`.
        split_index.setdefault(class_name, []).append(file_name)


train_dir = os.path.join(newDataset_dir, 'training')
_export_split(train_ds, train_dir, file_dict["training"])

valid_dir = os.path.join(newDataset_dir, 'validation')
_export_split(valid_ds, valid_dir, file_dict["validation"])
            

### CREATE A JSON FILE IN JSON_dir

In [6]:
# Write the training/validation index (file_dict) to JSON_dir as a JSON file.
import json
import os

output_dir = os.path.join(cwd, 'JSON_dir')
os.makedirs(output_dir, exist_ok=True)

json_file = 'dataset_split_05.json'

# Use a separate name for the file handle so that `json_file` keeps holding
# the file name instead of being rebound to a (closed) file object.
with open(os.path.join(output_dir, json_file), 'w') as json_fp:
    json.dump(file_dict, json_fp, indent=2)