# Creating folders with synthetic data.

In the notebook https://github.com/albertovpd/viu_tfm-deep_vision_classification/blob/kfolds_validation/src/creating_5_subfolders_for_kfoldslike_validation.ipynb we did the following:
- All available data was shuffled, and 150 pics of each class were taken for the test set.
- The remaining pics were shuffled again and then distributed into train/validation folders (80-20%).

Now, 3 new folders will be created with 50, 250 and 480 synthetic pics for each class. So:
- The same 150 pics are taken for the test set.
- The same train/validation distribution (80-20%) is taken.
- 50 synth pics of each class are randomly chosen and added to the real ones to create the folder *"synth50_train_val_ds"*. The same process is repeated with 250 and 480 synth pics.

In [1]:
# local path
from dotenv import load_dotenv
import os
import numpy as np
import shutil
import random
import shutil

In [2]:
# local
load_dotenv()
base_folder = os.environ.get("INPUT_PATH")

real_input  = base_folder + "dataset_1test_5trainval_folders/train_val_ds/"
synth_input = base_folder + "common_misclassifications/mosaics/"
synth_output= base_folder + "dataset_synth_data-1test_3trainval/train_val_ds/"

classes = {
    'Dinning/' : 'dinning_fakes/', 
   'Bedroom/' : 'bedroom_fakes/',
   'Livingroom/' : 'livingroom_fakes/',
   'Kitchen/' : 'kitchen_fakes/',
   'Bathroom/': 'bathroom_fakes/'
            }

new_folders = {
    "synth50_train_val_ds/" : 50, 
    "synth250_train_val_ds/": 250, 
    "synth480_train_val_ds/": 480
                }

In [3]:
# Populate each of the 3 new folders with a copy of every real picture.
for folder_name in new_folders:
    for class_dir in classes:
        source_dir = real_input + class_dir
        target_dir = synth_output + folder_name + class_dir

        # make sure the destination exists before copying into it
        os.makedirs(target_dir, exist_ok=True)

        # duplicate each real picture under the same file name
        for pic_name in os.listdir(source_dir):
            shutil.copyfile(source_dir + pic_name, target_dir + pic_name)

In [4]:
# Copy a random sample of synthetic ("fake") pics into each new folder.
#
# The selection is made reproducible: the directory listing is sorted (the
# order returned by os.listdir is arbitrary) and a fixed-seed generator,
# created once before the loops, drives every shuffle. As long as the source
# folders are unchanged, re-running this cell picks the same files again
# instead of copying a different random batch on top of the previous one.
SYNTH_SEED = 42
rng = np.random.default_rng(SYNTH_SEED)

for n in new_folders:
    for c in classes:
        fake_dir = synth_input + classes[c]
        target_dir = synth_output + n + c
        wanted = new_folders[n]

        # locate fake pics in a stable order, then shuffle them
        fake_pics = sorted(os.listdir(fake_dir))
        rng.shuffle(fake_pics)

        # keep the first 50, 250 or 480 shuffled pics
        fake_shuffled = fake_pics[:wanted]
        if len(fake_shuffled) < wanted:
            # Do not fail, but make the shortfall visible: the folder would
            # otherwise silently contain fewer synthetic pics than its name says.
            print(f"WARNING: {fake_dir} has only {len(fake_shuffled)} pics, {wanted} requested for {n}{c}")

        # the destination may already exist; create it if it does not
        os.makedirs(target_dir, exist_ok=True)

        # copy those pics into the new folder
        for f in fake_shuffled:
            shutil.copyfile(fake_dir + f, target_dir + f)