## Data preparation for CycleGAN
This notebook copies the rendered images and their metadata CSV files into the folder layout used to train a CycleGAN that translates images from one illuminant (A) to another (B).

In [2]:
import os
import pandas as pd
import numpy
import sys
import shutil

In [21]:
# Peek at the column layout of one per-scene CSV. nrows=0 parses only the
# header row, so the data rows are never loaded just to list the column names.
pd.read_csv('./data/train/scene_abandonned_city_54/2500/E/dataset_2500_E.csv', nrows=0).columns


Index(['Unnamed: 0', 'scene', 'illuminant', 'scene_center_x', 'scene_center_y',
       'scene_center_z', 'rendered_image', 'camera_location_x',
       'camera_location_y', 'camera_location_z', 'NW', 'N', 'NE', 'E', 'SE',
       'S', 'SW', 'W', 'white_balance', 'exposure_compensation',
       'pointlight_intensity'],
      dtype='object')

In [50]:
# Illuminant pair {A, B}. Each one is described by a colour temperature and a
# location code; both values are used to build folder and CSV file names.
color_temperatures = {
    'A': 2500,
    'B': 2500,
}
locations = {
    'A': 'SW',
    'B': 'SE',
}

# Root folder that receives the copied images and the merged CSV files.
destination_dataset_path = './datasets/UELighting/'

# Source folders of the three splits.
train_path = './data/train/'
test_path = './data/test/'
validate_path = './data/validate/'

# Entries found directly inside each split folder.
train_scenes = os.listdir(train_path)
test_scenes = os.listdir(test_path)
validate_scenes = os.listdir(validate_path)


# Column names of the per-scene CSV files.
col_names = [
    'Unnamed: 0', 'scene', 'illuminant',
    'scene_center_x', 'scene_center_y', 'scene_center_z',
    'rendered_image',
    'camera_location_x', 'camera_location_y', 'camera_location_z',
    # presumably one flag per light location -- TODO confirm
    'NW', 'N', 'NE', 'E', 'SE', 'S', 'SW', 'W',
    'white_balance', 'exposure_compensation', 'pointlight_intensity',
]

# Empty frame carrying the expected columns.
# NOTE(review): looks like a placeholder for illuminant B -- confirm it is used.
dataset_B = pd.DataFrame(columns = col_names)

def generate_dataset(illuminant = 'A', step = 'train', columns = col_names):
    """Collect the images and metadata of one illuminant for a whole split.

    For every scene folder under ``data/<step>/`` the function reads the CSV
    ``dataset_<temperature>_<location>.csv`` of the requested illuminant,
    copies each image listed in its ``rendered_image`` column to
    ``<destination_dataset_path>/<step>/<illuminant>/`` and prefixes the file
    name with the scene's index so images from different scenes cannot
    collide. The per-scene tables are then merged and written next to the
    images as ``dataset_<illuminant>.csv``.

    Parameters
    ----------
    illuminant : str
        Key into the module-level ``color_temperatures`` and ``locations``
        dictionaries (e.g. ``'A'`` or ``'B'``).
    step : str
        Name of the split folder under ``data`` (e.g. ``'train'``).
    columns : list of str
        Columns the merged table must contain. They are placed first, in this
        order; a column absent from the CSV files is filled with NaN, and any
        extra CSV column is kept after them.

    Returns
    -------
    pandas.DataFrame
        The merged table, with ``rendered_image`` holding the new
        (index-prefixed) file names. Row indices of the per-scene CSV files
        are kept as-is.

    Raises
    ------
    KeyError
        If ``illuminant`` is not a key of the configuration dictionaries.
    FileNotFoundError
        If the split folder, a scene CSV or a listed image is missing.
    """
    temperature = str(color_temperatures[illuminant])
    location = locations[illuminant]
    csv_name = 'dataset_' + temperature + '_' + location + '.csv'

    source_root = os.path.join('data', step)
    target_dir = os.path.join(destination_dataset_path, step, illuminant)
    # shutil.copy does not create missing folders, so make sure it exists.
    os.makedirs(target_dir, exist_ok = True)

    scene_frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the numeric
    # prefix given to each scene stable from one run to the next.
    for index, scene in enumerate(sorted(os.listdir(source_root))):
        data_path = os.path.join(source_root, scene, temperature, location)
        data = pd.read_csv(os.path.join(data_path, csv_name))

        # Copy the images under a scene-prefixed name to avoid duplicates.
        prefix = str(index) + '_'
        for image_name in data['rendered_image']:
            shutil.copy(os.path.join(data_path, image_name),
                        os.path.join(target_dir, prefix + image_name))

        # Keep the table in sync with the renamed copies.
        data['rendered_image'] = data['rendered_image'].map(
            lambda image_name: prefix + image_name)
        scene_frames.append(data)

    if scene_frames:
        # Concatenate once instead of growing the frame inside the loop.
        dataset = pd.concat(scene_frames, sort = False)
        requested = list(columns)
        extras = [name for name in dataset.columns if name not in requested]
        dataset = dataset.reindex(columns = requested + extras)
    else:
        dataset = pd.DataFrame(columns = columns)

    # Write next to the copied images, under the requested split/illuminant.
    dataset.to_csv(os.path.join(target_dir, 'dataset_' + illuminant + '.csv'))
    return dataset

# Build the training set of illuminant A, then preview its first five rows.
dataset_A = generate_dataset('A', 'train', col_names)
dataset_A.head(5)

Unnamed: 0.1,Unnamed: 0,scene,illuminant,scene_center_x,scene_center_y,scene_center_z,rendered_image,camera_location_x,camera_location_y,camera_location_z,...,N,NE,E,SE,S,SW,W,white_balance,exposure_compensation,pointlight_intensity
0,30,sm_en_pipe_02_132020117142248,2500.0,-4965.74707,4025.404297,1731.2854,0_image30.png,-5351.129883,4026.94873,1841.681152,...,0,0,0,0,0,1,0,6500.0,0.0,431.074921Candelas
1,70,sm_en_pipe_02_102020117142251,2500.0,-4768.251953,4335.400391,1747.966553,0_image70.png,-5060.358887,4279.964844,1810.102905,...,0,0,0,0,0,1,0,6500.0,0.0,326.617767Candelas
2,110,sm_en_Roof_01_01b32020117142254,2500.0,-4224.233398,3687.82373,1660.991577,0_image110.png,-4283.608398,3774.779053,1680.550293,...,0,0,0,0,0,1,0,6500.0,0.0,115.159149Candelas
3,150,sm_en_Roof_01_01b32020117142257,2500.0,-3506.981689,3292.172363,1660.991577,0_image150.png,-3742.747559,3163.341553,1672.897461,...,0,0,0,0,0,1,0,6500.0,0.0,292.93692Candelas
4,190,sm_en_Roof_01_127202011714231,2500.0,-4164.967285,1103.062744,995.453857,0_image190.png,-4198.052246,1839.492188,1278.002075,...,0,0,0,0,0,1,0,6500.0,0.0,848.917419Candelas
