## Data preparation for cyclegan
This notebook copies the rendered images into the `train` / `test` / `validate` × `A` / `B` folder layout needed to train a CycleGAN that translates images from one illuminant (A) to another (B).

In [1]:
import os
import pandas as pd
import numpy
import sys
import shutil
from pathlib import Path

In [2]:
# Peek at the columns of one per-scene CSV; nrows=0 parses only the header row
# instead of loading the whole file just to list its column labels.
pd.read_csv('/scratch/barthas/data/train/scene_abandonned_city_54/2500/E/dataset_2500_E.csv', nrows=0).columns


Index(['Unnamed: 0', 'scene', 'illuminant', 'scene_center_x', 'scene_center_y',
       'scene_center_z', 'rendered_image', 'camera_location_x',
       'camera_location_y', 'camera_location_z', 'NW', 'N', 'NE', 'E', 'SE',
       'S', 'SW', 'W', 'white_balance', 'exposure_compensation',
       'pointlight_intensity'],
      dtype='object')

In [3]:
# Illuminant labels {A, B}: colour temperature and location sub-folder for each
color_temperatures = dict(A=2500, B=4500)
locations = dict(A='SW', B='SE')

# Output tree: <destination>/<split>/<A|B>
destination_dataset_path = '/scratch/barthas/datasets/2500_4500_unaligned/'
for split_name in ('train', 'test', 'validate'):
    for domain in ('A', 'B'):
        # parents=True also creates the destination root and the split folder.
        Path(destination_dataset_path, split_name, domain).mkdir(parents=True, exist_ok=True)

# Source splits and the scene folders each one contains
train_path = '/scratch/barthas/data/train/'
test_path = '/scratch/barthas/data/test/'
validate_path = '/scratch/barthas/data/validate/'

train_scenes = os.listdir(train_path)
test_scenes = os.listdir(test_path)
validate_scenes = os.listdir(validate_path)

# Column layout of the per-scene CSV files
col_names = [
    'Unnamed: 0', 'scene', 'illuminant',
    'scene_center_x', 'scene_center_y', 'scene_center_z',
    'rendered_image',
    'camera_location_x', 'camera_location_y', 'camera_location_z',
    'NW', 'N', 'NE', 'E', 'SE', 'S', 'SW', 'W',
    'white_balance', 'exposure_compensation', 'pointlight_intensity',
]

def generate_dataset_both(illuminant = 'A', step = 'train', columns = col_names):
    """Copy one illuminant's images for every scene of a split and merge the metadata.

    For each scene folder under ``/scratch/barthas/data/<step>`` the CSV
    ``dataset_<temperature>_<location>.csv`` is read from the scene's
    ``<temperature>/<location>`` sub-folder, where temperature and location are
    looked up in the module-level ``color_temperatures`` and ``locations``
    mappings. Every file listed in the ``rendered_image`` column is copied to
    ``<destination_dataset_path>/<step>/<illuminant>/`` with the scene index
    prepended to its name, so equally named images from different scenes do
    not overwrite each other.

    Args:
        illuminant: Key into ``color_temperatures`` and ``locations``.
        step: Name of the split folder to read from and write to.
        columns: Column labels of the empty frame returned when the split
            contains no scenes.

    Returns:
        A DataFrame concatenating the CSV rows of all scenes, with
        ``rendered_image`` rewritten to the prefixed file names.

    Raises:
        KeyError: If ``illuminant`` is not a key of the mappings.
        FileNotFoundError: If an expected CSV or image file is missing.
    """
    temperature = str(color_temperatures[illuminant])
    location = locations[illuminant]
    split_root = os.path.join('/scratch/barthas/', 'data', step)
    target_dir = os.path.join(destination_dataset_path, step, illuminant)
    csv_name = 'dataset_' + temperature + '_' + location + '.csv'

    frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the scene
    # index (and therefore the copied file names) reproducible between runs.
    for index, scene in enumerate(sorted(os.listdir(split_root))):
        # The source tree is only read: a missing folder surfaces as an error
        # from read_csv instead of being silently created as an empty folder.
        data_path = os.path.join(split_root, scene, temperature, location)
        data = pd.read_csv(os.path.join(data_path, csv_name))

        prefix = str(index) + '_'
        for image_name in data['rendered_image']:
            shutil.copy(os.path.join(data_path, image_name),
                        os.path.join(target_dir, prefix + image_name))

        # Keep the metadata in sync with the renamed copies.
        data['rendered_image'] = data['rendered_image'].map(lambda name: prefix + name)
        frames.append(data)

    if not frames:
        return pd.DataFrame(columns=columns)
    # Single concat instead of DataFrame.append in the loop (removed in pandas 2.0).
    return pd.concat(frames)

#dataset_A = generate_dataset_both(illuminant = 'A', step = 'train', columns = col_names)
#dataset_A.head()

# Location sub-folders visited for every scene by generate_dataset_color_temperature
locations_list = ['SW', 'S', 'SE', 'E', 'NE', 'N', 'NW', 'W']

def generate_dataset_color_temperature(illuminant = 'A', step ='train', columns = col_names):
    """Copy an illuminant's images for all locations of every scene and merge the metadata.

    For each scene folder under ``/scratch/barthas/data/<step>`` and each entry
    of ``locations_list``, the CSV ``dataset_<temperature>_<location>.csv`` is
    read from the scene's ``<temperature>/<location>`` sub-folder, where the
    temperature is looked up in the module-level ``color_temperatures``
    mapping. Every file listed in the ``rendered_image`` column is copied to
    ``<destination_dataset_path>/<step>/<illuminant>/`` with the scene index
    prepended to its name.

    Args:
        illuminant: Key into ``color_temperatures``.
        step: Name of the split folder to read from and write to.
        columns: Column labels of the empty frame returned when the split
            contains no scenes.

    Returns:
        A DataFrame concatenating the CSV rows of all scenes and locations,
        with ``rendered_image`` rewritten to the prefixed file names.

    Raises:
        KeyError: If ``illuminant`` is not a key of ``color_temperatures``.
        FileNotFoundError: If an expected CSV or image file is missing.
    """
    temperature = str(color_temperatures[illuminant])
    split_root = os.path.join('/scratch/barthas/', 'data', step)
    target_dir = os.path.join(destination_dataset_path, step, illuminant)

    frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the scene
    # index (and therefore the copied file names) reproducible between runs.
    for index, scene in enumerate(sorted(os.listdir(split_root))):
        prefix = str(index) + '_'
        for location in locations_list:
            data_path = os.path.join(split_root, scene, temperature, location)
            csv_name = 'dataset_' + temperature + '_' + location + '.csv'
            data = pd.read_csv(os.path.join(data_path, csv_name))

            # NOTE(review): the destination name carries the scene index but not
            # the location. If image file names repeat across the locations of a
            # scene, later copies overwrite earlier ones - confirm the names are
            # unique per scene.
            for image_name in data['rendered_image']:
                shutil.copy(os.path.join(data_path, image_name),
                            os.path.join(target_dir, prefix + image_name))

            # Keep the metadata in sync with the renamed copies.
            data['rendered_image'] = data['rendered_image'].map(lambda name: prefix + name)
            frames.append(data)

    if not frames:
        return pd.DataFrame(columns=columns)
    # Single concat instead of DataFrame.append in the loop (removed in pandas 2.0).
    return pd.concat(frames)

In [4]:
# Train split, illuminant A, every location in locations_list
dataset_A = generate_dataset_color_temperature(illuminant='A', step='train', columns=col_names)
dataset_A.head()
# Alternative restricted to the single location in locations['A']:
# dataset_A = generate_dataset_both(illuminant='A', step='train', columns=col_names)
# dataset_A.head()


OSError: [Errno 28] No space left on device

In [None]:
# Train split, illuminant B, every location in locations_list
dataset_B = generate_dataset_color_temperature(illuminant='B', step='train', columns=col_names)
dataset_B.head()
# Alternative restricted to the single location in locations['B']:
# dataset_B = generate_dataset_both(illuminant='B', step='train', columns=col_names)
# dataset_B.head()

In [None]:
# Test split, illuminant A, every location in locations_list
dataset_C = generate_dataset_color_temperature(illuminant='A', step='test', columns=col_names)
dataset_C.head()
# Alternative restricted to the single location in locations['A']:
# dataset_C = generate_dataset_both(illuminant='A', step='test', columns=col_names)
# dataset_C.head()

In [None]:
# Test split, illuminant B, every location in locations_list
dataset_D = generate_dataset_color_temperature(illuminant='B', step='test', columns=col_names)
dataset_D.head()
# Alternative restricted to the single location in locations['B']:
# dataset_D = generate_dataset_both(illuminant='B', step='test', columns=col_names)
# dataset_D.head()

In [None]:
# Validate split, illuminant A, every location in locations_list
dataset_E = generate_dataset_color_temperature(illuminant='A', step='validate', columns=col_names)
dataset_E.head()
# Alternative restricted to the single location in locations['A']:
# dataset_E = generate_dataset_both(illuminant='A', step='validate', columns=col_names)
# dataset_E.head()

In [None]:
# Validate split, illuminant B, every location in locations_list
dataset_F = generate_dataset_color_temperature(illuminant='B', step='validate', columns=col_names)
dataset_F.head()
# Alternative restricted to the single location in locations['B']:
# dataset_F = generate_dataset_both(illuminant='B', step='validate', columns=col_names)
# dataset_F.head()