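## Data preparation for CycleGAN
This notebook copies the rendered images of two illuminants into the `train` / `test` / `validate` folders (each with an `A` and a `B` sub-folder) used to train a CycleGAN that translates images from one illuminant to the other.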

In [25]:
import os
import pandas as pd
import numpy
import sys
import shutil
from pathlib import Path

In [2]:
# Peek at the column names of one per-location metadata CSV.
# NOTE(review): absolute, machine-specific path — the rest of the notebook reads from ./data instead.
pd.read_csv('/scratch/barthas/data/train/scene_abandonned_city_54/2500/E/dataset_2500_E.csv').columns


Index(['Unnamed: 0', 'scene', 'illuminant', 'scene_center_x', 'scene_center_y',
       'scene_center_z', 'rendered_image', 'camera_location_x',
       'camera_location_y', 'camera_location_z', 'NW', 'N', 'NE', 'E', 'SE',
       'S', 'SW', 'W', 'white_balance', 'exposure_compensation',
       'pointlight_intensity'],
      dtype='object')

In [24]:
# Desired illuminants {A, B}: each domain label maps to the colour-temperature
# and light-location values used as directory / CSV name components under ./data.
color_temperatures = {'A': 2500, 'B': 4500}
locations = {'A': 'SW', 'B': 'SE'}

# Destination layout: <destination_dataset_path>/<split>/<domain>/
destination_dataset_path = './datasets/2500_4500/'
for _split in ('train', 'test', 'validate'):
    for _domain in color_temperatures:
        # parents=True also creates the destination root and the split folder,
        # so one call per leaf directory is enough.
        Path(os.path.join(destination_dataset_path, _split, _domain)).mkdir(parents=True, exist_ok=True)

# Source splits; each contains one sub-directory per scene.
train_path = './data/train/'
test_path = './data/test/'
validate_path = './data/validate/'

train_scenes = os.listdir(train_path)
test_scenes = os.listdir(test_path)
validate_scenes = os.listdir(validate_path)


# Column layout of the per-location metadata CSVs.
col_names = ['Unnamed: 0', 'scene', 'illuminant', 'scene_center_x', 'scene_center_y',
             'scene_center_z', 'rendered_image', 'camera_location_x',
             'camera_location_y', 'camera_location_z', 'NW', 'N', 'NE', 'E', 'SE',
             'S', 'SW', 'W', 'white_balance', 'exposure_compensation', 'pointlight_intensity']


def generate_dataset_both(illuminant = 'A', step = 'train', columns = col_names):
    """Copy one domain's images for a single light location and return their metadata.

    For every scene directory under ``./data/<step>`` the CSV stored in
    ``<scene>/<color_temperatures[illuminant]>/<locations[illuminant]>/`` is
    read, and every file listed in its ``rendered_image`` column is copied to
    ``<destination_dataset_path>/<step>/<illuminant>/``. Copied files (and the
    ``rendered_image`` column) get the scene's position in the sorted listing
    as a ``"<index>_"`` prefix so that scenes do not overwrite each other.

    Parameters
    ----------
    illuminant : str
        Key into the module-level ``color_temperatures`` and ``locations``.
    step : str
        Name of the split; used both as source sub-directory of ``./data``
        and as sub-directory of the destination.
    columns : sequence of str
        Columns placed first in the returned frame. Columns found in the CSVs
        but not listed here are kept and placed after them.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all per-scene CSVs (original row indices kept) with
        the renamed ``rendered_image`` values; an empty frame with ``columns``
        when the split contains no scene.

    Raises
    ------
    KeyError
        If ``illuminant`` is not configured.
    FileNotFoundError
        If the split directory, a scene CSV or a listed image is missing.
    """
    temperature = str(color_temperatures[illuminant])
    location = locations[illuminant]
    step_dir = os.path.join('.', 'data', step)
    target_dir = os.path.join(destination_dataset_path, step, illuminant)
    # Make sure the copy target exists. Source directories are deliberately
    # NOT created: a missing one should surface as an error, not as an empty folder.
    Path(target_dir).mkdir(parents=True, exist_ok=True)
    dataset_name = 'dataset_' + temperature + '_' + location + '.csv'

    frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the numeric
    # scene prefix reproducible between runs and machines.
    for index, scene in enumerate(sorted(os.listdir(step_dir))):
        data_path = os.path.join(step_dir, scene, temperature, location)
        data = pd.read_csv(os.path.join(data_path, dataset_name))
        # Rename images and copy them to avoid duplicates
        prefixed_names = [str(index) + '_' + image_name for image_name in data['rendered_image']]
        for image_name, prefixed_name in zip(data['rendered_image'], prefixed_names):
            shutil.copy(os.path.join(data_path, image_name),
                        os.path.join(target_dir, prefixed_name))
        data['rendered_image'] = prefixed_names
        frames.append(data)

    if not frames:
        return pd.DataFrame(columns = columns)

    # Concatenate once instead of growing the frame per scene
    # (DataFrame.append is gone in pandas >= 2.0).
    dataset = pd.concat(frames)
    extra_columns = [name for name in dataset.columns if name not in columns]
    #dataset.to_csv(os.path.join(destination_dataset_path, 'train', 'A', 'dataset_A.csv'))
    return dataset.reindex(columns = list(columns) + extra_columns)

#dataset_A = generate_dataset_both(illuminant = 'A', step = 'train', columns = col_names)
#dataset_A.head()

# Light-location sub-directories visited for each scene; the names match the
# one-hot location columns of the metadata CSVs.
locations_list = ['SW', 'S', 'SE', 'E', 'NE', 'N', 'NW', 'W']

def generate_dataset_color_temperature(illuminant = 'A', step ='train', columns = col_names):
    """Copy one domain's images for every light location and return their metadata.

    For every scene directory under ``./data/<step>`` and every entry of
    ``locations_list`` the CSV stored in
    ``<scene>/<color_temperatures[illuminant]>/<location>/`` is read, and
    every file listed in its ``rendered_image`` column is copied to
    ``<destination_dataset_path>/<step>/<illuminant>/``. Copied files (and the
    ``rendered_image`` column) get the scene's position in the sorted listing
    as a ``"<index>_"`` prefix so that scenes do not overwrite each other.

    Parameters
    ----------
    illuminant : str
        Key into the module-level ``color_temperatures``.
    step : str
        Name of the split; used both as source sub-directory of ``./data``
        and as sub-directory of the destination.
    columns : sequence of str
        Columns placed first in the returned frame. Columns found in the CSVs
        but not listed here are kept and placed after them.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all per-scene, per-location CSVs (original row
        indices kept) with the renamed ``rendered_image`` values; an empty
        frame with ``columns`` when the split contains no scene.

    Raises
    ------
    KeyError
        If ``illuminant`` is not configured.
    FileNotFoundError
        If the split directory, a CSV or a listed image is missing.
    """
    temperature = str(color_temperatures[illuminant])
    step_dir = os.path.join('.', 'data', step)
    target_dir = os.path.join(destination_dataset_path, step, illuminant)
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    frames = []
    # os.listdir returns entries in arbitrary order; sorting keeps the numeric
    # scene prefix reproducible between runs and machines.
    for index, scene in enumerate(sorted(os.listdir(step_dir))):
        for location in locations_list:
            data_path = os.path.join(step_dir, scene, temperature, location)
            dataset_name = 'dataset_' + temperature + '_' + location + '.csv'
            data = pd.read_csv(os.path.join(data_path, dataset_name))
            # Rename images and copy them to avoid duplicates.
            # NOTE(review): the prefix only encodes the scene, so this assumes image
            # file names are unique across a scene's locations — verify, otherwise
            # later locations overwrite earlier ones.
            prefixed_names = [str(index) + '_' + image_name for image_name in data['rendered_image']]
            for image_name, prefixed_name in zip(data['rendered_image'], prefixed_names):
                shutil.copy(os.path.join(data_path, image_name),
                            os.path.join(target_dir, prefixed_name))
            data['rendered_image'] = prefixed_names
            frames.append(data)

    if not frames:
        return pd.DataFrame(columns = columns)

    # Concatenate once instead of growing the frame per CSV
    # (DataFrame.append is gone in pandas >= 2.0).
    dataset = pd.concat(frames)
    extra_columns = [name for name in dataset.columns if name not in columns]
    #dataset.to_csv(os.path.join(destination_dataset_path, 'train', 'A', 'dataset_A.csv'))
    return dataset.reindex(columns = list(columns) + extra_columns)

In [4]:
# Train split, domain A: copy the images of color_temperatures['A'] from every
# location in locations_list and preview the combined metadata.
dataset_A = generate_dataset_color_temperature(illuminant = 'A', step = 'train', columns = col_names)
dataset_A.head()
# Disabled alternative: use only the single location configured in `locations`.
#dataset_A = generate_dataset_both(illuminant = 'A', step = 'train', columns = col_names)
#dataset_A.head()


Unnamed: 0.1,Unnamed: 0,scene,illuminant,scene_center_x,scene_center_y,scene_center_z,rendered_image,camera_location_x,camera_location_y,camera_location_z,...,N,NE,E,SE,S,SW,W,white_balance,exposure_compensation,pointlight_intensity
0,30,Landscape12020119222725,2500.0,15129.172852,10193.94043,263.128418,0_image30.png,15363.064453,10035.955078,386.719604,...,0,0,0,0,0,1,0,6500.0,0.0,331.325653Candelas
1,70,S_Cliff_Large_142020119222734,2500.0,13883.899414,8892.037109,102.170166,0_image70.png,14086.230469,8951.983398,174.300888,...,0,0,0,0,0,1,0,6500.0,0.0,239.806686Candelas
2,110,S_Cliff_Large_142020119222738,2500.0,13877.6875,8818.366211,112.674622,0_image110.png,14010.418945,8715.196289,150.788803,...,0,0,0,0,0,1,0,6500.0,0.0,185.359619Candelas
3,150,Signpost_OneDir32020119222745,2500.0,13502.585938,8283.014648,376.52713,0_image150.png,13822.921875,8197.805664,435.604462,...,0,0,0,0,0,1,0,6500.0,0.0,362.055756Candelas
4,190,WoodenFence_Small72020119222757,2500.0,13357.870117,7861.131836,222.214508,0_image190.png,13488.060547,7824.246582,267.690277,...,0,0,0,0,0,1,0,6500.0,0.0,153.50145Candelas


In [5]:
# Train split, domain B: copy the images of color_temperatures['B'] from every
# location in locations_list and preview the combined metadata.
# NOTE(review): the saved output of this cell shows illuminant 6500.0 while
# color_temperatures['B'] is 4500 — the output looks stale; re-run to confirm.
dataset_B = generate_dataset_color_temperature(illuminant = 'B', step = 'train', columns = col_names)
dataset_B.head()
# Disabled alternative: use only the single location configured in `locations`.
#dataset_B = generate_dataset_both(illuminant = 'B', step = 'train', columns = col_names)
#dataset_B.head()

Unnamed: 0.1,Unnamed: 0,scene,illuminant,scene_center_x,scene_center_y,scene_center_z,rendered_image,camera_location_x,camera_location_y,camera_location_z,...,N,NE,E,SE,S,SW,W,white_balance,exposure_compensation,pointlight_intensity
0,34,Landscape12020119222725,6500.0,15129.172852,10193.94043,263.128418,0_image34.png,15363.064453,10035.955078,386.719604,...,0,0,0,0,0,1,0,6500.0,0.0,331.325653Candelas
1,74,S_Cliff_Large_142020119222734,6500.0,13883.899414,8892.037109,102.170166,0_image74.png,14086.230469,8951.983398,174.300888,...,0,0,0,0,0,1,0,6500.0,0.0,239.806686Candelas
2,114,S_Cliff_Large_142020119222738,6500.0,13877.6875,8818.366211,112.674622,0_image114.png,14010.418945,8715.196289,150.788803,...,0,0,0,0,0,1,0,6500.0,0.0,185.359619Candelas
3,154,Signpost_OneDir32020119222745,6500.0,13502.585938,8283.014648,376.52713,0_image154.png,13822.921875,8197.805664,435.604462,...,0,0,0,0,0,1,0,6500.0,0.0,362.055756Candelas
4,194,WoodenFence_Small72020119222757,6500.0,13357.870117,7861.131836,222.214508,0_image194.png,13488.060547,7824.246582,267.690277,...,0,0,0,0,0,1,0,6500.0,0.0,153.50145Candelas


In [26]:
# Test split, domain A: copy the images of color_temperatures['A'] from every
# location in locations_list and preview the combined metadata.
dataset_C = generate_dataset_color_temperature(illuminant = 'A', step = 'test', columns = col_names)
dataset_C.head()
# Disabled alternative: use only the single location configured in `locations`.
#dataset_C = generate_dataset_both(illuminant = 'A', step = 'test', columns = col_names)
#dataset_C.head()

Unnamed: 0.1,Unnamed: 0,scene,illuminant,scene_center_x,scene_center_y,scene_center_z,rendered_image,camera_location_x,camera_location_y,camera_location_z,...,N,NE,E,SE,S,SW,W,white_balance,exposure_compensation,pointlight_intensity
0,30,BP_Castle01202012111252,2500.0,-288.643311,-4780.570313,1435.449219,0_image30.png,-494.628357,-4455.423828,1508.005981,...,0,0,0,0,0,1,0,6500.0,0.0,421.177826Candelas
1,70,BP_Castle01202012111310,2500.0,191.438965,-3981.077881,1641.610596,0_image70.png,-453.2258,-4645.415039,2266.963623,...,0,0,0,0,0,1,0,6500.0,0.0,1201.268921Candelas
2,110,BP_Castle01202012111313,2500.0,-194.687317,-3330.144287,2161.5,0_image110.png,-468.776215,-3807.928711,2345.078613,...,0,0,0,0,0,1,0,6500.0,0.0,624.329651Candelas
3,150,BP_Castle01202012111317,2500.0,-1458.278198,-3447.679932,2174.355469,0_image150.png,-985.7948,-3646.764404,2281.564209,...,0,0,0,0,0,1,0,6500.0,0.0,563.247864Candelas
4,190,BP_Castle01202012111324,2500.0,-3057.431885,-3972.005859,2453.312012,0_image190.png,-2999.455566,-3284.899658,2737.53833,...,0,0,0,0,0,1,0,6500.0,0.0,801.995483Candelas


In [27]:
# Test split, domain B: copy the images of color_temperatures['B'] from every
# location in locations_list and preview the combined metadata.
dataset_D = generate_dataset_color_temperature(illuminant = 'B', step = 'test', columns = col_names)
dataset_D.head()
# Disabled alternative: use only the single location configured in `locations`.
#dataset_D = generate_dataset_both(illuminant = 'B', step = 'test', columns = col_names)
#dataset_D.head()

Unnamed: 0.1,Unnamed: 0,scene,illuminant,scene_center_x,scene_center_y,scene_center_z,rendered_image,camera_location_x,camera_location_y,camera_location_z,...,N,NE,E,SE,S,SW,W,white_balance,exposure_compensation,pointlight_intensity
0,32,BP_Castle01202012111252,4500.0,-288.643311,-4780.570313,1435.449219,0_image32.png,-494.628357,-4455.423828,1508.005981,...,0,0,0,0,0,1,0,6500.0,0.0,421.177826Candelas
1,72,BP_Castle01202012111310,4500.0,191.438965,-3981.077881,1641.610596,0_image72.png,-453.2258,-4645.415039,2266.963623,...,0,0,0,0,0,1,0,6500.0,0.0,1201.268921Candelas
2,112,BP_Castle01202012111313,4500.0,-194.687317,-3330.144287,2161.5,0_image112.png,-468.776215,-3807.928711,2345.078613,...,0,0,0,0,0,1,0,6500.0,0.0,624.329651Candelas
3,152,BP_Castle01202012111317,4500.0,-1458.278198,-3447.679932,2174.355469,0_image152.png,-985.7948,-3646.764404,2281.564209,...,0,0,0,0,0,1,0,6500.0,0.0,563.247864Candelas
4,192,BP_Castle01202012111324,4500.0,-3057.431885,-3972.005859,2453.312012,0_image192.png,-2999.455566,-3284.899658,2737.53833,...,0,0,0,0,0,1,0,6500.0,0.0,801.995483Candelas


In [None]:
# Validation split, domain A: copy the images of color_temperatures['A'] from
# every location in locations_list and preview the combined metadata.
dataset_E = generate_dataset_color_temperature(illuminant = 'A', step = 'validate', columns = col_names)
dataset_E.head()
# Disabled alternative: use only the single location configured in `locations`.
#dataset_E = generate_dataset_both(illuminant = 'A', step = 'validate', columns = col_names)
#dataset_E.head()

In [None]:
# Validation split, domain B: copy the images of color_temperatures['B'] from
# every location in locations_list and preview the combined metadata.
dataset_F = generate_dataset_color_temperature(illuminant = 'B', step = 'validate', columns = col_names)
dataset_F.head()
# Disabled alternative: use only the single location configured in `locations`.
#dataset_F = generate_dataset_both(illuminant = 'B', step = 'validate', columns = col_names)
#dataset_F.head()

In [17]:
import glob

# NOTE(review): scratch snippet. 'yourpath' is a placeholder, and neither `Image`
# nor `image_list` is defined or imported in the visible part of the notebook, so
# this raises NameError as soon as the pattern matches a file. Import/define them
# or delete the cell.
for filename in glob.glob('yourpath/*.gif'): #assuming gif
    im=Image.open(filename)
    image_list.append(im)

In [31]:
import glob
# Sanity check of the copy step: list the PNG files now present in the
# test split of domain A.
glob.glob('./datasets/2500_4500/test/A/*.png')

['./datasets/2500_4500/test/A\\0_image0.png',
 './datasets/2500_4500/test/A\\0_image10.png',
 './datasets/2500_4500/test/A\\0_image100.png',
 './datasets/2500_4500/test/A\\0_image105.png',
 './datasets/2500_4500/test/A\\0_image110.png',
 './datasets/2500_4500/test/A\\0_image115.png',
 './datasets/2500_4500/test/A\\0_image120.png',
 './datasets/2500_4500/test/A\\0_image125.png',
 './datasets/2500_4500/test/A\\0_image130.png',
 './datasets/2500_4500/test/A\\0_image135.png',
 './datasets/2500_4500/test/A\\0_image140.png',
 './datasets/2500_4500/test/A\\0_image145.png',
 './datasets/2500_4500/test/A\\0_image15.png',
 './datasets/2500_4500/test/A\\0_image150.png',
 './datasets/2500_4500/test/A\\0_image155.png',
 './datasets/2500_4500/test/A\\0_image160.png',
 './datasets/2500_4500/test/A\\0_image165.png',
 './datasets/2500_4500/test/A\\0_image170.png',
 './datasets/2500_4500/test/A\\0_image175.png',
 './datasets/2500_4500/test/A\\0_image180.png',
 './datasets/2500_4500/test/A\\0_image185.pn