In this notebook:

- Night images are separated from daytime images based on the time of day (extracted from the file names)
- The names and labels of the daytime images are saved in a .csv file
- Daytime images are split into SIM and NAO images by moving them into folders with these names

Separating daytime and night images by hour is simple but not perfect. Some images taken at 19:00 still looked like daytime images, yet they were sent to the night_images folder. The number of daytime-looking images sent to night_images was small, which justifies still using this approach. In the future, a better method can be applied.


In [1]:
# Folder where the images are kept (path relative to the current working directory).

images_folder = "../data/TTG_TUR/"

In [2]:
import os
import glob
import shutil

def split_morning_night_images(images_folder = os.getcwd()):
    """ Move night images into a 'night_images' sub-folder of images_folder.

    An image counts as a night image when the hour parsed from its file name
    is greater than 18 or lower than 5. File names are expected to look like
    '2017-12-12 11-53-41_SIM.jpeg': the hour is the number that follows the
    last space and precedes the first '-' after it.

    Input:
        - images_folder: Path of the folder holding the .jpeg files. If not
          specified, the working directory at the time this function was
          defined is used.

    Raises:
        - ValueError: if the hour cannot be parsed from a .jpeg file name.
    """

    # make a directory to keep night images

    night_dir = os.path.join(images_folder, 'night_images')
    print(night_dir)
    os.makedirs(night_dir, exist_ok=True)

    # move night files to the new directory

    # os.path.join / os.path.basename keep this working on every OS and
    # regardless of whether images_folder ends with a path separator.
    for file_path in glob.glob(os.path.join(images_folder, '*.jpeg')):
        filename = os.path.basename(file_path)
        # Parse the hour from the file name only, so that spaces or dashes in
        # the folder path cannot interfere.
        hour = int(filename.split(' ')[-1].split('-')[0])
        if hour > 18 or hour < 5:
            shutil.move(file_path, os.path.join(night_dir, filename))

In [3]:
# Move the night images (hour > 18 or hour < 5) into the 'night_images' sub-folder of images_folder.

split_morning_night_images(images_folder)

../data/TTG_TUR//night_images/


In [4]:
import pandas as pd

def create_dataframe_morning_images(output_folder, images_folder = os.getcwd()):
    """ Create a dataframe with the names of daytime images and their labels.

    A daytime image is one whose hour, parsed from the file name, lies between
    5 and 18 (both included). File names are expected to look like
    '2017-12-12 11-53-41_SIM.jpeg'. The label is taken from the text after the
    last '_' of the name: 'SIM' gives 1, anything else gives 0.

    Input:
        - output_folder: path of folder to save .csv file with information about daytime images.
        - images_folder: Path of the folder holding the .jpeg files. If not
          specified, the working directory at the time this function was
          defined is used.

    Output:
        - .csv file : 'info_day_images.csv' written to output_folder, with
          columns 'image_name' (file name without extension) and 'label'.
        - dataframe with the same information about daytime images.

    Raises:
        - ValueError: if the hour cannot be parsed from a .jpeg file name.
    """

    image_names = []
    labels = []

    # Sorted so that the row order of the .csv does not depend on the order
    # in which the file system happens to list the files.
    for file_path in sorted(glob.glob(os.path.join(images_folder, '*.jpeg'))):
        # File name without folder and extension, independent of the OS.
        name = os.path.splitext(os.path.basename(file_path))[0]
        hour = int(name.split(' ')[-1].split('-')[0])
        # Both bounds must hold: with 'or' every hour passed the test and
        # night images were never filtered out.
        if 5 <= hour <= 18:
            image_names.append(name)
            labels.append(1 if name.split('_')[-1] == 'SIM' else 0)

    dict_images = {'image_name': image_names, 'label': labels}

    # save in .csv

    df = pd.DataFrame(dict_images)

    df.to_csv(os.path.join(output_folder, "info_day_images.csv"), index = False)

    return df


In [5]:
# Folder where the .csv file with the daytime image names and labels is saved.

output_folder = "../data/"

In [6]:
# Create a dataframe of daytime images with labels taken from the file names ('SIM' = 1, 'NAO' = 0),
# which indicate firebreak, and save it as info_day_images.csv in output_folder.

create_dataframe_morning_images(output_folder, images_folder = images_folder)

Unnamed: 0,image_name,label
0,2017-12-12 11-53-41_SIM,1
1,2017-12-12 11-54-05_NAO,0
2,2017-12-12 11-54-15_SIM,1
3,2017-12-12 11-55-28_SIM,1
4,2017-12-12 11-56-30_NAO,0
...,...,...
6320,2019-09-23 07-26-55_SIM,1
6321,2019-09-23 07-26-57_SIM,1
6322,2019-09-23 07-28-24_SIM,1
6323,2019-09-23 07-35-16_SIM,1


In [8]:
# Sanity check: read the saved .csv file back and display it.

df = pd.read_csv(output_folder+"info_day_images.csv")
df

Unnamed: 0,image_name,label
0,2017-12-12 11-53-41_SIM,1
1,2017-12-12 11-54-05_NAO,0
2,2017-12-12 11-54-15_SIM,1
3,2017-12-12 11-55-28_SIM,1
4,2017-12-12 11-56-30_NAO,0
...,...,...
6320,2019-09-23 07-26-55_SIM,1
6321,2019-09-23 07-26-57_SIM,1
6322,2019-09-23 07-28-24_SIM,1
6323,2019-09-23 07-35-16_SIM,1


In [9]:
import os
import glob
import shutil

def split_SIM_NAO_images(images_folder = os.getcwd()):

    """ Move the .jpeg files of images_folder into sub-folders named after their label.

    The label is the text after the last '_' of the file name (extension
    removed), e.g. '2017-12-12 11-53-41_SIM.jpeg' is moved to
    '<images_folder>/SIM/'. A sub-folder is created for each distinct label found.

    Input:
        - images_folder: Path of the folder holding the .jpeg files. If not
          specified, the working directory at the time this function was
          defined is used.

    """

    # os.path.join / os.path.basename keep this working on every OS and
    # regardless of whether images_folder ends with a path separator.
    for file_path in glob.glob(os.path.join(images_folder, '*.jpeg')):

        filename = os.path.basename(file_path)
        label = os.path.splitext(filename)[0].split('_')[-1]
        target_dir = os.path.join(images_folder, label)

        # exist_ok avoids the separate exists-then-create check.
        os.makedirs(target_dir, exist_ok=True)

        shutil.move(file_path, os.path.join(target_dir, filename))

In [10]:
# Move the images remaining in images_folder into sub-folders (SIM, NAO) named after the label in each file name.
split_SIM_NAO_images(images_folder)