# CREATING DATASET



In [None]:
import os
import datetime
import numpy as np
import pandas as pd
from PIL import Image

In [None]:
# Empty accumulators for matched image pairs: each row holds the path of an
# RGB image and the path of the NIR image paired with it.
dataset, val_dataset = (
    pd.DataFrame(columns=["rgb_path", "nir_path"]) for _ in range(2)
)

def lets_merge_datasets(dataset, df_to_merge):
  """Append the rows of `df_to_merge` below `dataset` and report the new size.

  Neither input is modified. The original row labels of both frames are kept
  as-is (no re-indexing happens here), and the length of the combined frame
  is printed before it is returned.
  """
  merged = pd.concat([dataset, df_to_merge])
  total_rows = len(merged)
  print(f"Dataset len is: {total_rows}")
  return merged

In [None]:
#1 Build a DataFrame holding each image's path together with its modification (mtime) and change/creation (ctime) timestamps
def get_df_time(dir, which="rgb"):
  """Collect the path and file timestamps of every entry in a directory.

  Args:
    dir: Directory to scan (non-recursive; every name returned by
      os.listdir is included).
    which: Prefix used for the column names, e.g. "rgb" or "nir".

  Returns:
    A DataFrame with a 0..n-1 index, one row per directory entry ordered by
    file name, and the columns `{which}_path`, `{which}_m_time`,
    `{which}_m_time_ns`, `{which}_c_time` and `{which}_c_time_ns`.
    All four time columns are stored as floats; the `_ns` columns are
    nanoseconds, the others seconds. An empty directory yields an empty
    frame that still has these five columns.

  Raises:
    OSError: if `dir` cannot be listed or an entry cannot be stat'ed.
  """
  columns = [f"{which}_path", f"{which}_m_time", f"{which}_m_time_ns", f"{which}_c_time", f"{which}_c_time_ns"]

  records = []
  # os.listdir returns names in arbitrary order; sorting makes the row order
  # (and therefore any order-dependent matching done on it) reproducible.
  for name in sorted(os.listdir(dir)):
    img_path = os.path.join(dir, name)
    # A single stat call gives all four timestamps from the same snapshot.
    # NOTE: the meaning of ctime is platform-dependent (see os.stat_result).
    stats = os.stat(img_path)
    records.append({
        f"{which}_path": img_path,
        # Kept as floats to preserve the column dtype callers already rely on;
        # a float cannot hold every nanosecond value at epoch magnitude, but
        # the rounding is far below a millisecond.
        f"{which}_m_time": float(stats.st_mtime),
        f"{which}_m_time_ns": float(stats.st_mtime_ns),
        f"{which}_c_time": float(stats.st_ctime),
        f"{which}_c_time_ns": float(stats.st_ctime_ns),
    })

  # Build the frame once instead of concatenating row by row inside the loop.
  return pd.DataFrame(records, columns=columns)


#2 Build a DataFrame of matched (RGB, NIR) image-path pairs from the given RGB and NIR directories
def get_df_matches(rgb_dir, nir_dir, threshold=100000000):
  """Pair RGB and NIR images whose file timestamps are close together.

  Every RGB image is compared against the NIR images in row order and paired
  with the first still-unpaired NIR image whose ctime AND mtime both lie
  within `threshold` nanoseconds of its own. A NIR image is used for at most
  one pair; RGB images without a partner are dropped.

  Args:
    rgb_dir: Directory containing the RGB images.
    nir_dir: Directory containing the NIR images.
    threshold: Maximum allowed timestamp difference in nanoseconds
      (default 100000000 ns, i.e. 0.1 seconds).

  Returns:
    A DataFrame with a 0..n-1 index and the columns `rgb_path` and
    `nir_path`, one row per matched pair. The threshold and the number of
    pairs are printed as a side effect.
  """
  rgb_df = get_df_time(rgb_dir, which="rgb")
  nir_df = get_df_time(nir_dir, which="nir")

  print(f"The threshold set is {threshold} nanoseconds == {threshold * 1e-3} microseconds == {threshold * 1e-6} milliseconds == {threshold * 1e-9} seconds")

  # Plain tuples are much cheaper to scan repeatedly than DataFrame.iloc rows.
  rgb_rows = rgb_df[["rgb_path", "rgb_m_time_ns", "rgb_c_time_ns"]].itertuples(index=False, name=None)
  nir_rows = list(nir_df[["nir_path", "nir_m_time_ns", "nir_c_time_ns"]].itertuples(index=False, name=None))

  # Positions of NIR rows already paired. The previous code called
  # nir_df.drop(...) without keeping the result, so nothing was ever removed
  # and one NIR image could be paired with several RGB images.
  used_nir = set()
  pairs = []

  for rgb_path, rgb_m_time_ns, rgb_c_time_ns in rgb_rows:
    for j, (nir_path, nir_m_time_ns, nir_c_time_ns) in enumerate(nir_rows):
      if j in used_nir:
        continue
      c_close = abs(rgb_c_time_ns - nir_c_time_ns) <= threshold
      m_close = abs(rgb_m_time_ns - nir_m_time_ns) <= threshold
      if c_close and m_close:
        pairs.append({"rgb_path": rgb_path, "nir_path": nir_path})
        used_nir.add(j)
        break  # first acceptable NIR image wins; go to the next RGB image

  # Build the result once instead of concatenating inside the loop.
  data_match = pd.DataFrame(pairs, columns=["rgb_path", "nir_path"])
  print(f"length of this dataframe is - {len(data_match)}")
  return data_match

In [None]:
rgb_dir = r"E:\road 1\R"
nir_dir = r"E:\road 1\N"
# Report how many raw files each folder holds before pairing.
# (os.listdir, not the non-existent os.listrdir, which raised AttributeError.)
print(f"Total No. of RGB images collected is {len(os.listdir(rgb_dir))} and Total No. of NIR images collected is {len(os.listdir(nir_dir))}")

# Pair the images by timestamp and append the pairs to `dataset`.
# NOTE: re-running this cell appends the same pairs a second time.
df_to_merge = get_df_matches(rgb_dir, nir_dir)
dataset = lets_merge_datasets(dataset, df_to_merge)

The threshold set is 100000000 nanoseconds == 100000.0 microseconds == 100.0 milliseconds == 0.1 seconds
length of this dataframe is - 607
Dataset len is: 607


In [None]:
rgb_dir = r"E:\road 2\R"
nir_dir = r"E:\road 2\N"
# Report how many raw files each folder holds before pairing.
# (os.listdir, not the non-existent os.listrdir, which raised AttributeError.)
print(f"Total No. of RGB images collected is {len(os.listdir(rgb_dir))} and Total No. of NIR images collected is {len(os.listdir(nir_dir))}")

# Pair the images by timestamp and append the pairs to `dataset`.
# NOTE: re-running this cell appends the same pairs a second time.
df_to_merge= get_df_matches(rgb_dir, nir_dir)
dataset = lets_merge_datasets(dataset, df_to_merge)

The threshold set is 100000000 nanoseconds == 100000.0 microseconds == 100.0 milliseconds == 0.1 seconds
length of this dataframe is - 690
Dataset len is: 1297


In [None]:
rgb_dir = r"E:\road 3\R"
nir_dir = r"E:\road 3\N"
# Report how many raw files each folder holds before pairing.
# (os.listdir, not the non-existent os.listrdir, which raised AttributeError.)
print(f"Total No. of RGB images collected is {len(os.listdir(rgb_dir))} and Total No. of NIR images collected is {len(os.listdir(nir_dir))}")

# Pair the images by timestamp and append the pairs to `dataset`.
# NOTE: re-running this cell appends the same pairs a second time.
df_to_merge= get_df_matches(rgb_dir, nir_dir)
dataset = lets_merge_datasets(dataset, df_to_merge)

The threshold set is 100000000 nanoseconds == 100000.0 microseconds == 100.0 milliseconds == 0.1 seconds
length of this dataframe is - 457
Dataset len is: 1754


In [None]:
rgb_dir = r"E:\road 4\R"
nir_dir = r"E:\road 4\N"
# Report how many raw files each folder holds before pairing.
# (os.listdir, not the non-existent os.listrdir, which raised AttributeError.)
print(f"Total No. of RGB images collected is {len(os.listdir(rgb_dir))} and Total No. of NIR images collected is {len(os.listdir(nir_dir))}")

# Pair the images by timestamp and append the pairs to `dataset`.
# NOTE: re-running this cell appends the same pairs a second time.
df_to_merge= get_df_matches(rgb_dir, nir_dir)
dataset = lets_merge_datasets(dataset, df_to_merge)

The threshold set is 100000000 nanoseconds == 100000.0 microseconds == 100.0 milliseconds == 0.1 seconds
length of this dataframe is - 200
Dataset len is: 1954


In [None]:
# Give the concatenated pairs a clean 0..n-1 index; the old row labels are
# discarded rather than kept as a column.
dataset = dataset.reset_index(drop=True)

In [None]:
# val dataset
rgb_dir = r"E:\ROAD 6\R"
nir_dir = r"E:\ROAD 6\N"
print(f"Total No. of RGB images collected is {len(os.listdir(rgb_dir))} and Total No. of NIR images collected is {len(os.listdir(nir_dir))}")

# Pair the ROAD 6 images by timestamp and store them, freshly indexed from 0,
# as the validation pairs.
val_dataset = get_df_matches(rgb_dir, nir_dir).reset_index(drop=True)

Total No. of RGB images collected is 327 and Total No. of NIR images collected is 349
The threshold set is 100000000 nanoseconds == 100000.0 microseconds == 100.0 milliseconds == 0.1 seconds
length of this dataframe is - 143


In [None]:
# Root output folders, one per modality.
rgb_dir = r"E:\day-1-dataset\rgb"
nir_dir = r"E:\day-1-dataset\nir"

# Per-split sub-folders (train / val / test) under each modality root.
rgb_train, rgb_val, rgb_test = (
    os.path.join(rgb_dir, split) for split in ("train", "val", "test")
)
nir_train, nir_val, nir_test = (
    os.path.join(nir_dir, split) for split in ("train", "val", "test")
)

In [None]:
# train
# Re-save every pair in `dataset` into the train folders as "<row index>.bmp",
# so the RGB and NIR files of one pair share the same file name.

# Create the output folders up front; saving into a missing folder fails.
os.makedirs(rgb_train, exist_ok=True)
os.makedirs(nir_train, exist_ok=True)

for index, row in dataset.iterrows():
  rgb_path = row['rgb_path']
  nir_path = row['nir_path']

  name = str(index) + '.bmp'

  # The context managers close both source files once the pair is written,
  # so file handles do not accumulate over the whole dataset.
  with Image.open(rgb_path) as rgb_img, Image.open(nir_path) as nir_img:
    rgb_img.save(os.path.join(rgb_train, name))
    nir_img.save(os.path.join(nir_train, name))

In [None]:
#val, test
# Re-save every pair in `val_dataset` into the test folders as
# "<row index>.bmp", so the RGB and NIR files of one pair share a file name.
# NOTE(review): rgb_val / nir_val are defined earlier but nothing is written
# to them here; the validation pairs go to the test folders only. Confirm
# this is the intended split.

# Create the output folders up front; saving into a missing folder fails.
os.makedirs(rgb_test, exist_ok=True)
os.makedirs(nir_test, exist_ok=True)

for index, row in val_dataset.iterrows():
  rgb_path = row['rgb_path']
  nir_path = row['nir_path']

  name = str(index) + '.bmp'

  # The context managers close both source files once the pair is written,
  # so file handles do not accumulate over the whole dataset.
  with Image.open(rgb_path) as rgb_img, Image.open(nir_path) as nir_img:
    rgb_img.save(os.path.join(rgb_test, name))
    nir_img.save(os.path.join(nir_test, name))
