In [None]:
#Import necessary packages
import os
import glob
from tqdm import tqdm
import pandas as pd
import numpy as np
import rasterio

In [None]:
# Creation of dataframe from the chip info
# Each column holds the sorted GeoTIFF paths of one layer. Rows pair the layers
# purely by sorted position, so this presumably relies on every folder using
# matching chip file names — verify against the data layout.
# pd.DataFrame raises ValueError if the folders hold different numbers of files.
layer_dirs = {'vv': ('data','chips','VV'),
              'vh': ('data','chips','VH'),
              'dem': ('data','dem'),
              'pwater': ('data','pwater'),
              'label': ('data','labels')}
# `recursive=True` was dropped: it only affects patterns containing '**'.
d = {layer: sorted(glob.glob(os.path.join(*parts, '*.tif')))
     for layer, parts in layer_dirs.items()}
df = pd.DataFrame(data=d)

# Chip statistics calculation
# water_percent = share (0-100) of non-zero pixels in band 1 of each label raster.
water_percent = []
for label_path in tqdm(df['label'], total=len(df), position=0, leave=True,
                       desc='Chip statistics calculation'):
    # The context manager closes the dataset even if reading the band fails.
    with rasterio.open(label_path) as wlabel:
        wlabel_band = wlabel.read(1)
    water_percent.append(np.count_nonzero(wlabel_band)*100 / wlabel_band.size)

# Assign the column in one step so it exists (as float64) even when no chips
# were found; the downstream filters can then run on an empty table.
df['water_percent'] = pd.Series(water_percent, index=df.index, dtype='float64')

In [None]:
# Persist the complete chip info dataframe (all chips, unfiltered) as CSV
chip_info_csv = os.path.join('data','chip_info.csv')
df.to_csv(chip_info_csv, index=False)

In [None]:
# Keep only chips whose water_percent is at least 1 and strictly below 100
df_filtered = df.loc[
    lambda chips: (chips['water_percent'] >= 1) & (chips['water_percent'] < 100.0)
]

In [None]:
# Split the filtered chips into two datasets at the 30 % water mark
filtered_wp = df_filtered['water_percent']
# dataset01: water_percent >= 30 (despite the "gt" in the name, 30.0 is included)
df_wp_gt_30 = df_filtered[filtered_wp.ge(30.0)]
# dataset02: water_percent < 30
df_wp_lt_30 = df_filtered[filtered_wp.lt(30.0)]

In [None]:
# Building the held-out test set: 30 chips with water_percent >= 30 and
# 10 chips with water_percent < 30, sampled with a fixed seed.
# Both pools are rebuilt from df_filtered instead of being read back from
# df_wp_gt_30 / df_wp_lt_30: this cell overwrites those two names with the
# training remainder, so sampling from them would shrink the training sets and
# select a different test set every time the cell is re-run.
# DataFrame.sample raises ValueError if a pool holds fewer rows than requested.
wp_gt_30_pool = df_filtered[df_filtered['water_percent'] >= 30.0]
wp_lt_30_pool = df_filtered[df_filtered['water_percent'] < 30.0]

df_wp_gt_30_test = wp_gt_30_pool.sample(n = 30, random_state = 42)
df_wp_gt_30 = wp_gt_30_pool.drop(df_wp_gt_30_test.index)

df_wp_lt_30_test = wp_lt_30_pool.sample(n = 10, random_state = 42)
df_wp_lt_30 = wp_lt_30_pool.drop(df_wp_lt_30_test.index)

# NOTE(review): sampled chips with water_percent >= 99.9 are discarded below,
# after they have already been removed from the training remainder, so they end
# up in neither split — confirm this is intended.
chip_test_df = pd.concat([df_wp_gt_30_test,df_wp_lt_30_test])
chip_test_df = chip_test_df[chip_test_df['water_percent'] < 99.9]
# Order the test chips from least to most water.
chip_test_df = chip_test_df.sort_values(by=['water_percent'])

In [None]:
# Write the three splits to CSV under data/ (row index omitted):
#   chip_set1.csv -> dataset1, chip_set2.csv -> dataset2, chip_test.csv -> test data
for csv_name, chip_frame in (('chip_set1.csv', df_wp_gt_30),
                             ('chip_set2.csv', df_wp_lt_30),
                             ('chip_test.csv', chip_test_df)):
    chip_frame.to_csv(os.path.join('data', csv_name), index=False)