In [21]:
# import libraries
import json
import tensorflow as tf
import tensorflow.keras.backend as K
import numpy as np 
import pandas as pd
import seaborn as sns
import os
import gc
import rasterio as rio
from PIL import Image
import matplotlib.pyplot as plt
from matplotlib import  cm
import cv2
from matplotlib import animation
from IPython.display import HTML
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [22]:
# import the csv files

s1_data = pd.read_csv('s1_data.csv')
s2_data=pd.read_csv('s2_data.csv')

In [23]:

class CFG:
    """
    This class provides a set of parameters and constants that may be used throughout a machine learning 
    pipeline for image classification, specifically in the context of identifying flooded areas.
    """
    seed = 3 # random initialization of weights in a machine learning model
    img_size = (256,256) # representing the dimensions of an image, specifically 256 x 256 pixels.
    BATCH_SIZE = 3 #  representing the number of samples that will be fed to a machine learning model during training.
    Autotune = tf.data.AUTOTUNE # a constant value from the tf.data.AUTOTUNE module that enables dynamic 
    # allocation of computational resources to improve performance.
    validation_size = 0.2 # a float value of 0.2 representing the fraction of the training dataset to be used for validation during training.
    class_dict= {0:'No Flooding', 
                 1: 'Flooding'}
    
    test_run = False # in training mode

In [24]:
# quick visual confirmation that all label files have corresponding source files in the dataset

# Set the path for Dataset
s1_labels = 'sen12flood/sen12floods_s1_labels/sen12floods_s1_labels/'
s1_tiles = 'sen12flood/sen12floods_s1_source/sen12floods_s1_source/'

s2_tiles = 'sen12flood/sen12floods_s2_source/sen12floods_s2_source/'
s2_labels = 'sen12flood/sen12floods_s2_labels/sen12floods_s2_labels/'


s1_check = 0
for file in os.listdir(s1_labels):
    if os.path.exists(s1_tiles + '/' + file.replace('labels','source')):
        s1_check += 1
        
         
assert s1_check == len(os.listdir(s1_tiles)), 'Not present'
    
s2_check = 0
for file in os.listdir(s2_labels):
    if os.path.exists(s2_tiles + '/' + file.replace('labels','source')):
        s2_check += 1
        
        
assert s2_check == len(os.listdir(s2_tiles)), 'Not present'


s1_check,s2_check 

(3332, 2237)

In [25]:
def load_raster(filepath): # filepath of the raster file to be loaded
    '''load a single band raster'''
    with rio.open(filepath) as file: 
        # the squeeze method is called on the resulting array to remove any singleton dimensions 
        # (i.e., dimensions with size 1). This is done using the axis=0 argument, 
        # which tells squeeze to remove any singleton dimensions along the first axis.  
        raster = file.read().squeeze(axis=0)

        
    return raster

In [26]:
def load_s1_tiffs(folder,
                  scaling_values=[50.,100.]):
    images = []
    i = 0
    for im in sorted(os.listdir(folder)):
         
        if im.rsplit('.',maxsplit=1)[1] == 'tif':
            
            path = folder + '/' + im
            band = load_raster(path)
            band = band / scaling_values[i]
            
            band = cv2.resize(band,
                              CFG.img_size)
            
            images.append(band)
            i+=1 
                    
    return np.dstack(images)


def load_s2_tiffs(folder,
                  scaling_value=10000.):
    images = []
    for im in sorted(os.listdir(folder)):
        if im.rsplit('.',maxsplit=1)[1] == 'tif':    
            path = folder + '/' + im
            band = load_raster(path)
            band = band/ scaling_value
            
            band = cv2.resize(band,CFG.img_size)
            images.append(band)   

    return np.dstack(images)
                    
def load_rgb_tiffs(folder,
                  scaling_value=10000.):
    '''load R,G and B bands'''
    
    images = []
    for im in sorted(os.listdir(folder)):
        name,file_format = im.rsplit('.',maxsplit=1)
        if ((file_format== 'tif') and (name in ['B02','B03','B04'])):    
            path = folder + '/' + im
            band = load_raster(path)
            band = band/ scaling_value
            
            band = cv2.resize(band,CFG.img_size)
            images.append(band)   

    return np.dstack(images)[:,:,::-1]


    
def tf_load_s1(path):    
    path = path.numpy().decode('utf-8')
    return load_s1_tiffs(path)
    
    

def tf_load_s2(path):    
    path = path.numpy().decode('utf-8')
    return load_s2_tiffs(path)


def tf_load_rgb(path):    
    path = path.numpy().decode('utf-8')
    return load_rgb_tiffs(path)
    
def process_image_s1(filename):
    '''function for preprocessing in tensorflow data'''
    
    return tf.py_function(tf_load_s1, 
                          [filename], 
                          tf.float32)



def process_image_s2(filename):
    '''function for preprocessing in tensorflow data'''
    
    return tf.py_function(tf_load_s2, 
                          [filename], 
                          tf.float32)



def process_image_rgb(filename):
    '''function for preprocessing in tensorflow data'''
    
    return tf.py_function(tf_load_rgb, 
                          [filename], 
                          tf.float32)
    

In [27]:
def count_rasters_in_folder(path):
    count = 0 
    
    for file in os.listdir(path):
        if file.rsplit('.',1)[1] == 'tif':
            count +=1 
            
    return count 
    
    
s2_data['raster_count'] = s2_data.image_dir.apply(lambda x : count_rasters_in_folder(x))

#value counts 
s2_data['raster_count'].value_counts()


s2_data=s2_data[s2_data['raster_count']==12] # take only valid rasters
# s2_data[s2_data['raster_count']==0]['location_id'].value_counts()

# Finding Data points with flood

In [28]:
s1_data['label'].value_counts()

0    2300
1    1031
Name: label, dtype: int64

In [29]:
s2_data['label'].value_counts()

0    1633
1     505
Name: label, dtype: int64

# For Sentinel 1 flooded regions 

In [30]:
s1_data.head()

Unnamed: 0,geometry,label,date,tile_number,id,location_id,image_dir
0,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2018-12-23,1,sen12floods_s1_labels_0001_2018_12_23,1,sen12flood/sen12floods_s1_source/sen12floods_s...
1,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2019-01-04,1,sen12floods_s1_labels_0001_2019_01_04,1,sen12flood/sen12floods_s1_source/sen12floods_s...
2,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2019-01-16,1,sen12floods_s1_labels_0001_2019_01_16,1,sen12flood/sen12floods_s1_source/sen12floods_s...
3,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2019-01-28,1,sen12floods_s1_labels_0001_2019_01_28,1,sen12flood/sen12floods_s1_source/sen12floods_s...
4,"[[[18.627334, -33.849575], [18.626057, -33.895...",0,2018-12-16,4,sen12floods_s1_labels_0004_2018_12_16,4,sen12flood/sen12floods_s1_source/sen12floods_s...


In [31]:
# find data set
flood_s1=pd.DataFrame()

for i in s1_data.iloc():
    if(i['label']==1):
        # print(i.id)
        flood_s1=flood_s1.append(i)


  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1=flood_s1.append(i)
  flood_s1

In [32]:
# reindex train data

flood_s1.reset_index(drop=True,inplace=True)
flood_s1.head()

Unnamed: 0,geometry,label,date,tile_number,id,location_id,image_dir
0,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-03-07,-119,sen12floods_s1_labels_0137_2019_03_07,137,sen12flood/sen12floods_s1_source/sen12floods_s...
1,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-03-13,-119,sen12floods_s1_labels_0137_2019_03_13,137,sen12flood/sen12floods_s1_source/sen12floods_s...
2,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-03-14,-119,sen12floods_s1_labels_0137_2019_03_14,137,sen12flood/sen12floods_s1_source/sen12floods_s...
3,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-03-19,-119,sen12floods_s1_labels_0137_2019_03_19,137,sen12flood/sen12floods_s1_source/sen12floods_s...
4,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-03-20,-119,sen12floods_s1_labels_0137_2019_03_20,137,sen12flood/sen12floods_s1_source/sen12floods_s...


In [33]:
 # saving datasets
flood_s1.to_csv('flood_s1.csv',index=False)


# for Sentinel 2 flooded regions

In [34]:
s2_data.head()

Unnamed: 0,geometry,label,date,tile_number,id,location_id,image_dir,raster_count
0,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2018-12-18,1,sen12floods_s2_labels_0001_2018_12_18,1,sen12flood/sen12floods_s2_source/sen12floods_s...,12
1,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2018-12-23,1,sen12floods_s2_labels_0001_2018_12_23,1,sen12flood/sen12floods_s2_source/sen12floods_s...,12
2,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2019-01-02,1,sen12floods_s2_labels_0001_2019_01_02,1,sen12flood/sen12floods_s2_source/sen12floods_s...,12
3,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2019-01-12,1,sen12floods_s2_labels_0001_2019_01_12,1,sen12flood/sen12floods_s2_source/sen12floods_s...,12
4,"[[[-13.278048, 8.493532], [-13.278254, 8.44724...",0,2019-01-17,1,sen12floods_s2_labels_0001_2019_01_17,1,sen12flood/sen12floods_s2_source/sen12floods_s...,12


In [35]:
# find data set
flood_s2=pd.DataFrame()

for i in s2_data.iloc():
    if(i['label']==1):
        # print(i.id)
        flood_s2=flood_s2.append(i)


  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2=flood_s2.append(i)
  flood_s2

In [36]:
# reindex train data

flood_s2.reset_index(drop=True,inplace=True)
flood_s2.head()

Unnamed: 0,geometry,label,date,tile_number,id,location_id,image_dir,raster_count
0,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-03-10,-119,sen12floods_s2_labels_0137_2019_03_10,137,sen12flood/sen12floods_s2_source/sen12floods_s...,12
1,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-03-25,-119,sen12floods_s2_labels_0137_2019_03_25,137,sen12flood/sen12floods_s2_source/sen12floods_s...,12
2,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-03-30,-119,sen12floods_s2_labels_0137_2019_03_30,137,sen12flood/sen12floods_s2_source/sen12floods_s...,12
3,"[[[34.28994, -15.441015], [34.290227, -15.4872...",1,2019-04-09,-119,sen12floods_s2_labels_0137_2019_04_09,137,sen12flood/sen12floods_s2_source/sen12floods_s...,12
4,"[[[34.242504, -15.487565], [34.242781, -15.533...",1,2019-03-10,-118,sen12floods_s2_labels_0138_2019_03_10,138,sen12flood/sen12floods_s2_source/sen12floods_s...,12


In [37]:
 # saving datasets
flood_s2.to_csv('flood_s2.csv',index=False)
