# ML4Floods - Query Function Development

Aim: To adapt and extend the code from ee_download.py in order to query the available Earth Engine data for a given area of interest (AOI).

In [None]:
pip install git+https://github.com/spaceml-org/ml4floods#egg=ml4floods

In [5]:
import json
import math
import os
import time
import traceback
import warnings
from datetime import datetime, timezone
from glob import glob
from io import StringIO
from typing import Optional, Callable, List, Tuple

import ee
import fsspec
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import mapping, Polygon

from ml4floods.data import ee_download, utils

### Setting up Search Parameters

In [6]:
#Reading in the AOI 

#For the purposes of this workflow we are assuming the AOI is given as a file that is readable by geopandas
def read_aoi(my_gpd_aoi:str):
    """Read an AOI file and return its geometry as an Earth Engine geometry.

    Args:
        my_gpd_aoi: Path (or any source accepted by ``geopandas.read_file``)
            of the file describing the area of interest.

    Returns:
        The ``ee.Geometry`` built from the geometry of the feature collection
        described by the file.
    """
    my_aoi = gpd.read_file(my_gpd_aoi)

    # Serialise the AOI that was just read. The previous version serialised
    # `sydney_aoi`, a name that is not defined in this function, so the
    # function ignored its argument (or failed with a NameError).
    js = json.loads(my_aoi.to_json())  # GeoJSON-style dict of the AOI features
    bounds = ee.Geometry(ee.FeatureCollection(js).geometry())

    return bounds
#for test case: using patch from sydney map

In [8]:
#Specifying the date range of interest and converting it to a variable to be passed;
#include the year as a separate variable for year-long data (e.g. permanent water)

#User to input the start and end dates with format 'dd/mm/yy', separated by a comma.
#NOTE: the value below is only a placeholder; it must be replaced with real dates
#before it is passed to get_datetime, which parses each entry with '%d/%m/%y'.
date_range = ('dd/mm/yy', 'dd/mm/yy')

def get_datetime(date_range: tuple): # function returns a tuple containing datetime
    """Parse a (start, end) pair of 'dd/mm/yy' strings.

    Args:
        date_range: Sequence whose first two entries are the start and end
            dates, written as 'dd/mm/yy' strings.

    Returns:
        A 4-tuple ``(start, end, start_year, end_year)`` holding the two
        parsed ``datetime`` objects followed by their years.

    Raises:
        ValueError: If either string does not match the '%d/%m/%y' format.
    """
    date_format = '%d/%m/%y'
    first, last = (datetime.strptime(date_range[k], date_format) for k in (0, 1))
    return first, last, first.year, last.year

**QUESTION**: why do we have the start and end year? Will we use it later?

### Initial Queries

In [None]:
def get_imageCollections(bounds: ee.geometry.Geometry, date: tuple): # returns imageCollection
    """Query the Landsat and Sentinel-2 collections for an area and date range.

    Args:
        bounds: Earth Engine geometry delimiting the area of interest.
        date: Sequence whose first two entries are the start and end of the
            date range. Only those two entries are read, so the 4-tuple
            returned by ``get_datetime`` can be passed as-is.
            NOTE(review): the exact date type expected by ``ee_download`` is
            not visible here — confirm against that module.

    Returns:
        A pair ``(landsat_imgs, s2_imgs)`` with the results of
        ``ee_download.get_landsat_collection`` and
        ``ee_download.get_s2_collection``.
    """
    # The previous version read an undefined name `dates` instead of the
    # `date` parameter, so it only worked if a notebook global of that name
    # happened to exist.
    date_start, date_end = date[0], date[1]

    landsat_imgs = ee_download.get_landsat_collection(
        date_start=date_start, date_end=date_end, bounds=bounds
    )
    s2_imgs = ee_download.get_s2_collection(
        date_start=date_start, date_end=date_end, bounds=bounds
    )

    return landsat_imgs, s2_imgs

In [None]:
def extract_properties(landsat_imgs, s2_imgs, init_bounds, aoi_code):
    """Tabulate per-image properties of the Landsat and Sentinel-2 results.

    Args:
        landsat_imgs: Object whose ``getInfo()`` returns a dict with a
            ``'features'`` list (the Landsat query result).
        s2_imgs: Same, for the Sentinel-2 query result.
        init_bounds: Mapping with a ``'coordinates'`` entry describing the AOI;
            the same value is stored on every row.
        aoi_code: Identifier of the AOI, stored on every row.

    Returns:
        A ``pandas.DataFrame`` with one row per image (Landsat rows first,
        then Sentinel-2) and the columns ``id``, ``source``, ``cloud_cover``,
        ``percentage_valid_pixels``, ``coordinates`` and ``aoi_code``.

    Raises:
        KeyError: If a feature lacks ``'properties'``, its cloud-cover
            property or the ``'valids'`` property.
    """
    coordinates = init_bounds['coordinates']

    landsat_features = landsat_imgs.getInfo()['features']
    s2_features = s2_imgs.getInfo()['features']

    print('Landsat images:', len(landsat_features))
    print('s2 images:', len(s2_features))

    columns = ['id', 'source', 'cloud_cover', 'percentage_valid_pixels', 'coordinates', 'aoi_code']

    # (source label, name of the cloud-cover property, features) per sensor.
    # 'valids' is read from every feature's properties; presumably it is added
    # by the ee_download collection helpers — TODO confirm.
    sensors = (
        ('landsat', 'CLOUD_COVER', landsat_features),
        ('s2', 'CLOUD_COVERAGE_ASSESSMENT', s2_features),
    )

    # Build the rows first and create the frame once. The previous version
    # filled a pre-allocated frame through chained indexing
    # (`df['col'][i] = value`), which pandas does not guarantee to write back
    # to the frame and which is a no-op under Copy-on-Write.
    rows = []
    for source, cloud_key, features in sensors:
        for feature in features:
            properties = feature['properties']
            rows.append({
                'id': feature.get('id'),
                'source': source,
                'cloud_cover': properties[cloud_key],
                'percentage_valid_pixels': properties['valids'],
                'coordinates': coordinates,
                'aoi_code': aoi_code,
            })
            #find area/co-ordinates associated with each image

    # dtype=object keeps the column dtypes of the original pre-allocated frame.
    return pd.DataFrame(rows, columns=columns, dtype=object)