In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import sys
sys.path.append('../')

In [None]:
import ast
import concurrent.futures
from pathlib import Path

import ee
import geopandas as gpd
import pandas as pd
from tqdm import tqdm
ee.Initialize()


import scripts.get_sources as get_sources

In [None]:
def get_explanatories(row):
    """Fetch the pixel values of every explanatory source for one sample.

    Parameters
    ----------
    row : mapping
        Must provide ``row["date"]`` and ``row["coordinate"]``; the coordinate
        is unpacked as ``(lon, lat)`` and must hold exactly two values.

    Returns
    -------
    dict or None
        ``None`` when ``get_sources.is_s1_image(date, aoi)`` is falsy.
        Otherwise a dict with one entry per source, keyed by the source
        function's ``__name__`` (e.g. ``"get_gpm"``), whose value is whatever
        ``reduceRegion(...).getInfo()`` returned for that source's image.

    Raises
    ------
    Exception
        Any exception raised while fetching a source is re-raised here by
        ``future.result()``.
    """
    date, lon, lat = row['date'], *row["coordinate"]
    aoi = ee.Geometry.Point(lon, lat)

    if not get_sources.is_s1_image(date, aoi):
        return None

    # TS scans will include the s1 image.
    print("Processing image", date, lon, lat)

    def get_images(get_image):
        """Build one source image and list its pixel values around the point."""
        image = get_image(date, aoi)

        print("Processing band", get_image.__name__, date, lon, lat)

        # getInfo() is the blocking call that actually pulls the values,
        # which is why each source runs in its own worker thread.
        result = image.reduceRegion(
            reducer=ee.Reducer.toList(),
            geometry=aoi.buffer(50),
            maxPixels=1e13,
            scale=50,
        ).getInfo()

        print("Processing band", get_image.__name__, date, lon, lat, "Done!")

        return result

    sources = [
        get_sources.get_tsscans,
        get_sources.get_gldas,
        get_sources.get_gpm,
        get_sources.get_srtm,
        get_sources.get_globcover,
        get_sources.get_gedi,
        get_sources.get_hansen,
    ]

    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:

        # Key each future by its source name. Keying all of them by the same
        # (date, lon, lat) tuple would make every result overwrite the
        # previous one, leaving only the last source to finish.
        futures = {
            executor.submit(get_images, source): source.__name__
            for source in sources
        }

        result = {}

        for future in concurrent.futures.as_completed(futures):
            result[futures[future]] = future.result()

    # --- Unfinished draft, kept for reference: per-scene metadata ---------
    # props = image.getInfo()['properties']
    # scene_id = props['system:index']
    # ee_time = image.date().getInfo()
    # ee_geom = image.geometry().getInfo()
    # orbit_direction = 1 if props['orbitProperties_pass'] == 'ASCENDING' else 2
    # acq_date = scene_id[17:25]

    #         'scene_id': scene_id,
    #         'acq_date': acq_date,
    #         'ee_time': ee_time,
    #         'ee_geom': ee_geom,
    #         'orbit_direction': orbit_direction

    print(result)

    # --- Unfinished draft, kept for reference: per-band aggregation -------
    # bandlist = [
    #  'VV', 'VH', 'VVVH_ratio', 'angle', 'LIA', #'layover', 'shadow', 'no_data_mask', 
    #  'precipitation', 'prec_3', 'prec_7', 'prec_30', 
    #  'gldas_mean', 'gldas_stddev', 'sm_gldas', 'sm_3', 'sm_7', 'sm_30',
    #  'elevation', 'aspect', 'slope', 
    #  'landcover',
    #  'kVV_mean', 'kVV_stdDev', 'kVV_p5', 'kVV_p95', 'kVH_mean', 'kVH_stdDev', 'kVH_p5', 'kVH_p95', 
    #  'VV_mean', 'VV_stdDev', 'VV_p5', 'VV_p95', 'VH_mean', 'VH_stdDev', 'VH_p5', 'VH_p95'
    #  'canopy_height',
    #  'B7', 'B5', 'B4', 'B3', 'ndvi', 'ndmi', 'ndbri'
    # ]

    # for band in bandlist:
    #     if band == 'landcover':
    #         counts = np.bincount(data_dict[band])
    #         row[band] = np.argmax(counts)
    #     else:
    #         row[band] = np.mean(data_dict[band])

    # d = {}
    # d[i] = row
    # df = pd.DataFrame.from_dict(d, orient='index')
    # df.to_pickle(f'/home/vollrath/Indonesia_sm/samples_all/{i}.gpm.pickle')

    # Hand the per-source values back so the caller's future.result() is useful.
    return result
    

In [None]:
# Load the extracted samples; the [:10] slice keeps only the first ten rows.
df = pd.read_csv('../data/6_extracted_sm_data/all_extracted_data.csv')[:10]
# The coordinate column arrives as text. ast.literal_eval parses Python
# literals only, whereas eval() would execute any expression found in the file.
df["coordinate"] = df["coordinate"].apply(ast.literal_eval)
df.head()

In [None]:
# Run get_explanatories for every sample row on a small thread pool.
# TODO: progress reporting was sketched with tqdm but is still disabled:
# with tqdm(total=len(gdf), file=sys.stdout) as pbar:
#     pbar.set_description('processed: %d' % (i))
#     pbar.update(1)

with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:

    # Remember which sample each future belongs to. tuple() keeps the key
    # hashable even if the coordinate was parsed as a list.
    futures = {
        executor.submit(get_explanatories, row): (row["date"], tuple(row["coordinate"]))
        for _, row in df.iterrows()
    }

    result = {}

    for future in concurrent.futures.as_completed(futures):
        # Key by sample, not by the Future object, so the output can be
        # related back to the rows of df. future.result() re-raises any
        # exception from the worker.
        result[futures[future]] = future.result()
            

In [None]:
# Take the first sample row and unpack it the same way get_explanatories does.
_, row = next(df.iterrows())
date = row["date"]
lon, lat = row["coordinate"]
(date, lon, lat)

In [None]:
# Stack all per-sample pickles into one frame with a single concat, instead
# of re-copying the growing frame once per file.
# NOTE(review): read_pickle can run code embedded in the file, so only load
# pickles produced by this project.
sample_files = Path('/home/vollrath/Indonesia_sm/samples_all/').glob('*.gpm.pickle')
# pd.concat raises ValueError when no file matched, rather than silently
# leaving an earlier `df` in place for the cells below.
df = pd.concat([pd.read_pickle(file) for file in sample_files])

In [None]:
# Show the column labels of df.
df.columns

In [None]:
# Keep only rows without missing values and report the row count before and
# after the drop, one number per line.
df_no_nans = df.dropna()
print(len(df), len(df_no_nans), sep="\n")

# Build point geometries from the lon/lat columns and write them as a GeoPackage.
gdf = gpd.GeoDataFrame(
    df_no_nans,
    geometry=gpd.points_from_xy(x=df_no_nans["lon"], y=df_no_nans["lat"]),
)
gdf.to_file(
    '/home/vollrath/Indonesia_sm/samples_all/combined_s1_extract.gpkg',
    driver='GPKG',
)

In [None]:
# Read the GeoPackage back from disk and display how many rows it holds.
c = gpd.read_file('/home/vollrath/Indonesia_sm/samples_all/combined_s1_extract.gpkg')
len(c)