In [1]:
# Enable the autoreload extension in mode 2 so that edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Add the parent directory to the import path (presumably where the `scripts`
# package imported below lives). The membership check keeps the cell
# idempotent: re-running it no longer appends a duplicate entry each time.
if '../' not in sys.path:
    sys.path.append('../')

In [3]:
import ast
import concurrent.futures
import pickle
from pathlib import Path

import ee
import geopandas as gpd
import pandas as pd
from tqdm.auto import tqdm

import scripts.get_sources as get_sources

# Start the Earth Engine session against the high-volume endpoint URL, using
# the project named below.
EE_HIGH_VOLUME_URL = "https://earthengine-highvolume.googleapis.com"
EE_PROJECT = "indonesiatest"
ee.Initialize(opt_url=EE_HIGH_VOLUME_URL, project=EE_PROJECT)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Folder that receives the pickle files; created up front (parents included)
# and left untouched when it already exists.
training_path = Path('../data') / '7_training_data'
training_path.mkdir(parents=True, exist_ok=True)

In [5]:

def get_explanatories(row):
    """Collect the values of every configured source for a single sample.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing a
            ``date`` entry and a two-element ``coordinate`` entry that is
            unpacked as ``(lon, lat)``.

    Returns:
        ``None`` when ``get_sources.is_s1_image(date, aoi)`` is falsy.
        Otherwise a dict keyed by the name of each getter function, holding
        the ``getInfo()`` result of that getter's reduced image. Keys are
        inserted in the order the requests complete.
    """
    date = row['date']
    lon, lat = row["coordinate"]
    aoi = ee.Geometry.Point(lon, lat)

    # Skip the sample entirely when the availability check fails.
    if not get_sources.is_s1_image(date, aoi):
        return None

    # TS scans will include the s1 image.

    def get_images(get_image):
        """Build the image of one source and fetch its values around the point."""
        source_name = get_image.__name__
        print("Processing", source_name)

        # Reduce to plain lists over a 50-unit buffer around the point, then
        # bring the values to the client with getInfo().
        reduced = get_image(date, aoi).reduceRegion(
            reducer=ee.Reducer.toList(),
            geometry=aoi.buffer(50),
            maxPixels=1e13,
            scale=100,
        )
        values = reduced.getInfo()

        print("Done", source_name)
        return values

    sources = (
        get_sources.get_tsscans,
        get_sources.get_gldas,
        get_sources.get_gpm,
        get_sources.get_srtm,
        get_sources.get_globcover,
        get_sources.get_gedi,
        get_sources.get_hansen,
    )

    # Query all sources concurrently; each future is mapped back to the name
    # of the getter it was created from.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        names_by_future = {}
        for source in sources:
            pending = executor.submit(get_images, source)
            names_by_future[pending] = source.__name__

        return {
            names_by_future[done]: done.result()
            for done in concurrent.futures.as_completed(names_by_future)
        }

In [6]:
df = pd.read_csv('../data/6_extracted_sm_data/all_extracted_data.csv')
# The CSV stores each coordinate as text (later unpacked as `lon, lat`).
# Parse it with `ast.literal_eval`, which only accepts Python literals,
# instead of `eval`, which would execute any expression found in the file.
df["coordinate"] = df["coordinate"].apply(ast.literal_eval)

In [7]:
# Submit one extraction task per sample that has no pickle yet, then store each
# finished sample in its own file so that an interrupted run can be resumed.
with tqdm(total=len(df), file=sys.stdout) as pbar:

    with concurrent.futures.ThreadPoolExecutor() as executor:

        futures = {}
        # A named index variable is used instead of `_`, which IPython reserves
        # for the previous cell output.
        for row_index, row in df.iterrows():

            future_name = (row["point_id"], row["date"], (row["coordinate"]))
            future_result_path = training_path / f'{future_name}.p'

            # Already extracted by an earlier run: count it and move on.
            if future_result_path.exists():
                pbar.update(1)
                continue

            futures[executor.submit(get_explanatories, row)] = future_name

        # In-memory copy of everything extracted during this run.
        result = {}

        for future in concurrent.futures.as_completed(futures):

            future_name = futures[future]
            explanatories = future.result()

            # `None` means get_explanatories skipped the sample.
            if explanatories is not None:

                # merge all the sub dicts into one dict
                merged = {k: v for d in explanatories.values() for k, v in d.items()}
                result[future_name] = merged

                future_result_path = training_path / f'{future_name}.p'

                # Save only this sample's entry. Dumping the whole `result`
                # dict here would copy every earlier sample into each file,
                # making file size and write time grow with the sample count.
                # The one-key dict keeps the same {sample key: values} layout.
                with open(future_result_path, 'wb') as f:
                    pickle.dump({future_name: merged}, f)

            pbar.update(1)

  0%|          | 0/427757 [00:00<?, ?it/s]

In [None]:
# Display the `result` dict filled by the extraction loop.
result

In [None]:
# Tabulate `result`: with orient="index" each dict key becomes one row.
pd.DataFrame.from_dict(result, orient="index")

In [None]:
date = "2024-04-16"
# `get_hansen` is only reachable through the imported `get_sources` module;
# the bare name is never bound in this notebook and raises NameError on a
# fresh kernel.
# NOTE(review): get_explanatories calls the getters as `(date, aoi)` — confirm
# that `get_hansen` accepts a date alone.
get_sources.get_hansen(date).getInfo()

In [None]:
# Take the first sample without assigning to `_`: IPython uses that name for
# the previous cell output and stops updating it once user code rebinds it.
row = df.iloc[0]
date = row['date']
lon, lat = row["coordinate"]
date, lon, lat

In [None]:
# Read every matching pickle and concatenate them in a single call; growing the
# frame with `pd.concat` inside a loop re-copies all accumulated rows each time.
# Sorting the paths makes the row order reproducible. If nothing matches,
# `pd.concat` now raises instead of silently leaving an earlier `df` in place.
sample_files = sorted(Path('/home/vollrath/Indonesia_sm/samples_all/').glob('*.gpm.pickle'))
df = pd.concat([pd.read_pickle(file) for file in sample_files])

In [None]:
# List the column labels of `df`.
df.columns

In [None]:
# Drop every row that contains a missing value and report the row count
# before and after the filter.
df_no_nans = df.dropna()
print(len(df), len(df_no_nans), sep="\n")

# Build point geometries from the lon/lat columns and write them to a
# GeoPackage file.
# NOTE(review): no CRS is assigned to the GeoDataFrame — confirm whether one
# should be set before export.
sample_points = gpd.points_from_xy(df_no_nans.lon, df_no_nans.lat)
gdf = gpd.GeoDataFrame(df_no_nans, geometry=sample_points)
gdf.to_file('/home/vollrath/Indonesia_sm/samples_all/combined_s1_extract.gpkg', driver='GPKG')

In [None]:
# Read the GeoPackage file back from disk and show how many rows it holds.
c = gpd.read_file('/home/vollrath/Indonesia_sm/samples_all/combined_s1_extract.gpkg')
len(c)