In [None]:
import geopandas as gpd
from pathlib import Path
import pandas as pd
import numpy as np
import swifter
import matplotlib.pyplot as plt
import rasterio
import datetime
import os, shutil
from joblib import delayed, Parallel
import tqdm

### Settings 

In [None]:
# --- Input: directory whose entries are the per-dataset inference results ---
# Slumps model run (active)
INFERENCE_DIR = Path(
    r'Q:\p_aicore_pf\initze\processed\inference\RTS_6Regions_V01_UnetPlusPlus_resnet34_FocalLoss_sh6_50_bs100_2021-12-04_22-27-53'
)
# Pingo model run (uncomment to use it instead)
#INFERENCE_DIR = Path(r'Q:\p_aicore_pf\initze\processed\inference\pingo_UnetPP_v1_2021-12-12_09-56-50')

# --- Output: one merged shapefile, named after the inference run ---
OUTPUT_DIR = Path(r'C:\Users\initze\OneDrive\100_AI-CORE\16_inference_statistics')
out_file = OUTPUT_DIR.joinpath(f'{INFERENCE_DIR.stem}_merged_datasets.shp')

In [None]:
# Sanity check: show where the merged shapefile will be written.
print(out_file)

In [None]:
def get_vector(f):
    """Load one inference result in EPSG:4326 and tag it with metadata columns.

    The dataset name (``f.stem``) is split on ``'_'`` and must follow one of
    two layouts:

    * 4 parts -> ``scene``, ``tile_id``, ``date``, ``sensor``
    * 3 parts -> ``date``, ``scene``, ``sensor``

    Parameters
    ----------
    f : pathlib.Path
        Path passed to ``geopandas.read_file``. The name of its parent
        directory is stored in the ``model`` column.

    Returns
    -------
    geopandas.GeoDataFrame
        The features reprojected to EPSG:4326, with the extra columns
        ``id_local`` (the original row index), ``dataset`` (``f.stem``),
        ``model`` and the name-derived columns listed above. Name-derived
        values are kept as strings.

    Raises
    ------
    ValueError
        If the dataset name does not split into 3 or 4 parts.
    """
    name_parts = f.stem.split('_')
    layouts = {
        4: ('scene', 'tile_id', 'date', 'sensor'),
        3: ('date', 'scene', 'sensor'),
    }
    # Validate the name before touching the file so a malformed name fails
    # fast and with a message that identifies the offending dataset.
    if len(name_parts) not in layouts:
        raise ValueError(
            f'Unexpected dataset name {f.stem!r}: expected 3 or 4 '
            f"'_'-separated parts, got {len(name_parts)}"
        )

    gdf = gpd.read_file(f).to_crs(epsg=4326)
    gdf['id_local'] = gdf.index
    gdf['dataset'] = f.stem
    gdf['model'] = f.parts[-2]
    # Assign one scalar per column; each value is broadcast to every row
    # regardless of how many rows the frame has.
    for column, value in zip(layouts[len(name_parts)], name_parts):
        gdf[column] = value
    return gdf

def load_dataset(f):
    """Load a single dataset with ``get_vector``, tolerating load failures.

    Parameters
    ----------
    f : pathlib.Path
        Path of the dataset to load.

    Returns
    -------
    The value returned by ``get_vector(f)``, or ``None`` if loading raised an
    exception. Callers must be prepared for ``None`` entries.
    """
    try:
        return get_vector(f)
    except Exception as err:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate, and report the
        # reason so failed datasets can be diagnosed.
        print(f'Error on {f.stem}: {err!r}')
        return None

### Create file list

In [None]:
# Collect every entry directly inside the inference directory (non-recursive).
flist = [*INFERENCE_DIR.glob('*')]

#### Load files and add to list 

In [None]:
# Parallel load: every path in flist is passed to load_dataset, with up to
# 10 concurrent joblib jobs. load_dataset does not return a frame for a path
# that failed to load, so ds_list can contain None entries alongside the
# loaded frames.
%time ds_list = Parallel(n_jobs=10)(delayed(load_dataset)(f) for f in tqdm.tqdm_notebook(flist[:]))

In [None]:
# Serial alternative to the joblib-based loading cell. It rebuilds ds_list
# from scratch, so running both cells loads every dataset twice.
ds_list = []
for f in flist:
    try:
        ds_list.append(get_vector(f))
    except Exception as err:
        # Catch Exception rather than using a bare ``except`` so that a kernel
        # interrupt (KeyboardInterrupt) stops the loop instead of being
        # reported as a failed dataset; include the reason for diagnosis.
        print(f'Error on {f.stem}: {err!r}')

#### Merge all GeoDataFrames into one

In [None]:
# Stack all loaded frames into a single table with a fresh 0..n-1 index,
# then wrap the result as a GeoDataFrame.
rdf = gpd.GeoDataFrame(
    pd.concat(objs=ds_list, ignore_index=True)
)

#### Set projection (got lost during merge with pandas) 

In [None]:
# Every dataset was already reprojected to EPSG:4326 in get_vector, so this
# declares the CRS on the merged frame; set_crs does not transform coordinates.
rdf = rdf.set_crs(epsg=4326)

#### Calculate time variables for later analysis 

In [None]:
# Parse the 'date' column once and derive every calendar field from that
# single result instead of re-parsing the column for each field.
# `infer_datetime_format` is intentionally not passed: it is deprecated since
# pandas 2.0, where consistent-format inference is the default behaviour.
parsed_dates = pd.to_datetime(rdf['date'])
rdf['year'] = parsed_dates.dt.year
rdf['month'] = parsed_dates.dt.month
rdf['doy'] = parsed_dates.dt.day_of_year

#### Write to file

In [None]:
# Persist the merged table to the .shp path built in the settings cell.
rdf.to_file(out_file)