In [1]:
import geopandas as gpd
import sqlite3
import pandas as pd
from shapely import wkt
import pyogrio
from tqdm import tqdm
import numpy as np

# --- Input / output locations ------------------------------------------
# GeoPackage whose 'data' layer is read chunk by chunk in the aggregation cell.
geopackage_path = "C:/Users/eleonore.kong/Documents/InSAR/DATA/harmonic_egms_data.gpkg"
# GeoPackage from which the grid layer named by `grid_to_process` is loaded.
grid_path = "C:/Users/eleonore.kong/Documents/InSAR/DATA/grid.gpkg"
# Output prefix: file names are appended by plain string concatenation,
# so the trailing slash is required.
output = "C:/Users/eleonore.kong/Documents/InSAR/DATA/"

# --- Grid selection ----------------------------------------------------
grids = ['grid_2km', 'grid_1km', 'grid_500m']
grid_to_process = 'grid_500m'

# Load the selected grid layer and keep only the cell id and its geometry.
grid_layer = gpd.read_file(grid_path, layer=grid_to_process)
df = grid_layer[['id', 'geometry']]
del grid_layer  # only the trimmed frame is needed from here on
print(len(df))

2651968


In [7]:
def med_seasonal_amplitude(row, season, stat):
    """Median over six years of one season's detrended amplitude.

    Looks up ``'Detrended Amplitude_<season>_<year>_<stat>'`` in ``row`` for
    six consecutive years and returns ``np.median`` of those values.

    Parameters
    ----------
    row : anything indexable by column name (e.g. a DataFrame row or a dict).
    season : str
        Season token used in the column name. For ``'winter'`` the years
        2015-2020 are read; for every other value, 2016-2021.
    stat : str
        Suffix of the column name (the aggregation cell uses 'median' / 'p75').

    Returns
    -------
    The value returned by ``np.median`` for the six looked-up entries.
    """
    # Winter columns are labelled one year earlier than the other seasons.
    offset = 1 if season == 'winter' else 0

    values = []
    for yr in range(2016, 2022):
        key = 'Detrended Amplitude_{}_{}_{}'.format(season, yr - offset, stat)
        values.append(row[key])
    return np.median(values)

def summer_winter_amplitude_diff_per_yr(row, year, stat):
    """Summer amplitude of ``year`` minus winter amplitude of ``year - 1``.

    Parameters
    ----------
    row : anything indexable by column name (e.g. a DataFrame row or a dict).
    year : int
        Year of the summer column; the winter column read is ``year - 1``.
    stat : str
        Suffix of both column names.

    Returns
    -------
    ``row['Detrended Amplitude_summer_<year>_<stat>']
    - row['Detrended Amplitude_winter_<year-1>_<stat>']``
    """
    summer_value = row[f'Detrended Amplitude_summer_{year}_{stat}']
    winter_value = row[f'Detrended Amplitude_winter_{year - 1}_{stat}']
    return summer_value - winter_value

def med_summer_winter_amplitude_diff(row, stat):
    """Median of the yearly summer-minus-winter amplitude differences.

    Calls ``summer_winter_amplitude_diff_per_yr`` for each year from 2016 to
    2021 inclusive and returns ``np.median`` of the six results.

    Parameters
    ----------
    row : anything indexable by column name (e.g. a DataFrame row or a dict).
    stat : str
        Column-name suffix forwarded to the per-year helper.
    """
    yearly_gaps = []
    for yr in range(2016, 2022):
        yearly_gaps.append(summer_winter_amplitude_diff_per_yr(row, yr, stat))
    return np.median(yearly_gaps)

def amplitude_evolution(row, stat):
    """Median year-over-year change of the yearly detrended amplitude.

    Reads ``'Detrended Amplitude_<year>_<stat>'`` for 2016-2021, takes the five
    differences between each year and the one before it, and returns
    ``np.median`` of those differences.

    Parameters
    ----------
    row : anything indexable by column name (e.g. a DataFrame row or a dict).
    stat : str
        Suffix of the column names.
    """
    keys = [f'Detrended Amplitude_{yr}_{stat}' for yr in range(2016, 2022)]
    # Pair every year with its successor: (2016, 2017), (2017, 2018), ...
    steps = [row[later] - row[earlier] for earlier, later in zip(keys, keys[1:])]
    return np.median(steps)

def seasonal_amplitude_evolution(row, season, stat):
    """Median year-over-year change of one season's detrended amplitude.

    Reads six consecutive ``'Detrended Amplitude_<season>_<year>_<stat>'``
    entries from ``row``, takes the five differences between each year and the
    one before it, and returns ``np.median`` of those differences.

    Parameters
    ----------
    row : anything indexable by column name (e.g. a DataFrame row or a dict).
    season : str
        Season token used in the column name. ``'winter'`` reads the years
        2015-2020; any other value reads 2016-2021. This is the same
        convention ``med_seasonal_amplitude`` uses, so the two functions
        accept the same set of seasons.
    stat : str
        Suffix of the column names.

    Returns
    -------
    The value returned by ``np.median`` for the five yearly differences.

    Notes
    -----
    A season for which ``row`` has no matching columns fails at the lookup
    (``KeyError`` for a dict or pandas row) and names the missing column,
    instead of failing on an unbound local variable.
    """
    # Winter columns are labelled one year earlier than the other seasons.
    first_year = 2015 if season == 'winter' else 2016
    columns = [
        'Detrended Amplitude_' + season + '_' + str(year) + '_' + stat
        for year in range(first_year, first_year + 6)
    ]

    evol = [row[later] - row[earlier] for earlier, later in zip(columns, columns[1:])]
    return np.median(evol)

# 75th-percentile aggregator passed to groupby().agg() next to 'median'.
# Keep this an anonymous lambda: the aggregation cell renames the
# auto-generated '<lambda_0>' column label to 'p75', which a named function
# would not produce.
percentile_75 = lambda x: x.quantile(q=0.75)

In [8]:
def _sql_in_list(values):
    """Render ``values`` as the parenthesised list of a SQL ``IN`` clause.

    Strings are single-quoted (embedded quotes doubled); everything else is
    rendered with ``str()``. Formatting a Python tuple directly is not safe
    here: a one-element tuple renders as ``(x,)`` and NumPy >= 2 scalars
    render as ``np.int64(x)``, both of which are invalid SQL.
    """
    rendered = []
    for value in values:
        if isinstance(value, str):
            rendered.append("'" + value.replace("'", "''") + "'")
        else:
            rendered.append(str(value))
    return "(" + ", ".join(rendered) + ")"


# Number of grid cells requested from the data GeoPackage per read.
chunk_size = 20000
ids = df['id'].unique()
ids_sublists = [ids[start:start + chunk_size] for start in range(0, len(ids), chunk_size)]

# Not used in this cell; kept because later cells may rely on these names.
median_gdf = {}
geom = {}
column_name_diff_per_year = 'summer_winter_amplitude_diff_'

output_path = output + 'grid_stats_agg.gpkg'
# Becomes True after the first successful write. Append mode is keyed on this
# rather than on the chunk index, so leading empty chunks cannot make the
# first real write append onto a layer left over from a previous run.
# NOTE(review): confirm how pyogrio treats an already existing output file
# when append=False (whole file vs. single layer replaced).
layer_written = False

for ids_list in tqdm(ids_sublists):
    # Read only the points whose grid-cell id belongs to the current chunk.
    where_filter = f"{grid_to_process} IN {_sql_in_list(ids_list)}"
    gdf = pyogrio.read_dataframe(geopackage_path, layer='data', where=where_filter)

    if gdf.empty:
        continue

    # Median and 75th percentile of every numeric column, per grid cell.
    median_df = gdf.groupby(grid_to_process).agg({
        col: ['median', percentile_75] for col in gdf.select_dtypes(include='number').columns
    })
    # Flatten the (column, aggregation) MultiIndex into '<column>_<aggregation>'.
    median_df.columns = ['_'.join(col).strip() for col in median_df.columns]
    # pandas labels the lambda aggregation '<lambda_0>'; give it a readable name.
    median_df.columns = [col.replace('<lambda_0>', 'p75') for col in median_df.columns]
    # Drop aggregated columns whose name contains 'index' or 'grid'.
    median_df = median_df.drop([col for col in median_df.columns if 'index' in col], axis=1)
    median_df = median_df.drop([col for col in median_df.columns if 'grid' in col], axis=1)

    # Attach each cell's geometry from the grid layer.
    median_df = median_df.reset_index()
    median_df = median_df.merge(df, left_on=grid_to_process, right_on='id', how='left')
    median_df = median_df.drop(columns='id')
    median_df = gpd.GeoDataFrame(median_df, crs=2154)

    for stat in ('median', 'p75'):
        median_df[stat + '_summer_winter_amplitude_diff'] = median_df.apply(
            lambda row: med_summer_winter_amplitude_diff(row, stat), axis=1
        )

        # Further indicators, currently disabled:
        # median_df[stat + '_evolution_amplitude'] = median_df.apply(lambda row : amplitude_evolution(row, stat), axis=1)
        # for season in ['summer', 'winter']:
        #     median_df[stat + '_' + season + '_amplitude'] = median_df.apply(lambda row : med_seasonal_amplitude(row, season, stat), axis=1)
        #     median_df[stat + '_evolution_'+ season +'_amplitude'] = median_df.apply(lambda row : seasonal_amplitude_evolution(row, season, stat), axis=1)

    # First write creates the layer; every later write appends to it.
    pyogrio.write_dataframe(
        median_df,
        output_path,
        layer=grid_to_process,
        driver="GPKG",
        append=layer_written,
    )
    layer_written = True

100%|██████████| 133/133 [3:00:03<00:00, 81.23s/it] 
