In [58]:
# Fetch the raw tables from Postgres; the merge itself happens in a later cell.
# NOTE(review): fetch_data_from_postgres is defined outside this notebook view.
# The merge cell consumes the result via `.items()` and treats each value as
# either a JSON string or already-parsed data -- confirm against the helper.
json_outputs = fetch_data_from_postgres()


--- DB SIGN IN: ---
reading .env files...
successfully connected to:	 postgres
available schemas:		 ecosystem_classifier, information_schema, public
setting default schema to:	 ecosystem_classifier
tables in default schema:	 radiance_2020, earthdata_1980, meteostat_2017, us_rules, meteostat_2010, meteostat_2012, meteostat_2013, meteostat_2014, meteostat_2018, elevation, us_gaz, earthdata_1990, earthdata_1995, us_lex, meteostat_2015, earthdata_2011, earthdata_2012, meteostat_2011, meteostat_stations, earthdata_1970, earthdata_2019, earthdata_2000, earthdata_2005, earthdata_2010, earthdata_2013, meteostat_2016, spatial_ref_sys, app_contact_messages, earthdata_2014, meteostat_2019, meteostat_2020, meteostat_2021, earthdata_2015, earthdata_2016, earthdata_2017, earthdata_2018, meteostat_2005, earthdata_1960, earthdata_2021, earthdata_2022, earthdata_2023, earthdata_2020, earthdata_1948, earthdata_1950

--- READING FETCHING PARAMETERS: ---
lat_min:	 3
lat_max:	 15
lon_min:	 -20
lon_max:	 5

In [62]:
import json
import pandas as pd

# Build one DataFrame per fetched table and outer-merge them on ('lat', 'lon').
#
# Reads  : json_outputs -- mapping of table name -> JSON string or parsed data.
# Defines: dfs          -- the DataFrames that passed validation, in input order.
#          table_names  -- the table name for each entry of `dfs`.
#          merged_df    -- the merged result. It is rebound on every run (to an
#                          empty frame when nothing could be merged) so later
#                          cells never pick up a stale value from an older run.

# Columns every table must expose (after renaming) to take part in the merge
merge_keys = ['lat', 'lon']

# Initialize empty lists to store the DataFrames and the names they came from
dfs = []
table_names = []

# Loop over each table in json_outputs
for key, data in json_outputs.items():
    # Parse JSON string if necessary
    if isinstance(data, str):
        try:
            parsed_data = json.loads(data)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON for table '{key}': {e}")
            continue  # Skip this table if JSON parsing fails
    else:
        parsed_data = data

    # Convert parsed data to a DataFrame
    try:
        df = pd.DataFrame(parsed_data)
    except ValueError as e:
        print(f"Error creating DataFrame for table '{key}': {e}")
        continue  # Skip this table if DataFrame creation fails

    # Standardize column names to 'lat' and 'lon' for merging
    if 'latitude' in df.columns and 'longitude' in df.columns:
        # Non-inplace rename: produces a new frame instead of mutating `df`,
        # which keeps the cell safe to re-run.
        df = df.rename(columns={'latitude': 'lat', 'longitude': 'lon'})
    elif 'lat' not in df.columns or 'lon' not in df.columns:
        print(f"Warning: Table '{key}' is missing required 'lat'/'lon' or 'latitude'/'longitude' columns.")
        continue  # Skip tables without valid latitude/longitude columns

    # Remember the DataFrame together with the table it came from
    dfs.append(df)
    table_names.append(key)

# Merge all DataFrames on 'lat' and 'lon' if there are valid DataFrames
if dfs:
    merged_df = dfs[0]
    for key, df in zip(table_names[1:], dfs[1:]):
        # Non-key columns present on both sides are not combined by merge();
        # pandas keeps both copies and disambiguates them with '_x'/'_y'.
        # Surface that instead of letting it happen silently.
        shared_columns = [
            col for col in df.columns
            if col in merged_df.columns and col not in merge_keys
        ]
        if shared_columns:
            print(f"Warning: Table '{key}' shares columns {shared_columns} with earlier tables; "
                  "pandas will suffix them with '_x'/'_y'.")

        try:
            merged_df = merged_df.merge(df, on=merge_keys, how='outer')
        except (KeyError, ValueError) as e:
            # Every frame in `dfs` already has 'lat'/'lon', so the realistic
            # failures are ValueErrors: pandas.errors.MergeError (a ValueError
            # subclass) and incompatible key dtypes between the two frames.
            print(f"Error merging table '{key}' on 'lat'/'lon': {e}")
            continue  # Skip this table and keep what has been merged so far

    # Display the merged DataFrame
    print("\nMerged DataFrame:")
    print(merged_df.head(10))
else:
    # Bind merged_df anyway so downstream cells see an (empty) result from
    # this run rather than a NameError or a leftover from a previous run.
    merged_df = pd.DataFrame(columns=merge_keys)
    print("No valid tables with latitude and longitude data were found.")



Merged DataFrame:
     lat     lon  elevation  radiance  Swnet_min  Swnet_max  Swnet_avg  \
0  3.125   6.625        NaN  0.186914        NaN        NaN        NaN   
1  3.125   6.875        NaN  0.221774        NaN        NaN        NaN   
2  3.125   9.875        NaN  0.126922        NaN        NaN        NaN   
3  3.125  10.125       40.0  0.012757  151.43755  203.05194  171.92729   
4  3.125  10.375      110.0       NaN  152.86908  203.03526  171.89952   
5  3.125  10.625      362.0       NaN  152.27028  206.41410  175.54732   
6  3.125  10.875      633.0       NaN  154.93806  207.57379  178.19983   
7  3.125  11.125      831.0       NaN  157.89967  212.25233  183.51044   
8  3.125  11.375      703.0       NaN  156.04794  209.56664  181.43266   
9  3.125  11.625      716.0       NaN  156.02440  207.93358  180.82967   

   Lwnet_min  Lwnet_max  Lwnet_avg  ...  Qair_avg  Psurf_min   Psurf_max  \
0        NaN        NaN        NaN  ...       NaN        NaN         NaN   
1        NaN  

In [60]:
# Dimensions of the merged frame as (rows, columns).
merged_df.shape

(11220, 112)

In [61]:
# Inspect a single column of the merged frame.
# NOTE(review): the NaN entries presumably come from (lat, lon) rows that have
# no value for this column -- the merge cell uses how='outer', which keeps
# unmatched rows -- but they could also be missing in the source data; confirm.
merged_df["Albedo_avg"]

0              NaN
1              NaN
2              NaN
3        12.185192
4        12.413850
           ...    
11215    19.827429
11216    19.704557
11217    19.624940
11218    19.249939
11219          NaN
Name: Albedo_avg, Length: 11220, dtype: float64

In [53]:
# List every column label of the merged frame, one per line.
# Iterating a DataFrame directly yields its column labels.
for column_label in merged_df:
    print(column_label)

lat
lon
elevation
Swnet_min
Swnet_max
Swnet_avg
Lwnet_min
Lwnet_max
Lwnet_avg
Qle_min
Qle_max
Qle_avg
Qh_min
Qh_max
Qh_avg
Qg_min
Qg_max
Qg_avg
Snowf_min
Snowf_max
Snowf_avg
Rainf_min
Rainf_max
Rainf_avg
Evap_min
Evap_max
Evap_avg
Qs_acc_min
Qs_acc_max
Qs_acc_avg
Qsb_acc_min
Qsb_acc_max
Qsb_acc_avg
Qsm_acc_min
Qsm_acc_max
Qsm_acc_avg
AvgSurfT_min
AvgSurfT_max
AvgSurfT_avg
Albedo_min
Albedo_max
Albedo_avg
SWE_min
SWE_max
SWE_avg
SnowDepth_min
SnowDepth_max
SnowDepth_avg
SoilM010_min
SoilM010_max
SoilM010_avg
SoilM1040_min
SoilM1040_max
SoilM1040_avg
SoilM40100_min
SoilM40100_max
SoilM40100_avg
SoilM100200_min
SoilM100200_max
SoilM100200_avg
SoilT010_min
SoilT010_max
SoilT010_avg
SoilT1040_min
SoilT1040_max
SoilT1040_avg
SoilT40100_min
SoilT40100_max
SoilT40100_avg
SoilT100200_min
SoilT100200_max
SoilT100200_avg
PotEvap_min
PotEvap_max
PotEvap_avg
ECanop_min
ECanop_max
ECanop_avg
Tveg_min
Tveg_max
Tveg_avg
ESoil_min
ESoil_max
ESoil_avg
RootMoist_min
RootMoist_max
RootMoist_avg
CanopInt_m