In [1]:
from fetch_data import fetch_data_from_postgres
#Merge data
mvariables = {
    "lat_min": -2,
    "lat_max": 2,
    "lon_min": -60,
    "lon_max": -58,
    "years": [2020]
}

json_outputs = fetch_data_from_postgres(variables=mvariables)


--- DB SIGN IN: ---
reading .env files...
successfully connected to:	 postgres
available schemas:		 ecosystem_classifier, information_schema, public
setting default schema to:	 ecosystem_classifier
tables in default schema:	 radiance_2020, ndvi_2020, earthdata_1980, meteostat_2017, us_rules, meteostat_2010, meteostat_2012, meteostat_2013, meteostat_2014, meteostat_2018, meteostat_2000, elevation, us_gaz, earthdata_1990, earthdata_1995, us_lex, meteostat_2015, earthdata_2011, earthdata_2012, meteostat_1990, meteostat_2011, meteostat_stations, earthdata_1970, earthdata_2019, radiance_2017, radiance_2018, radiance_2019, radiance_2021, earthdata_2000, earthdata_2005, earthdata_2010, earthdata_2013, meteostat_2016, spatial_ref_sys, app_contact_messages, earthdata_2014, meteostat_2019, meteostat_2020, meteostat_2021, earthdata_2015, earthdata_2016, earthdata_2017, earthdata_2018, meteostat_2005, radiance_2012, radiance_2013, radiance_2014, radiance_2015, radiance_2016, radiance_2022, radian

In [2]:
import json
import pandas as pd

# Initialize an empty list to store the DataFrames
dfs = []

# Loop over each table in json_outputs
for key, data in json_outputs.items():
    # Parse JSON string if necessary
    if isinstance(data, str):
        try:
            parsed_data = json.loads(data)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON for table '{key}': {e}")
            continue  # Skip this table if JSON parsing fails
    else:
        parsed_data = data

    # Convert parsed data to a DataFrame
    try:
        df = pd.DataFrame(parsed_data)
    except ValueError as e:
        print(f"Error creating DataFrame for table '{key}': {e}")
        continue  # Skip this table if DataFrame creation fails

    # Standardize column names to 'lat' and 'lon' for merging
    if 'latitude' in df.columns and 'longitude' in df.columns:
        df.rename(columns={'latitude': 'lat', 'longitude': 'lon'}, inplace=True)
    elif 'lat' not in df.columns or 'lon' not in df.columns:
        print(f"Warning: Table '{key}' is missing required 'lat'/'lon' or 'latitude'/'longitude' columns.")
        continue  # Skip tables without valid latitude/longitude columns

    # Append the DataFrame to the list
    dfs.append(df)

# Merge all DataFrames on 'lat' and 'lon' if there are valid DataFrames
if dfs:
    merged_df = dfs[0]
    for df in dfs[1:]:
        try:
            merged_df = merged_df.merge(df, on=['lat', 'lon'], how='outer')
        except KeyError as e:
            print(f"Error merging table due to missing 'lat'/'lon' columns: {e}")
            continue  # Skip merging if 'lat'/'lon' columns are missing

    # Display the merged DataFrame
    print("\nMerged DataFrame:")
    print(merged_df.head(10))
else:
    print("No valid tables with latitude and longitude data were found.")



Merged DataFrame:
     lat     lon  elevation  radiance      ndvi  Swnet_min  Swnet_max  \
0 -1.875 -59.875       97.0  0.005382  0.721918  165.80704  229.59800   
1 -1.875 -59.875       97.0  0.005382  0.721918  165.80704  229.59800   
2 -1.875 -59.625       81.0       NaN  0.478903  166.42150  227.04250   
3 -1.875 -59.375       38.0  0.038720  0.557888  153.92062  221.87888   
4 -1.875 -59.375       38.0  0.038720  0.557888  153.92062  221.87888   
5 -1.875 -59.125      106.0       NaN  0.759872  158.95009  226.96942   
6 -1.875 -58.875      137.0       NaN  0.775727  153.02817  227.91917   
7 -1.875 -58.625       73.0       NaN  0.776553  152.32771  220.99000   
8 -1.875 -58.375       42.0       NaN  0.772459  150.33238  218.94513   
9 -1.875 -58.125       72.0       NaN  0.813295  148.02092  215.03604   

   Swnet_avg  Lwnet_min  Lwnet_max  ...  Qair_avg  Psurf_min   Psurf_max  \
0  193.64354 -53.923150 -31.744638  ...  0.016727   99407.14   99696.740   
1  193.64354 -53.923150 -

In [3]:
merged_df["ecosystem"] = "Urban-City"

In [5]:
merged_df.iloc[0:3,10:30]

Unnamed: 0,Lwnet_avg,Qle_min,Qle_max,Qle_avg,Qh_min,Qh_max,Qh_avg,Qg_min,Qg_max,Qg_avg,Snowf_min,Snowf_max,Snowf_avg,Rainf_min,Rainf_max,Rainf_avg,Evap_min,Evap_max,Evap_avg,Qs_acc_min
0,-41.843624,109.06175,152.98096,125.803246,10.713118,38.366722,26.437057,-0.807152,0.618713,-0.23686,0.0,0.0,0.0,2.8e-05,0.000133,7e-05,4.4e-05,6.1e-05,5e-05,0.005627
1,-41.843624,109.06175,152.98096,125.803246,10.713118,38.366722,26.437057,-0.807152,0.618713,-0.23686,0.0,0.0,0.0,2.8e-05,0.000133,7e-05,4.4e-05,6.1e-05,5e-05,0.005627
2,-40.746574,109.37356,154.90436,126.68064,14.367916,34.853592,24.47467,-0.794986,0.745487,-0.178876,0.0,0.0,0.0,2.9e-05,0.000135,6.9e-05,4.4e-05,6.2e-05,5.1e-05,0.006062
