In [2]:
import pickle
import pandas as pd
import pickle
import geopandas as gpd

In [3]:
# Read in all the geopandas dataframes for the environmental parameters
env_params = ["NO3", "NH4", "PAR_avg", "PO4", "SALT", "TEMP"]
dict_env_dfs = {env_param: gpd.GeoDataFrame(pd.read_pickle("nw_"+env_param+"_3_months_pickle.pkl")).reset_index() for env_param in env_params}

In [3]:
# Assert if they all have the same geometry
# This is needed so we can use the geometry of all dfs interchangeably
list_env_dfs_geometry = [dict_env_dfs[env_param]["geometry"] for env_param in env_params]
i = 0
while i < len(list_env_dfs_geometry) -1:
    assert list_env_dfs_geometry[i].equals(list_env_dfs_geometry[i+1])
    i += 1

In [4]:
# Create all the groupby objects
dict_env_dfs_grouped = {env_param: dict_env_dfs[env_param].groupby(["TLAT", "TLONG"]) for env_param in env_params}

In [5]:
lat_lon = list(dict_env_dfs_grouped["NO3"].groups.keys())

In [6]:
lat_lon

[(-79.2205226074621, 1.0625000295666882),
 (-79.2205226074621, 2.187500060872665),
 (-79.2205226074621, 3.312500092178642),
 (-79.2205226074621, 4.437500123484619),
 (-79.2205226074621, 5.562500154790596),
 (-79.2205226074621, 6.687500186096575),
 (-79.2205226074621, 7.812500217402551),
 (-79.2205226074621, 8.937500248708528),
 (-79.2205226074621, 10.062500280014506),
 (-79.2205226074621, 11.187500311320482),
 (-79.2205226074621, 12.312500342626459),
 (-79.2205226074621, 13.437500373932435),
 (-79.2205226074621, 14.562500405238413),
 (-79.2205226074621, 15.68750043654439),
 (-79.2205226074621, 16.812500467850366),
 (-79.2205226074621, 17.937500499156343),
 (-79.2205226074621, 19.062500530462323),
 (-79.2205226074621, 20.187500561768296),
 (-79.2205226074621, 21.312500593074276),
 (-79.2205226074621, 22.437500624380252),
 (-79.2205226074621, 23.56250065568623),
 (-79.2205226074621, 24.68750068699221),
 (-79.2205226074621, 25.81250071829819),
 (-79.2205226074621, 26.937500749604162),
 (-

In [5]:
data_dict = {}
# Itereate over all the lat_lon combos, those are the same for all environmental parameters
for lat_lon in dict_env_dfs_grouped["NO3"].groups.keys():
    list_env_param_latlon_df = []
    for env_param in env_params:
        env_param_latlon_df = dict_env_dfs_grouped[env_param].get_group(lat_lon)
        env_param_latlon_df.set_index("time", inplace=True)
        list_env_param_latlon_df.append(pd.DataFrame(env_param_latlon_df))
    concat_latlon_dfs = pd.concat(list_env_param_latlon_df, axis=1)
    # Remove duplicate columns
    concat_latlon_dfs = concat_latlon_dfs.loc[:,~concat_latlon_dfs.columns.duplicated()].copy()
    # Convert back to geodataframe before saving
    data_dict[lat_lon] = gpd.GeoDataFrame(concat_latlon_dfs)

In [6]:
data_dict[list(data_dict.keys())[0]]

Unnamed: 0_level_0,TLONG,TLAT,NO3,geometry,PAR_avg,PO4,SALT,TEMP
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0005-02-01 00:00:00,1.0625,-79.220523,,POINT (1.06250 -79.22052),,,,
0005-03-01 00:00:00,1.0625,-79.220523,,POINT (1.06250 -79.22052),,,,
0005-04-01 00:00:00,1.0625,-79.220523,,POINT (1.06250 -79.22052),,,,


In [7]:
# Make pickle out of it, so we don't have to run this every time
with open ("data_gridded_all_parameters.pkl", "wb") as handle:
    pickle.dump(data_dict, handle, protocol = pickle.HIGHEST_PROTOCOL)