In [45]:
import xarray as xr
import numpy as np
import pandas as pd
import dask.array as da
import zarr
import plotly.graph_objects as go
import matplotlib.pyplot as plt

from utils import DataFinder, MetricCalculation

In [60]:
# Build the zero-filled template that defines the layout of the per-model
# zonal-mean Zarr store (time x model x region, one variable per quantity).
# NOTE: `path` is reassigned by the observation-store cell further down, so the
# cells that read the model store must run before that one.
path = "results/zonal_means/model_zonal_mean.zarr"

models = [
    "FGOALS-g3",
    "CanESM5",
    "ACCESS-CM2",
    "EC-Earth3-Veg",
    "EC-Earth3-Veg-LR",
    "FIO-ESM-2-0",
    "IPSL-CM6A-LR",
    "MIROC6",
    "MPI-ESM1-2-LR",
    "MRI-ESM2-0",
    "CESM2-WACCM",
    "NorESM2-LM",
    "KACE-1-0-G",
    "GFDL-ESM4",
]

# Region name -> [lower, upper] latitude bounds.
regions = {
    "global": [-90, 90],
    "northern_hemisphere": [0, 90],
    "southern_hemisphere": [-90, 0],
    "tropics": [-30, 30],
}

# Month-start timestamps from 1960-01 through 2024-12.
time = pd.date_range(start="1960-01-01", end="2024-12-01", freq="MS")

# Lazy zeros; each chunk holds the full time axis for a single (model, region).
data = da.zeros((len(time), len(models), len(regions)), chunks=(-1, 1, 1))

ds = xr.DataArray(
    data=data,
    dims=["time", "model", "region"],
    coords={"time": time, "model": models, "region": list(regions)},
)

# Every variable shares the same zero-filled placeholder array.
ds_template = xr.merge(
    [ds.to_dataset(name=variable) for variable in ("tas", "pr", "clt", "od550aer", "tos")]
)

# Store initialisation is left disabled, as in the original cell
# (presumably run once to create the store -- confirm before re-enabling).
# ds_template.to_zarr(path,compute=False)

In [None]:
# for model in ["CESM2-WACCM", "NorESM2-LM", "KACE-1-0-G", "GFDL-ESM4"]:
#     for var in ['tas','pr','clt','od550aer','tos']:
#         try:
#             data_finder = DataFinder(model=model,variable=var,start_year=1960,end_year=2024)
#             model_ds = data_finder.load_model_ds()
#             fx_ds = data_finder.load_cell_area_ds()
#             print('data loaded, beginning calcs')
#             for region, lat_bnds in regions.items():
#                 weights = fx_ds.where(fx_ds.lat > lat_bnds[0])
#                 weights = weights.where(weights.lat < lat_bnds[1])

#                 zmean_ds = model_ds.weighted(weights.fillna(0)).mean(dim=['lat','lon'])
#                 zmean_ds = zmean_ds.expand_dims({'model':[model],'region':[region]})
#                 print(f'saving data for region: {region}')
#                 zmean_ds.chunk({'time':-1,'model':1,'region':1}).to_zarr(path,region='auto')
#         except:
#             print(f'data not found for {model}, {var}')

In [61]:
####### LOOK AT ZONAL MEAN TIME SERIES #######
# Draw one line per model for the global od550aer series read from the store
# at `path`.

ds = xr.open_zarr(path, chunks={})
ds = ds.sel(region="global").od550aer
# ds = ds.groupby('time.year').mean()

fig = go.Figure()

for model in ds.model.values.tolist():
    line_ds = ds.sel(model=model)
    trace = go.Scatter(
        x=line_ds["time"].values,
        y=line_ds,
        mode="lines",
        name=model,
    )
    fig.add_trace(trace)

# Figure-level labels and size, applied in a single update_layout call.
layout = {
    "title": "Zonal Mean Comparison",
    "xaxis_title": "Time",
    "yaxis_title": "Value",
    "width": 1300,
    "height": 500,
    "legend_title": "Dataset",
}
fig.update_layout(**layout)

fig.show()


The codec `vlen-utf8` is currently not part in the Zarr format 3 specification. It may not be supported by other zarr implementations and may change in the future.



In [None]:
# DONE -- need to redo GFDL, MPI for everything
# DONE -- check MIROC6
# DONE -- fix FIO clt (scaling)
# tos didn't work -- fixed for most, except KACE-1-0-G. should also check MPI (looks low again)

# MPI weighting is off compared to CanESM5, and its total area is low. The CanESM5 global mean was the same using areacella data and cos(lat) as weights.
# The MPI global mean was much lower when using areacella weights than when using cos(lat) weights.

# Same with GFDL: the shape was the same, but using areacella weights gave a ~0.4 K negative bias.

In [62]:
# Flatten the model store into a long table and write it out as CSV.
ds = xr.open_zarr(path, chunks={})
df = ds.to_dataframe().reset_index()

# Exact zeros are replaced with NaN. The store template is zero-filled
# (da.zeros), so zeros presumably mark entries that were never written -- confirm.
for column in ("tas", "pr", "clt", "od550aer", "tos"):
    df.loc[df[column] == 0, column] = np.nan

df.to_csv("results/zonal_means/model_zonal_mean_monthly.csv")


The codec `vlen-utf8` is currently not part in the Zarr format 3 specification. It may not be supported by other zarr implementations and may change in the future.



## Do the same for observations (with error estimates)

In [54]:
# Build the zero-filled template for the observation zonal-mean Zarr store
# (time x region; there is no model axis here).
# NOTE: this rebinds `path`, `regions`, `time`, `data`, `ds` and `ds_template`.
path = "results/zonal_means/observation_zonal_mean.zarr"

# Region name -> [lower, upper] latitude bounds.
regions = {
    "global": [-90, 90],
    "northern_hemisphere": [0, 90],
    "southern_hemisphere": [-90, 0],
    "tropics": [-30, 30],
}

# Month-start timestamps from 1960-01 through 2024-12.
time = pd.date_range(start="1960-01-01", end="2024-12-01", freq="MS")

# Lazy zeros; each chunk holds the full time axis for a single region.
data = da.zeros((len(time), len(regions)), chunks=(-1, 1))

ds = xr.DataArray(
    data=data,
    dims=["time", "region"],
    coords={"time": time, "region": list(regions)},
)

# tas, pr and tos carry a companion "<name>_error" variable; clt and od550aer do not.
obs_variable_names = (
    "tas",
    "tas_error",
    "pr",
    "pr_error",
    "clt",
    "od550aer",
    "tos",
    "tos_error",
)
ds_template = xr.merge([ds.to_dataset(name=variable) for variable in obs_variable_names])

# Store initialisation is left disabled, as in the original cell
# (presumably run once to create the store -- confirm before re-enabling).
# ds_template.to_zarr(path,compute=False)

In [55]:
# Absolute, machine-specific folder that holds the observational Zarr stores.
obs_root = '/Users/willatobin/Documents/SIO/ClimateBench2/backend/observational_data'

# Variable name -> file stem of its observational product.
obs_stems = {
    'tas': 'tas_HadCRUT5',
    'pr': 'pr_noaa_gpcp',
    'tos': 'tos_noaa_oisst',
    'clt': 'clt_nasa_modis',
    'od550aer': 'od550aer_nasa_modis',
}

# Variable name -> path of the observation store.
variable_paths = {var: f'{obs_root}/{stem}.zarr' for var, stem in obs_stems.items()}

# Variable name -> path of the matching error store (only tas, pr and tos have one).
variable_error_paths = {
    var: f'{obs_root}/{obs_stems[var]}_error.zarr' for var in ('tas', 'pr', 'tos')
}

In [56]:
# for var, obs_path in variable_paths.items():
#     obs_ds = xr.open_zarr(obs_path,chunks={})
    
#     fx_ds = np.cos(np.deg2rad(obs_ds.lat))
#     fx_ds = fx_ds.expand_dims({"lon": obs_ds.lon})
    
#     print('data loaded, beginning calcs')
#     for region, lat_bnds in regions.items():
#         weights = fx_ds.where(fx_ds.lat > lat_bnds[0])
#         weights = weights.where(weights.lat < lat_bnds[1])

#         zmean_ds = obs_ds.weighted(weights.fillna(0)).mean(dim=['lat','lon'])
#         zmean_ds = zmean_ds.expand_dims({'region':[region]})
#         zmean_ds = zmean_ds.drop_vars(['month','realization'],errors='ignore')
#         if len(zmean_ds.time.values) < len(ds_template.time.values):
#             ds_filler = ds_template.sel(region=region,time = slice('1960-01-01',zmean_ds.time.isel(time=0).values)).isel(time=slice(0,-1))[var].to_dataset(name=var).expand_dims({'region':[region]})
#             filler_nans = np.empty((len(ds_filler.time),len(ds_filler.region)))
#             filler_nans[:] = np.nan
#             ds_filler[var] = (("time","region"),filler_nans)
#             zmean_ds = xr.concat([
#                 ds_filler,
#                 zmean_ds,
#             ],dim='time')

#         print(f'saving data for region: {region}')
#         zmean_ds.chunk({'time':-1,'region':1}).to_zarr(path,region='auto')


In [57]:
# for var, obs_path in variable_error_paths.items():
#     obs_ds = xr.open_zarr(obs_path,chunks={})
    
#     fx_ds = np.cos(np.deg2rad(obs_ds.lat))
#     fx_ds = fx_ds.expand_dims({"lon": obs_ds.lon})
    
#     print('data loaded, beginning calcs')
#     for region, lat_bnds in regions.items():
#         weights = fx_ds.where(fx_ds.lat > lat_bnds[0])
#         weights = weights.where(weights.lat < lat_bnds[1])

#         zmean_ds = obs_ds.weighted(weights.fillna(0)).mean(dim=['lat','lon'])
#         zmean_ds = zmean_ds.expand_dims({'region':[region]})
#         zmean_ds = zmean_ds.drop_vars(['month','realization'],errors='ignore')
#         if len(zmean_ds.time.values) < len(ds_template.time.values):
#             ds_filler = ds_template.sel(region=region,time = slice('1960-01-01',zmean_ds.time.isel(time=0).values)).isel(time=slice(0,-1))[var].to_dataset(name=var).expand_dims({'region':[region]})
#             filler_nans = np.empty((len(ds_filler.time),len(ds_filler.region)))
#             filler_nans[:] = np.nan
#             ds_filler[var] = (("time","region"),filler_nans)
#             zmean_ds = xr.concat([
#                 ds_filler,
#                 zmean_ds,
#             ],dim='time')

#         zmean_ds = zmean_ds[var].to_dataset(name=f"{var}_error")
#         print(f'saving data for region: {region}')
#         zmean_ds.chunk({'time':-1,'region':1}).to_zarr(path,region='auto')

In [59]:
# Flatten the observation store into a long table and write it out as CSV.
ds = xr.open_zarr(path, chunks={})
df = ds.to_dataframe().reset_index()

# Exact zeros are replaced with NaN. The store template is zero-filled
# (da.zeros), so zeros presumably mark entries that were never written -- confirm.
# Every data variable in the store is masked, including the "*_error" ones:
# they start from the same zero-filled template, and leaving their
# placeholders in place would export them as "zero uncertainty".
for column in ds.data_vars:
    df.loc[df[column] == 0, column] = np.nan

df.to_csv('results/zonal_means/observation_zonal_mean_monthly.csv')


The codec `vlen-utf8` is currently not part in the Zarr format 3 specification. It may not be supported by other zarr implementations and may change in the future.



In [None]:
# Save annual zonal means, using the annual sum for pr instead of the mean.

In [65]:
# Collapse the monthly observation table to one row per (year, region):
# pr and pr_error are summed over the year, everything else is averaged.
# The path is relative, matching the one the monthly export cell writes to,
# instead of a machine-specific absolute path.
obs_df = pd.read_csv('results/zonal_means/observation_zonal_mean_monthly.csv')

obs_df['year'] = pd.to_datetime(obs_df['time']).dt.year

obs_group_keys = ['year', 'region']

obs_df_pr = obs_df[['region', 'pr_error', 'pr', 'year']]
obs_df_not_pr = obs_df[['region', 'od550aer', 'tos_error','tas_error', 'tas', 'tos', 'clt', 'year']]

# min_count=1 keeps a year with no valid months as NaN. The default sum()
# returns 0 for an all-NaN group, which would bring back the placeholder
# zeros that the monthly export replaced with NaN.
# NOTE(review): a year with only some months present still yields a partial
# sum; use min_count=12 if only complete years should be reported.
obs_pr_annual = obs_df_pr.groupby(obs_group_keys).sum(min_count=1).reset_index()
obs_not_pr_annual = obs_df_not_pr.groupby(obs_group_keys).mean().reset_index()

# Join on the grouping keys explicitly rather than on whichever columns overlap.
obs_annual_df = obs_pr_annual.merge(obs_not_pr_annual, on=obs_group_keys)

# Each annual row is stamped with 1 January of its year.
obs_annual_df['time'] = obs_annual_df['year'].astype(str) + '-01-01'
obs_annual_df[['time','region','pr_error','pr','od550aer','tos_error','tas_error','tas','tos','clt']].to_csv('results/zonal_means/observation_zonal_mean_annual.csv')

In [66]:
# Collapse the monthly model table to one row per (model, year, region):
# pr is summed over the year, everything else is averaged.
# The path is relative, matching the one the monthly export cell writes to,
# instead of a machine-specific absolute path.
model_df = pd.read_csv('results/zonal_means/model_zonal_mean_monthly.csv')

model_df['year'] = pd.to_datetime(model_df['time']).dt.year

model_group_keys = ['model', 'year', 'region']

model_df_pr = model_df[['model','region', 'pr', 'year']]
model_df_not_pr = model_df[['model','region', 'od550aer', 'tas', 'tos', 'clt', 'year']]

# min_count=1 keeps a (model, year, region) group with no valid months as NaN.
# The default sum() returns 0 for an all-NaN group, which would bring back the
# placeholder zeros that the monthly export replaced with NaN.
# NOTE(review): a year with only some months present still yields a partial
# sum; use min_count=12 if only complete years should be reported.
model_pr_annual = model_df_pr.groupby(model_group_keys).sum(min_count=1).reset_index()
model_not_pr_annual = model_df_not_pr.groupby(model_group_keys).mean().reset_index()

# Join on the grouping keys explicitly rather than on whichever columns overlap.
model_annual_df = model_pr_annual.merge(model_not_pr_annual, on=model_group_keys)

# Each annual row is stamped with 1 January of its year.
model_annual_df['time'] = model_annual_df['year'].astype(str) + '-01-01'

model_annual_df[['time','model','region','clt','pr','od550aer','tos','tas']].to_csv('results/zonal_means/model_zonal_mean_annual.csv')
