In [None]:
import pandas as pd, numpy as np
import dypy.netcdf as dn
import dypy.intergrid as ig
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import ndimage

# Define constants

In [None]:
# Ensemble member identifiers "001" ... "035" (zero-padded to three digits).
ensemble_nr_list = [f"{member:03d}" for member in range(1, 35 + 1)]

# Simulation years 2091 ... 2100 as strings, ready to be spliced into file names.
years = [str(year) for year in range(2091, 2100 + 1)]

# One history file per (ensemble member, year); members vary slowest.
paths = [
    f"/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.BRCP85LENS.f09_g16.ethz.{ensemble_nr}/archive/atm/hist/b.e112.BRCP85LENS.f09_g16.ethz.{ensemble_nr}.cam.h2.{year}-01-01-21600.nc"
    for ensemble_nr in ensemble_nr_list
    for year in years
]

# Smoke test: read one small variable from every file so that a missing or
# unreadable file fails here instead of in the middle of the extraction loop.
# The message is "<member position>.<year position> worked" (10 files per member).
for i, path in enumerate(paths):
    np.array(dn.read_var(path, "hyam"))[0]
    member_pos, year_pos = divmod(i, 10)
    print(f"{member_pos+1}.{year_pos+1} worked")

In [None]:
# To try the pipeline on a single file, uncomment the following line:
#paths = ["/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.BRCP85LENS.f09_g16.ethz.001/archive/atm/hist/b.e112.BRCP85LENS.f09_g16.ethz.001.cam.h2.2091-01-01-21600.nc"]

# Longitude / latitude bounds (inclusive) of the region to extract.
lon_min_CESM = 0
lon_max_CESM = 15
lat_min_CESM = 42
lat_max_CESM = 50

# The coordinate axes are taken from the first file only.
lons, lats = dn.read_var(paths[0], ["lon", "lat"])

# Positions of the grid points that fall inside the bounding box.
lon_in_box = (lons >= lon_min_CESM) & (lons <= lon_max_CESM)
xindex = np.where(lon_in_box)[0]
print("xindex:", xindex)
print("lons:", lons[xindex])

lat_in_box = (lats >= lat_min_CESM) & (lats <= lat_max_CESM)
yindex = np.where(lat_in_box)[0]
print("yindex:", yindex)
print("lats:", lats[yindex])

xmin = xindex.min()
xmax = xindex.max()
print("xmin:", xmin)
print("xmax:", xmax)

ymin = yindex.min()
ymax = yindex.max()
print("ymin:", ymin)
print("ymax:", ymax)

# Four-axis slice: the first two axes are taken in full, the last two are
# restricted to the lat/lon window found above (upper bounds inclusive).
index = (
    slice(None),
    slice(None),
    slice(ymin, ymax + 1),
    slice(xmin, xmax + 1),
)

# Column-name fragments: coordinate times 100, truncated to an integer.
lats_labels = []
for lat in lats[yindex]:
    lats_labels.append(str(int(100*lat)))

lons_labels = []
for lon in lons[xindex]:
    lons_labels.append(str(int(100*lon)))

print(lats_labels)
print(lons_labels)

# Grid size of the extracted window.
lats_amount = len(lats_labels)
lons_amount = len(lons_labels)

# Read CESM-f files

In [None]:
# Number of time steps read from every file. This matches the date axis built
# in the post-processing cell (365 days x 4 time steps per day).
N_TIME_STEPS = 1460

# Reference pressure multiplied with the hybrid A coefficients (hyam).
REFERENCE_PRESSURE = 100000

# Pressure surfaces (same unit as PS) on which T, V, Z and U are extracted.
# T is additionally extracted at 90000.
PRESSURE_LEVELS = [50000, 70000, 85000]


def _feature_names(variable, level_label):
    """Return the column names `<variable>_<lat>_<lon>_<level_label>` in row-major (lat, lon) order."""
    return [f"{variable}_{lat}_{lon}_{level_label}" for lat in lats_labels for lon in lons_labels]


def _interpolate_to_pressure_level(fields, pressure_3d, target_pressure):
    """Linearly interpolate fields onto a constant-pressure surface.

    Parameters
    ----------
    fields : sequence of ndarray
        Arrays with the same (level, lat, lon) shape as `pressure_3d`.
    pressure_3d : ndarray
        Pressure at every model level and grid point, level axis first.
        Assumed to increase along the level axis - TODO confirm against the
        model's level ordering.
    target_pressure : float
        Pressure of the surface to interpolate to.

    Returns
    -------
    list of ndarray
        One (lat, lon) array per input field. Grid points where no valid
        bracketing level pair exists are set to NaN.
    """
    # argmin over a boolean array returns the first False entry, i.e. the first
    # level whose pressure is >= target_pressure.
    index_lvl = np.expand_dims((pressure_3d < target_pressure).argmin(axis=0), axis=0)

    # Index 0 means either that every level lies below the target pressure
    # (argmin of an all-True column is 0) or that the first level is already at
    # or beyond it; in both cases there is no level pair to interpolate between.
    no_bracket_mask = (index_lvl == 0)[0]

    p_2 = np.take_along_axis(pressure_3d, index_lvl, axis=0)[0]
    p_1 = np.take_along_axis(pressure_3d, index_lvl - 1, axis=0)[0]
    delta_p = (target_pressure - p_1) / (p_2 - p_1)

    interpolated_fields = []
    for field in fields:
        f_2 = np.take_along_axis(field, index_lvl, axis=0)[0]
        f_1 = np.take_along_axis(field, index_lvl - 1, axis=0)[0]
        interpolated = (f_2 - f_1) * delta_p + f_1
        # np.nan instead of np.NaN: the capitalised alias was removed in NumPy 2.0.
        interpolated[no_bracket_mask] = np.nan
        interpolated_fields.append(interpolated)
    return interpolated_fields


# Column names do not depend on the file or the time step, so build them once.
names_T_900 = _feature_names("T", "900")
names_by_level = {
    p: {variable: _feature_names(variable, p//100) for variable in ("T", "V", "Z", "U")}
    for p in PRESSURE_LEVELS
}
names_SLP = _feature_names("SLP", "sealevel")

rows_list = []
for filepath in paths:
    # One line per file serves as the progress indicator; printing every time
    # step would add N_TIME_STEPS lines per file.
    print(filepath)

    hyam, hybm = np.array(dn.read_var(filepath, ["hyam", "hybm"]))
    SLP_values, PS_values = np.array(dn.read_var(filepath, ["PSL", "PS"], index=index))
    T_values, V_values, U_values, Z_values = np.array(dn.read_var(filepath, ["T", "V", "U", "Z3"], index=index))

    # The hyam * reference-pressure part of the level pressure is time independent.
    reference_term = np.tensordot(hyam, REFERENCE_PRESSURE*np.ones((lats_amount, lons_amount)), axes=0)

    for time_point in range(N_TIME_STEPS):
        feature_dict = {}

        # Pressure at every model level: hyam * P0 + hybm * PS.
        P3_t = reference_term + np.tensordot(hybm, PS_values[time_point], axes=0)

        # Temperature only on the 90000 surface.
        (T_interpol,) = _interpolate_to_pressure_level([T_values[time_point]], P3_t, 90000)
        feature_dict.update(zip(names_T_900, T_interpol.flatten()))

        # T, V, Z and U on the remaining surfaces, inserted in that order.
        for p in PRESSURE_LEVELS:
            T_interpol, V_interpol, Z_interpol, U_interpol = _interpolate_to_pressure_level(
                [T_values[time_point], V_values[time_point], Z_values[time_point], U_values[time_point]],
                P3_t,
                p,
            )
            level_names = names_by_level[p]
            feature_dict.update(zip(level_names["T"], T_interpol.flatten()))
            feature_dict.update(zip(level_names["V"], V_interpol.flatten()))
            feature_dict.update(zip(level_names["Z"], Z_interpol.flatten()))
            feature_dict.update(zip(level_names["U"], U_interpol.flatten()))

        # PSL divided by 100 (presumably Pa -> hPa; confirm against the file's units).
        SLP_values_t = SLP_values[time_point]/100
        feature_dict.update(zip(names_SLP, SLP_values_t.flatten()))

        rows_list.append(feature_dict)


# One row per (file, time step). Any column containing at least one NaN
# (a grid point that could not be interpolated at some time) is dropped entirely.
df = pd.DataFrame(rows_list).dropna(axis=1)
df.tail()

# Post-Processing

In [None]:
# Two-digit month labels "01" ... "12".
months = [f"{month_nr:02d}" for month_nr in range(1, 12 + 1)]

# Days per month; February always has 28 days, so no leap days are generated.
days_in_month = {
    "01": 31, "02": 28, "03": 31, "04": 30,
    "05": 31, "06": 30, "07": 31, "08": 31,
    "09": 30, "10": 31, "11": 30, "12": 31,
}

# Four time steps per day.
hours = ["00", "06", "12", "18"]

# Time stamps of one ensemble member, in chronological order.
dates = [
    f"{year}-{month}-{day} {hour}:00"
    for year in years
    for month in months
    for day in range(1, days_in_month[month] + 1)
    for hour in hours
]

# Ensemble label for every row: each member covers one full block of time stamps.
dates_per_ensemble = len(dates)
ensembles = []
for nr in range(1, 35 + 1):
    ensembles += [f"E{nr}"] * dates_per_ensemble
print(len(ensembles))
print(len(dates))

# Repeat the time axis once per ensemble member so it lines up with `ensembles`.
dates = dates*35
print(len(dates))

df_dates = pd.DataFrame({"date": dates})
df_ensembles = pd.DataFrame({"ensemble": ensembles})

# Both assignments align on the row index of `df`.
df['date'] = pd.to_datetime(df_dates['date'], format="%Y-%m-%d %H:%M")
df["ensemble"] = df_ensembles["ensemble"]
df.head(100)

In [None]:
# Persist the feature table, including the date and ensemble columns, as CSV
# in the working directory; the row index is not written.
df.to_csv(path_or_buf="CESMf_data.csv", index=False)