# Import packages

In [0]:
import pandas as pd, numpy as np
import dypy.netcdf as dn
import dypy.intergrid as ig
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import ndimage

# Define constants

In [0]:
# Bounding box of the study region, given as (min, max) pairs.
# Both bounds are treated as inclusive wherever they are used.

# W-E direction (longitude)
LON_MIN_ERA, LON_MAX_ERA = 0, 15

# S-N direction (latitude)
LAT_MIN_ERA, LAT_MAX_ERA = 42, 50

In [0]:
# Calendar components as strings; they are concatenated into directory
# and file names further down.
years = [str(year) for year in range(1981, 2019 + 1)]

# Months and days are zero-padded to two characters ("01", "02", ..., "10", ...).
months = [str(month).zfill(2) for month in range(1, 12 + 1)]
days = [str(day).zfill(2) for day in range(1, 31 + 1)]

# Four time steps per day, six hours apart.
hours = ["00", "06", "12", "18"]

# One list per output line, in the order years / months / days / hours.
print(years, months, days, hours, sep="\n")

In [0]:
# Reference CESM history file; only its "lon" and "lat" variables are read here.
path = "/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.B20TRLENS.f09_g16.ethz.001/archive/atm/hist/b.e112.B20TRLENS.f09_g16.ethz.001.cam.h2.1990-01-01-21600.nc"

lons, lats = dn.read_var(path, ["lon", "lat"])

# Positions along the lon axis whose value lies inside the bounding box (bounds inclusive).
xindex = np.where((lons >= LON_MIN_ERA) & (lons <= LON_MAX_ERA))[0]
print("xindex:", xindex)
print("lons:", lons[xindex])

# Same selection along the lat axis.
yindex = np.where((lats >= LAT_MIN_ERA) & (lats <= LAT_MAX_ERA))[0]
print("yindex:", yindex)
print("lats:", lats[yindex])

# First and last selected position on each axis.
xmin = xindex.min()
xmax = xindex.max()
print("xmin:", xmin)
print("xmax:", xmax)
ymin = yindex.min()
ymax = yindex.max()
print("ymin:", ymin)
print("ymax:", ymax)

# Four-axis slice: everything on the first two axes, the bounding box
# (end points included) on the last two.
index = np.s_[:, :, ymin:ymax + 1, xmin:xmax + 1]

# Corners of the bounding box in (lat, lon) order.
lo = np.array([LAT_MIN_ERA, LON_MIN_ERA])
hi = np.array([LAT_MAX_ERA, LON_MAX_ERA])

# Every selected (lat, lon) combination; lat varies slowest.
query_points = [
    [lat, lon]
    for lat in lats[yindex]
    for lon in lons[xindex]
]

# String labels per query point: each coordinate times 100, truncated to an integer.
query_points_labels = [
    [str(int(100 * lat)), str(int(100 * lon))]
    for lat, lon in query_points
]
print(query_points_labels)

# Define functions

In [0]:
def create_heatmap(grid, lats, lons, annot_bool=False):
    """Display a 2-D grid as a seaborn heatmap.

    Parameters:
        grid: 2-D values to plot; wrapped in a DataFrame before plotting.
        lats: labels used as the DataFrame index (one per row of `grid`).
        lons: labels used as the DataFrame columns (one per column of `grid`).
        annot_bool: forwarded to `sns.heatmap` as its `annot` argument.

    Returns:
        None. The figure is shown with `plt.show()`.
    """
    frame = pd.DataFrame(grid, index=lats, columns=lons)

    plt.figure(figsize=(15, 9))
    sns.heatmap(frame, annot=annot_bool)

    # Due to bug in matplotlib: shift the lower y-limit by +0.5 and the
    # upper y-limit by -0.5 before showing the figure.
    bottom, top = plt.ylim()
    plt.ylim(bottom + 0.5, top - 0.5)

    plt.show()

# Read in all relevant ERAI files

In [0]:
rootpath = "/net/bio/atmosdyn/erainterim/cdf/"

# File-name prefixes visited for every time step. Only "P" (SLP) and "Z"
# (Z, T, Q, U, V) files are read; "R" is listed but nothing is extracted from it.
file_letters = ["P", "R", "Z"]

# Position along the first axis of the "Z"-file fields -> label used in the
# column names. Position 0 is handled separately (T only, labelled "900").
# NOTE(review): the position/label pairing is carried over unchanged; confirm
# it against the level axis of the files.
PRESSURE_LEVELS = {1: "850", 3: "700", 5: "500"}

# Row / column window cut out of every ERA-Interim field.
# NOTE(review): the +90 / +180 offsets presumably assume a global 1-degree grid
# whose first row is 90S and first column is 180W -- TODO confirm.
ERA_LAT_SLICE = slice(LAT_MIN_ERA + 90, LAT_MAX_ERA + 90 + 1)
ERA_LON_SLICE = slice(180 + LON_MIN_ERA, 180 + LON_MAX_ERA + 1)


def _interpolated_features(field, variable, level_label):
    """Interpolate one 2-D field onto the query points and label the results.

    The field is cropped to the bounding-box window, wrapped in an
    `ig.Intergrid` interpolator built with the `lo` / `hi` corners, and
    evaluated at `query_points`.

    Parameters:
        field: 2-D array indexed as [lat, lon].
        variable: variable name used as the column-name prefix (e.g. "T").
        level_label: level suffix of the column name (e.g. "850").

    Returns:
        dict mapping "<variable>_<lat label>_<lon label>_<level_label>" to the
        interpolated value, in the order of `query_points`.
    """
    field_cut = field[ERA_LAT_SLICE, ERA_LON_SLICE]
    interfunc = ig.Intergrid(field_cut, lo=lo, hi=hi, verbose=False)
    values = interfunc(query_points)
    names = [
        variable + "_" + lat_label + "_" + lon_label + "_" + level_label
        for lat_label, lon_label in query_points_labels
    ]
    return dict(zip(names, values))


rows_list = []
for year in years:
    print("Year: " + year)
    yearpath = rootpath + year + "/"

    for month in months:
        monthpath = yearpath + month + "/"

        # `days` runs to 31 for every month; impossible dates (e.g. 02-30)
        # are expected to fail in the read below and are skipped like any
        # other unreadable time step.
        for day in days:
            for hour in hours:
                stamp = year + month + day + "_" + hour
                feature_dict = {"date": year + "-" + month + "-" + day + " " + hour + ":00"}

                for letter in file_letters:
                    filepath = monthpath + letter + stamp

                    if letter == "P":
                        try:
                            SLP, = dn.read_var(filepath, ["SLP"])
                        except Exception:
                            # Not a bare `except`, so Ctrl-C still interrupts the loop.
                            print("Couldn't read file: " + letter + stamp)
                            break

                        feature_dict.update(_interpolated_features(SLP, "SLP", "sealevel"))

                    elif letter == "Z":
                        try:
                            Z, T, Q, U, V = dn.read_var(filepath, ["Z", "T", "Q", "U", "V"])
                        except Exception:
                            print("Couldn't read file: " + letter + stamp)
                            break

                        # Only temperature is taken from the first level.
                        feature_dict.update(_interpolated_features(T[0], "T", "900"))

                        # `level_index` rather than `index`, so the `index` slice
                        # defined with the grid indices is not overwritten.
                        for level_index, pressure_level in PRESSURE_LEVELS.items():
                            for variable, field in (("Z", Z), ("T", T), ("Q", Q), ("U", U), ("V", V)):
                                feature_dict.update(
                                    _interpolated_features(field[level_index], variable, pressure_level)
                                )
                else:
                    # Reached only when no `break` happened, i.e. every file of
                    # this time step was read; incomplete time steps add no row.
                    rows_list.append(feature_dict)


df = pd.DataFrame(rows_list)
# Discard rows containing any missing value, then renumber the rows.
df = df.dropna().reset_index(drop=True)
# The date strings are built as "YYYY-MM-DD HH:00", so the format has to
# spell out the dashes.
df["date"] = pd.to_datetime(df["date"], format="%Y-%m-%d %H:%M")
df.head()

In [0]:
# Write recipe outputs: save the feature table as CSV in the current working
# directory, without the DataFrame index column.
df.to_csv("ERAI_on_CESM_grid_data.csv", index=False)