# Import packages

In [1]:
import pandas as pd, numpy as np
import dypy.netcdf as dn
import dypy.intergrid as ig
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import ndimage
import os
import re

# Define constants

In [2]:
# W-E direction
LON_MIN_ERA= 0
LON_MAX_ERA = 15

# S-N direction
LAT_MIN_ERA= 42
LAT_MAX_ERA = 50

In [3]:
path = "/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.B20TRLENS.f09_g16.ethz.001/archive/atm/hist/b.e112.B20TRLENS.f09_g16.ethz.001.cam.h2.1990-01-01-21600.nc"

lons, lats = dn.read_var(path, ["lon", "lat"])

xindex = np.where((lons >= LON_MIN_ERA) & (lons <= LON_MAX_ERA))[0]
print("xindex: " + str(xindex))
print("lons: " + str(lons[xindex]))
yindex = np.where((lats >= LAT_MIN_ERA) & (lats <= LAT_MAX_ERA))[0]
print("yindex: " + str(yindex))
print("lats: " + str(lats[yindex]))
xmin, xmax = xindex.min(), xindex.max()
print("xmin: " + str(xmin))
print("xmax: " + str(xmax))
ymin, ymax = yindex.min(), yindex.max()
print("ymin: " + str(ymin))
print("ymax: " + str(ymax))

index = np.s_[:, :, ymin:(ymax+1), xmin:(xmax+1)]

lo = np.array([LAT_MIN_ERA, LON_MIN_ERA])
hi = np.array([LAT_MAX_ERA, LON_MAX_ERA])

query_points = [[lat, lon] for lat in lats[yindex] for lon in lons[xindex]]
query_points_labels = [[str(int(100*query_point[0])), str(int(100*query_point[1]))] for query_point in query_points]
print(query_points_labels)

xindex: [ 0  1  2  3  4  5  6  7  8  9 10 11 12]
lons: [ 0.    1.25  2.5   3.75  5.    6.25  7.5   8.75 10.   11.25 12.5  13.75
 15.  ]
yindex: [141 142 143 144 145 146 147 148]
lats: [42.87958115 43.82198953 44.76439791 45.70680628 46.64921466 47.59162304
 48.53403141 49.47643979]
xmin: 0
xmax: 12
ymin: 141
ymax: 148
[['4287', '0'], ['4287', '125'], ['4287', '250'], ['4287', '375'], ['4287', '500'], ['4287', '625'], ['4287', '750'], ['4287', '875'], ['4287', '1000'], ['4287', '1125'], ['4287', '1250'], ['4287', '1375'], ['4287', '1500'], ['4382', '0'], ['4382', '125'], ['4382', '250'], ['4382', '375'], ['4382', '500'], ['4382', '625'], ['4382', '750'], ['4382', '875'], ['4382', '1000'], ['4382', '1125'], ['4382', '1250'], ['4382', '1375'], ['4382', '1500'], ['4476', '0'], ['4476', '125'], ['4476', '250'], ['4476', '375'], ['4476', '500'], ['4476', '625'], ['4476', '750'], ['4476', '875'], ['4476', '1000'], ['4476', '1125'], ['4476', '1250'], ['4476', '1375'], ['4476', '1500'], ['4570'

# Define functions

In [4]:
def create_heatmap(grid, lats, lons, annot_bool=False):
    df = pd.DataFrame(grid, index=lats, columns=lons)

    fig = plt.figure(figsize=(15,9))
    sns.heatmap(df, annot=annot_bool)

    # Due to bug in matplotlib
    b, t = plt.ylim()
    b += 0.5
    t -= 0.5
    plt.ylim(b, t)

    plt.show()

# Recursively get all relevant file paths

In [5]:
rootdir = "/net/bio/atmosdyn/erainterim/cdf/"
relevant_file_paths = []
for root, subdirs, files in os.walk(rootdir):
    if len(root.split("/")) == 8:
        if (root.split("/")[6] != "1979") & (root.split("/")[6] != "1980") :
            regex = re.compile('P\w+')
            relevant_file_paths_local = [os.path.join(root, netcdf_file) for netcdf_file in files if regex.match(netcdf_file) ]
            relevant_file_paths.extend(relevant_file_paths_local)

# Read in all relevant ERAI files

In [6]:
def get_interpolated_variables(grid_values, variable, index_level, pressure_level):
    grid_values_cut = grid_values[index_level][(LAT_MIN_ERA+90):(LAT_MAX_ERA+90+1),(180+LON_MIN_ERA):(180+LON_MAX_ERA+1)]
    interfunc = ig.Intergrid(grid_values_cut, lo=lo, hi=hi, verbose = False)
    query_values = interfunc(query_points)
    
#     feature_names = [f"{variable}_{query_point[0]}_{query_point[1]}_{pressure_level}" for query_point in query_points_labels]
    return query_values #zip(feature_names, query_values)

In [7]:
feature_list_names =["date"]
feature_list_names.extend([f"SLP_{query_point[0]}_{query_point[1]}_sealevel" for query_point in query_points_labels])
feature_list_names.extend([f"T_{query_point[0]}_{query_point[1]}_900" for query_point in query_points_labels])
for pressure_level in ["850", "700", "500"]:
    for variable in ['Z','T','Q','U','V']:
        feature_list_names.extend([f"{variable}_{query_point[0]}_{query_point[1]}_{pressure_level}" for query_point in query_points_labels])
print(len(feature_list_names))

1769


In [8]:
rows_list=[]
for file_path in relevant_file_paths:
    print(file_path, end="\r")
#     feature_dict = {"date": file_path[-11:]}
    feature_list= [file_path[-11:]]
    
    SLP, = dn.read_var(file_path, ['SLP'])
#     feature_dict.update(get_interpolated_variables(grid_values=np.expand_dims(SLP,0), 
#                                                    variable="SLP", 
#                                                    index_level = 0, 
#                                                    pressure_level="sealevel"))

    feature_list.extend(get_interpolated_variables(grid_values=np.expand_dims(SLP,0), 
                                                   variable="SLP", 
                                                   index_level = 0, 
                                                   pressure_level="sealevel"))
    # Switch to corresponding Z file
    file_path = file_path.replace("P", "Z")
    
    Z,T,Q,U,V = dn.read_var(file_path, ['Z','T','Q','U','V'])
    
#     feature_dict.update(get_interpolated_variables(grid_values=T, 
#                                                    variable="T", 
#                                                    index_level = 0, 
#                                                    pressure_level="900"))
    feature_list.extend(get_interpolated_variables(grid_values=T, 
                                                   variable="T", 
                                                   index_level = 0, 
                                                   pressure_level="900"))

    for index, pressure_level in {1: "850", 3: "700", 5: "500"}.items():
        
#         feature_dict.update(get_interpolated_variables(grid_values=Z, 
#                                                    variable="Z", 
#                                                    index_level = index, 
#                                                    pressure_level= pressure_level))
        
#         feature_dict.update(get_interpolated_variables(grid_values=T, 
#                                                    variable="T", 
#                                                    index_level = index, 
#                                                    pressure_level= pressure_level))
        
#         feature_dict.update(get_interpolated_variables(grid_values=Q, 
#                                                    variable="Q", 
#                                                    index_level = index, 
#                                                    pressure_level= pressure_level))
        
#         feature_dict.update(get_interpolated_variables(grid_values=U, 
#                                                    variable="U", 
#                                                    index_level = index, 
#                                                    pressure_level= pressure_level))
        
#         feature_dict.update(get_interpolated_variables(grid_values=V, 
#                                                    variable="V", 
#                                                    index_level = index, 
#                                                    pressure_level= pressure_level))
        feature_list.extend(get_interpolated_variables(grid_values=Z, 
                                                   variable="Z", 
                                                   index_level = index, 
                                                   pressure_level= pressure_level))
        
        feature_list.extend(get_interpolated_variables(grid_values=T, 
                                                   variable="T", 
                                                   index_level = index, 
                                                   pressure_level= pressure_level))
        
        feature_list.extend(get_interpolated_variables(grid_values=Q, 
                                                   variable="Q", 
                                                   index_level = index, 
                                                   pressure_level= pressure_level))
        
        feature_list.extend(get_interpolated_variables(grid_values=U, 
                                                   variable="U", 
                                                   index_level = index, 
                                                   pressure_level= pressure_level))
        
        feature_list.extend(get_interpolated_variables(grid_values=V, 
                                                   variable="V", 
                                                   index_level = index, 
                                                   pressure_level= pressure_level))
        
        


    rows_list.append(feature_list)
    
df = pd.DataFrame(rows_list)
df["date"] = pd.to_datetime(df["date"], format='%Y%m%d_%H')
df.tail()

/net/bio/atmosdyn/erainterim/cdf/1981/01/P19810101_00<class 'numpy.ndarray'>


KeyError: 'date'

In [None]:
# Write recipe outputs
df.to_csv("/net/litho/atmosdyn2/chmony/data/MeteorologicalData/ERAI_data.csv.gz", index=False)