In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell is executed, so edits to the local helper modules are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Import packages
import os
import sys

import pandas as pd, numpy as np
import dypy.netcdf as dn
import dypy.intergrid as ig
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import ndimage

# Custom functions
from utils_CESM import *

# Import constants from constants.py file in directory above
sys.path.append("..")
from constants import *

--- Avaliable variables ---
Base directory: BASE_DIR
Month names: MONTH_NAMES
Bounding box coordinates: LON_MIN, LON_MAX, LAT_MIN, LAT_MAX
ERAI coordinates: LONS_ERA, LATS_ERA
CESM coordinates: LONS_CESM, LATS_CESM
CESM slice: INDEX_CESM
CESM coordinates for plotting (string): LONS_CESM_STRING, LATS_CESM_STRING


# Define all CESM paths to read and check whether they exist

In [3]:
# Build the list of 6-hourly CESM files: one file per ensemble member and year
years = list(map(str, range(1991, 2000 + 1)))  # "1991" ... "2000"
ensemble_nr_list = [str(member).zfill(3) for member in range(1, 35 + 1)]  # "001" ... "035"
# Member-major order: all years of member 001 first, then member 002, and so on
paths = [
    f"/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.B20TRLENS.f09_g16.ethz.{ensemble_nr}/archive/atm/hist/b.e112.B20TRLENS.f09_g16.ethz.{ensemble_nr}.cam.h2.{year}-01-01-21600.nc"
    for ensemble_nr in ensemble_nr_list
    for year in years
]

# Sanity check: try to read one variable ("hyam") from every file so that a missing
# or unreadable file is noticed before the expensive full read below.
# NOTE(review): this relies on dn.read_var raising for a bad path, which stops the
# loop at the first failure — confirm against dypy.
# `paths` is ordered member-major (all years of one member, then the next member),
# so the running index splits into (member, year) by the number of years. Deriving
# that number from `years` keeps the progress label correct if the year range changes.
n_years = len(years)
for i, path in enumerate(paths):
    dn.read_var(path, "hyam")
    member_idx, year_idx = divmod(i, n_years)
    # "\r" returns to the start of the line, so each status overwrites the previous one
    print(f"{member_idx + 1}.{year_idx + 1} worked", end="\r")

35.10 worked

# Read CESM-p (present-day) files

In [4]:
# Read all files listed in `paths` with the star-imported helper; the result is turned
# into a DataFrame in the next cell.
# NOTE(review): presumably the helper restricts the data to the INDEX_CESM slice and uses
# the lat/lon strings to label the columns, and prints each path as it is processed (see
# the output below) — confirm in utils_CESM.
rows = read_all_CESM_files(paths, INDEX_CESM, LATS_CESM_STRING, LONS_CESM_STRING)

/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.B20TRLENS.f09_g16.ethz.001/archive/atm/hist/b.e112.B20TRLENS.f09_g16.ethz.001.cam.h2.1991-01-01-21600.nc
/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.B20TRLENS.f09_g16.ethz.001/archive/atm/hist/b.e112.B20TRLENS.f09_g16.ethz.001.cam.h2.1992-01-01-21600.nc


In [5]:
# Assemble the collected records into one raw table
df_raw = pd.DataFrame(data=rows)

# Post-Processing

In [6]:
# Work on a copy so df_raw stays untouched, and keep only the columns
# that contain no missing values
df = df_raw.copy().dropna(axis="columns")

In [8]:
# Attach a date and an ensemble-member label to every row, then preview the result
dates, ensembles = create_date_and_ensemble_columns(years)
df = df.assign(date=dates, ensemble=ensembles)
df.head()

Length of ensemble member column:  511000
Length of date column:  511000


Unnamed: 0,SLP_4287_0_sealevel,SLP_4287_125_sealevel,SLP_4287_250_sealevel,SLP_4287_375_sealevel,SLP_4287_500_sealevel,SLP_4287_625_sealevel,SLP_4287_750_sealevel,SLP_4287_875_sealevel,SLP_4287_1000_sealevel,SLP_4287_1125_sealevel,...,U_4947_625_850,U_4947_750_850,U_4947_875_850,U_4947_1000_850,U_4947_1125_850,U_4947_1250_850,U_4947_1375_850,U_4947_1500_850,date,ensemble
0,1027.875366,1028.249023,1028.119629,1026.509277,1026.485229,1027.50293,1027.563843,1027.570801,1027.40918,1027.008667,...,0.182726,-0.334366,-1.207864,-1.537046,-1.513066,-1.571769,-1.543017,-1.652972,1991-01-01 00:00:00,E1
1,1029.803467,1030.449341,1029.541748,1028.484131,1028.848145,1029.079468,1029.063232,1029.083862,1028.906616,1028.601196,...,0.960574,1.241237,1.115772,1.066674,1.311498,1.34913,1.16522,0.619885,1991-01-01 06:00:00,E1
2,1030.819092,1030.908325,1030.025513,1029.426514,1029.475586,1029.454834,1029.430054,1029.554688,1029.671509,1029.631348,...,2.442867,3.359064,3.627144,2.981276,2.580138,2.44459,2.127206,1.334364,1991-01-01 12:00:00,E1
3,1033.883545,1033.190674,1032.149048,1031.638794,1031.25769,1031.035645,1030.946411,1031.17395,1031.435791,1031.125732,...,4.764948,5.511599,5.411247,4.374365,3.206755,2.41285,2.057547,1.86733,1991-01-01 18:00:00,E1
4,1035.513794,1034.349121,1032.994019,1032.683228,1031.957031,1031.71106,1031.614136,1032.110229,1031.929565,1031.497559,...,8.485958,8.428128,8.452509,7.947867,6.99854,6.017332,5.378965,4.801532,1991-01-02 00:00:00,E1


In [None]:
# Write outputs
# Store the post-processed table as a gzip-compressed CSV; pandas infers the
# compression from the ".gz" suffix of the path.
# index=False is an argument of to_csv (omit the row index from the file), not of
# os.path.join; the original call also lacked a closing parenthesis.
output_path = os.path.join(BASE_DIR, "data", "MeteorologicalData", "CESM_present_day.csv.gz")
df.to_csv(output_path, index=False)