In [1]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are reloaded before each cell execution and edits to them are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Import packages
import os
import sys

import pandas as pd, numpy as np
import dypy.netcdf as dn
import dypy.intergrid as ig
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import ndimage

# Custom functions
from utils_CESM import *

# Import constants from constants.py file in directory above
sys.path.append("..")
from constants import *

--- Avaliable variables ---
Base directory: BASE_DIR
Month names: MONTH_NAMES
Bounding box coordinates: LON_MIN, LON_MAX, LAT_MIN, LAT_MAX
ERAI coordinates: LONS_ERA, LATS_ERA
CESM coordinates: LONS_CESM, LATS_CESM
CESM slice: INDEX_CESM
CESM coordinates for plotting (string): LONS_CESM_STRING, LATS_CESM_STRING


# Define all CESM paths to read and check whether they exist

In [3]:
# Create paths to all CESM files with 6 hourly resolution
years = [str(year) for year in range(2091, 2100 + 1)]
# Ensemble member ids are zero-padded to three digits ("001", ..., "035")
ensemble_nr_list = [f"{i:03d}" for i in range(1, 35 + 1)]

# One file per (ensemble member, year). The member is the outer loop, so the
# files of one member are consecutive in `paths`, ordered by year.
cesm_root = "/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS"
paths = [
    f"{cesm_root}/b.e112.BRCP85LENS.f09_g16.ethz.{ensemble_nr}/archive/atm/hist/"
    f"b.e112.BRCP85LENS.f09_g16.ethz.{ensemble_nr}.cam.h2.{year}-01-01-21600.nc"
    for ensemble_nr in ensemble_nr_list
    for year in years
]

# Loop over all paths for ensemble members and years. Check whether they exist and reading works.
# Reading the single variable "hyam" is only used as a probe; the value is discarded.
# The progress message is "<member>.<year index> worked"; both counters are derived
# from len(years) so they stay correct if the year range above is changed.
n_years = len(years)
for i, path in enumerate(paths):
    dn.read_var(path, "hyam")
    member_idx, year_idx = divmod(i, n_years)
    print(f"{member_idx + 1}.{year_idx + 1} worked", end="\r")

35.10 worked

# Read CESM-p files

In [4]:
# Read every file listed in `paths`, passing the CESM index slice and the
# string-formatted latitude/longitude labels from constants.py.
# NOTE(review): read_all_CESM_files is presumably provided by the utils_CESM
# star-import; the structure of `rows` is not visible here -- confirm it is a
# list of per-time-step records before relying on it.
rows = read_all_CESM_files(paths, INDEX_CESM, LATS_CESM_STRING, LONS_CESM_STRING)

/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.BRCP85LENS.f09_g16.ethz.001/archive/atm/hist/b.e112.BRCP85LENS.f09_g16.ethz.001.cam.h2.2091-01-01-21600.nc
/net/litho/atmosdyn/INTEXseas/cesm/cesm112_LENS/b.e112.BRCP85LENS.f09_g16.ethz.001/archive/atm/hist/b.e112.BRCP85LENS.f09_g16.ethz.001.cam.h2.2092-01-01-21600.nc


In [5]:
# Turn into pandas dataframe
# The result is kept under the name df_raw; post-processing below works on a
# copy, so the raw frame stays available without re-reading the files.
df_raw = pd.DataFrame(rows)

# Post-Processing

In [6]:
# Work on a copy of the raw frame (df_raw itself is left as it is) and keep
# only those columns that contain no missing value at all.
df = df_raw.copy().dropna(axis="columns")

In [8]:
# Add date and ensemble member column
# NOTE(review): the execution counter jumps from In [6] to In [8] here; re-run
# the notebook on a fresh kernel to confirm no removed cell is still needed.
# The helper builds both columns from `years` only, so their length has to
# match the number of rows in df -- presumably this is why it prints both
# lengths; TODO confirm against len(df).
dates, ensembles = create_date_and_ensemble_columns(years)
# Both columns are appended to df in place.
df['date'] = dates
df["ensemble"] = ensembles
# Show the first rows as the cell output for a visual check.
df.head(5)

Length of ensemble member column:  511000
Length of date column:  511000


Unnamed: 0,SLP_4287_0_sealevel,SLP_4287_125_sealevel,SLP_4287_250_sealevel,SLP_4287_375_sealevel,SLP_4287_500_sealevel,SLP_4287_625_sealevel,SLP_4287_750_sealevel,SLP_4287_875_sealevel,SLP_4287_1000_sealevel,SLP_4287_1125_sealevel,...,U_4947_625_850,U_4947_750_850,U_4947_875_850,U_4947_1000_850,U_4947_1125_850,U_4947_1250_850,U_4947_1375_850,U_4947_1500_850,date,ensemble
0,1009.295166,1009.765259,1009.547363,1009.128174,1009.923889,1010.331726,1010.205566,1009.546875,1008.21814,1007.883179,...,3.231951,3.379141,2.935905,2.397257,2.454051,2.680598,2.839719,2.758271,2091-01-01 00:00:00,E1
1,1008.937256,1009.288574,1009.178772,1008.663513,1009.649475,1010.830261,1011.312805,1011.196045,1010.479675,1010.186462,...,3.080384,3.61998,3.709461,3.15785,2.52062,1.831804,1.249965,1.152587,2091-01-01 06:00:00,E1
2,1007.777649,1007.934937,1007.951904,1007.726379,1008.19397,1009.104431,1010.020081,1010.550049,1010.655701,1010.970947,...,2.18244,3.04174,3.503407,3.492724,2.13724,0.898738,0.349062,0.346452,2091-01-01 12:00:00,E1
3,1010.049438,1008.559814,1007.109375,1006.558411,1006.643433,1006.876099,1007.414673,1008.425781,1009.267578,1010.338196,...,-1.309341,0.130755,1.433517,2.100199,1.684203,0.707435,0.188749,0.827695,2091-01-01 18:00:00,E1
4,1009.893677,1009.016968,1006.854065,1005.185181,1004.311646,1003.264526,1003.018738,1003.50946,1004.157471,1005.455688,...,-5.073125,-3.727069,-2.602715,-1.672456,-1.204065,-1.679406,-1.851561,-1.163715,2091-01-02 00:00:00,E1


In [None]:
# Write outputs
# Build the target path first, then write. The original line never closed the
# os.path.join(...) call, which is a syntax error and would also have passed
# index=False to os.path.join instead of to_csv.
# With the default compression setting, pandas infers gzip from the ".gz" suffix.
# NOTE(review): the file name says "day" while the timestamps in the preview
# above are 6-hourly -- confirm the name is intended.
output_path = os.path.join(BASE_DIR, "data", "MeteorologicalData", "CESM_future_day.csv.gz")
df.to_csv(output_path, index=False)