In [13]:
import xarray as xr
import dask
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
import glob
import pandas as pd
import os

def spatial_mean(da):
    """Average ``da`` over its horizontal dimensions when it has both.

    Parameters
    ----------
    da : array-like object exposing ``dims`` and ``mean(dim=...)``

    Returns
    -------
    ``da.mean(dim=['lat', 'lon'])`` when both ``'lat'`` and ``'lon'`` appear
    in ``da.dims``; otherwise ``da`` itself, untouched.
    """
    horizontal_dims = ['lat', 'lon']
    # Guard clause: without both dimensions there is nothing to average over.
    if not all(name in da.dims for name in horizontal_dims):
        return da
    return da.mean(dim=horizontal_dims)

# Convert cftime time values (e.g. cftime.DatetimeNoLeap) to pandas Timestamps
def convert_time(ds):
    """Replace the ``time`` values of ``ds`` with pandas Timestamps.

    Parameters
    ----------
    ds : dataset-like
        Object exposing ``ds['time'].values`` and supporting
        ``ds['time'] = ...``.

    Returns
    -------
    The same ``ds`` object. Its ``time`` entry is overwritten in place, so the
    caller's dataset is modified as well.

    Notes
    -----
    Values that provide ``strftime()`` are converted through their default
    string form. Values without ``strftime`` (for example ``numpy.datetime64``
    scalars left behind by an earlier call) are handed to ``pd.Timestamp``
    directly, so running the conversion twice no longer raises AttributeError.
    """
    def _to_timestamp(value):
        # Look the formatter up once; its absence marks an already-converted value.
        formatter = getattr(value, 'strftime', None)
        if formatter is None:
            return pd.Timestamp(value)
        return pd.Timestamp(formatter())

    ds['time'] = [_to_timestamp(value) for value in ds['time'].values]
    return ds


# Data Retrieval
    ## Select either monthly (h0), daily (h1), or hourly (h2) output using a wildcard

In [14]:
# Case name shared by the results directory and the history file names.
case_name = 'i.e215.I2000Clm50SpGs.hw_production.02'

# Directory where the netCDF files are located.
# This is an absolute path; adjust it if your files are elsewhere.
data_directory = '/Trex/case_results/' + case_name + '/sim_results/monthly'

# File pattern
# The pattern targets the h0 history stream. The note that used to sit here
# described h2 (daily files, each holding 24 hourly outputs), which this
# pattern does not select.
# `198*` matches any date stamp beginning with 198; the earlier note gave the
# intended span as 1985 to 1989 (5 years) -- check file_list to see which
# years are actually present.
file_pattern = case_name + '.clm2.h0.198*nc'

# Full glob pattern: directory joined with the file-name pattern.
file_path_pattern = os.path.join(data_directory, file_pattern)

## Get the list of files using the pattern

In [15]:
def _date_stamp(path):
    """Return the second-to-last dot-separated field of the file's base name."""
    return os.path.basename(path).split('.')[-2]


# Gather every path matching the pattern, ordered by that file-name field.
file_list = sorted(glob.glob(file_path_pattern), key=_date_stamp)


In [16]:
# Show the matched paths in their sorted order to confirm the glob found the expected files.
file_list

## Open the files using Dask and Xarray

In [17]:
# Open every path in file_list as a single dataset (combine='by_coords', netcdf4 engine).
ds = xr.open_mfdataset(file_list, combine='by_coords', engine='netcdf4')

## Convert the cftime.DatetimeNoLeap objects to pandas Timestamps

In [18]:
# Overwrite the time values with pandas Timestamps; convert_time also modifies ds in place.
ds = convert_time(ds)

## examine the xarray object

In [19]:
# Display the dataset summary.
ds

# Define Variables to Report

In [20]:
# Names of the variables to report.
# NOTE(review): `variables` is not referenced in the visible part of this notebook -- confirm it is used later.
variables = ['TSA', 'RH2M', 'RAIN']

# Filter to local summer (JJA in the Northern Hemisphere, DJF in the Southern Hemisphere)

In [21]:
# No time filtering is applied here: ds_sel is just another name for ds.
# The commented-out alternative on the same line would restrict it to 1985-1994.
ds_sel = ds # ds.sel(time=slice('1985', '1994'))
ds_sel

In [26]:
def set_unwanted_to_nan(ds):
    """Keep only local-summer data: JJA north of the equator, DJF south of it.

    The mask is true where the season is 'JJA' and ``lat >= 0``, or where the
    season is 'DJF' and ``lat < 0``. It is passed to ``ds.where`` with
    ``drop=True``, so the result is not guaranteed to keep the input's shape.

    A further restriction to cells where ``TSA_U`` is not null was drafted in
    an earlier version of this function but is not applied.

    Parameters
    ----------
    ds : dataset-like object providing ``ds['time.season']``, ``ds['lat']``
        and ``where(cond, drop=...)``.

    Returns
    -------
    The result of ``ds.where(mask, drop=True)``.
    """
    season = ds['time.season']
    latitude = ds['lat']

    # Season term first, latitude term second, in both halves of the mask.
    northern_summer = (season == 'JJA') & (latitude >= 0)
    southern_summer = (season == 'DJF') & (latitude < 0)

    return ds.where(northern_summer | southern_summer, drop=True)

In [27]:
# Apply the local-summer mask (set_unwanted_to_nan) to ds_sel and display the result.
ds_summer = set_unwanted_to_nan(ds_sel)
ds_summer

In [29]:
# Inspect the first time step after dropna over 'lat' with how='all'.
ds_summer.isel(time=0).dropna(dim='lat', how='all')  #printonly

In [25]:
# Mean of RAIN over the time dimension of the summer-only dataset, followed by a quick-look plot.
ds_rain_mean = ds_summer['RAIN'].mean(dim='time')
ds_rain_mean.plot()

In [35]:
# Quick-look plot of the mean of SNOW over the time dimension of the summer-only dataset.
ds_summer['SNOW'].mean(dim='time').plot()

In [31]:
# Add a Precip variable as the sum of RAIN and SNOW; this modifies ds_summer in place.
ds_summer['Precip'] = ds_summer['RAIN'] + ds_summer['SNOW']

In [40]:
# Display the Precip variable.
ds_summer['Precip']

In [51]:
# Number of days in each month, read from the time coordinate.
# NOTE(review): the notebook converts the time values from a no-leap calendar
# to pandas Timestamps, so February of a leap year may count 29 days here --
# confirm that is acceptable.
days_in_month = ds_summer.time.dt.days_in_month

# Weight each Precip value by the number of days in its month.
weighted_precip = ds_summer['Precip'] * days_in_month

# Sum the weighted values within each calendar year.
# ds_summer was masked to JJA (north) / DJF (south), so these totals cover only
# those months.
# NOTE(review): grouping by calendar year splits a December-February season
# across two years -- confirm this is intended.
annual_precip = weighted_precip.groupby('time.year').sum(dim='time')

# Mean over years, scaled by 3600 * 24 (seconds per day).
# NOTE(review): assumes Precip is a per-second rate -- confirm the units.
# The previous version wrapped this expression in a call to an undefined name
# `year`, which raised NameError; the bare expression is what gets displayed.
annual_precip.mean(dim='year') * 3600 * 24  # printonly

In [39]:
# Mean of Precip over all dimensions, multiplied by 3600 * 24 (seconds per day).
# NOTE(review): reads as a per-second rate converted to a per-day amount -- confirm the units of RAIN and SNOW.
ds_summer['Precip'].mean().values * 3600 * 24

In [None]:

# Sum of RAIN over the time dimension of the unmasked dataset, divided by 10.0.
# NOTE(review): 10.0 presumably stands for the number of years loaded -- verify against file_list.
# NOTE(review): the values are summed as-is, with no weighting by month length or conversion from a rate -- confirm this is the intended "yearly total".
yearly_avg_rain: xr.core.dataarray.DataArray = ds['RAIN'].sum(dim='time')/10.0
# Evaluate the result now via compute().
yearly_avg_rain = yearly_avg_rain.compute() 

In [None]:
# Display the computed array.
yearly_avg_rain

In [None]:
# Convert the array to a DataFrame with a single data column named 'RAIN', then display it.
df =yearly_avg_rain.to_dataframe(name='RAIN')
df