# What is this notebook?

This notebook builds the properly formatted observation file. Use it as a template if you would like to create your own observation file.

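# What do you need to run this? 

A netCDF observation file (set its path in the **Settings** section) that contains daily `U` and `V` winds on pressure levels including 200 and 850, plus `TTR` (used here as OLR), with `time`, `level` and `lat` coordinates.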

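# What should I end up with? 

The processing section writes two netCDF files to `./Observations/`: `ERA5_Meridional_Mean_Anomaly.nc` and `ERA5_Meridional_Mean_Anomaly_Filtered120.nc`.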

## Load in Necessary Libraries

In [2]:
import yaml
import os
import glob
import xarray as xr
import numpy as np
import eofs.standard as Eof_st
from eofs.multivariate.standard import MultivariateEof
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import copy
import re
from datetime import datetime


## Settings

In [3]:
# Path to the observation netCDF file; it is opened with xr.open_dataset below.
# The processing cells read the variables U and V (selecting level=200 and
# level=850) and TTR from it, and slice on a coordinate named 'lat'.
# NOTE(review): this is an absolute, machine-specific path -- point it at your own file.
obs_filez = '/glade/scratch/wchapman/ERA5_uvolr/1deg/ERA5_U_V_TTR_1998_2022.nc'

In [None]:
def cal_ano_dcli(var):
    """Split ``var`` into anomalies and a day-of-year climatology.

    The climatology is the mean over ``time`` of all samples sharing the same
    ``time.dayofyear``; the anomaly is ``var`` with the climatology of the
    matching day-of-year subtracted.

    Parameters
    ----------
    var : object exposing an xarray-style ``groupby`` with a ``time``
        coordinate (this notebook passes xarray Datasets).

    Returns
    -------
    tuple
        ``(anomaly, climatology)`` in that order.
    """
    doy_key = 'time.dayofyear'
    climatology = var.groupby(doy_key).mean(dim='time')
    anomaly = var.groupby(doy_key) - climatology
    return anomaly, climatology

# Open the observation file and pull the selected pressure levels out as
# their own variables.
DS_MJOera5 = xr.open_dataset(obs_filez)
DS_MJOera5['u200'] = DS_MJOera5['U'].sel(level=200)
DS_MJOera5['u850'] = DS_MJOera5['U'].sel(level=850)
DS_MJOera5['v200'] = DS_MJOera5['V'].sel(level=200)
DS_MJOera5['v850'] = DS_MJOera5['V'].sel(level=850)

# Test for NaN values... adjust the variable names to match your file.
# Each slice keeps only the points where the field is NaN, so a non-empty
# time axis means that variable has missing data.
nan_time_slice_u200 = DS_MJOera5.u200.where(np.isnan(DS_MJOera5.u200), drop=True)
if len(nan_time_slice_u200.time) > 0:
    print('check u200 for nans')
    # add your fix for NaNs here:
    # Current patch: overwrite the FIRST time step that contains a NaN with a
    # neighbouring time step. Only that one time step is repaired.
    time_indices = np.where(DS_MJOera5['u200'].time == np.datetime64(str(nan_time_slice_u200.time.values[0])))[0]
    bad_index = time_indices[0]
    # Use the following time step, or the previous one when the NaN sits in
    # the last time step (bad_index + 1 would be out of range there).
    if bad_index + 1 < len(DS_MJOera5['u200'].time):
        donor_index = bad_index + 1
    else:
        donor_index = bad_index - 1
    DS_MJOera5['u200'][bad_index, :, :] = DS_MJOera5['u200'][donor_index, :, :]

nan_time_slice_u850 = DS_MJOera5.u850.where(np.isnan(DS_MJOera5.u850), drop=True)
if len(nan_time_slice_u850.time) > 0:
    print('check u850 for nans')

# The radiation field in this file is called TTR, so test TTR itself (not
# the u850 slice, and not a non-existent 'ulwrf' variable).
nan_time_slice_ttr = DS_MJOera5.TTR.where(np.isnan(DS_MJOera5.TTR), drop=True)
if len(nan_time_slice_ttr.time) > 0:
    print('check TTR for nans')


## adjust the following if needed... the three fields must be named u, u, ttr
# DS_MJOera5 = DS_MJOera5.mean('latitude')
u200era20c = DS_MJOera5.u200.rename('u')
u850era20c = DS_MJOera5.u850.rename('u')
uOLRera20c = DS_MJOera5.TTR.rename('ttr')

## Process Three Variables

In [None]:
%%time
OLR = DS_MJOera5.TTR.to_dataset().rename_vars({'TTR':'olr'}).sel(lat=slice(16,-16))
U200 = DS_MJOera5.u200.to_dataset().rename_vars({'u200':'uwnd'}).sel(lat=slice(16,-16))
U850 = DS_MJOera5.u850.to_dataset().rename_vars({'u850':'uwnd'}).sel(lat=slice(16,-16))

OLR = OLR.mean('lat')
U200 = U200.mean('lat')
U850 = U850.mean('lat')

print(' ----- Computing daily anomalies + climatologies ----- ')
OLR_anom,OLR_clim = cal_ano_dcli(OLR)
U850_anom,U850_clim = cal_ano_dcli(U850)
U200_anom,U200_clim = cal_ano_dcli(U200)

print(' ----- Removing interannual variability (120d rolling mean) ----- ')
OLR_anom2 = OLR_anom - OLR_anom.rolling(time=120, center=False).mean().dropna('time')
print('...1...')
U850_anom2 = U850_anom - U850_anom.rolling(time=120, center=False).mean().dropna('time')
print('...2...')
U200_anom2 = U200_anom - U200_anom.rolling(time=120, center=False).mean().dropna('time')
print('...done...')

DSanom = xr.merge([U850_anom.rename({'uwnd':'uwnd850'}),U200_anom.rename({'uwnd':'uwnd200'}),OLR_anom]).to_netcdf('./Observations/ERA5_Meridional_Mean_Anomaly.nc')
DSanom_filt = xr.merge([U850_anom.rename({'uwnd':'uwnd850'}),U200_anom.rename({'uwnd':'uwnd200'}),OLR_anom]).to_netcdf('./Observations/ERA5_Meridional_Mean_Anomaly_Filtered120.nc')