In [2]:
## import libraries
import os, sys
import numpy as np
import pandas as pd
import xarray as xr
import yaml

sys.path.append('../../mclimate_tool_cw3e')
import mclimate_funcs as mclim_func

ERROR 1: PROJ: proj_create_from_database: Open of /home/dnash/miniconda3/envs/SEAK-impacts/share/proj failed


In [3]:
path_to_data = '/expanse/nfs/cw3e/cwp140/'
path_to_out  = '../out/'       # output files (numerical results, intermediate datafiles) -- read & write
path_to_figs = '../figs/'      # figures

In [14]:
## load landslide data
fname = path_to_data + 'downloads/Landslide_Data.csv'
df = pd.read_csv(fname)
df = df.set_index(pd.to_datetime(df['Day_min']))
idx = (df['Day_min'].value_counts() > 5).index
idx[(df['Day_min'].value_counts() > 5).values]

Index(['11/20/2023', '12/1/2020', '12/2/2020', '10/20/1998', '10/26/2020',
       '8/18/2015'],
      dtype='object', name='Day_min')

In [16]:
model_name = 'GEFS_archive' # 'GEFS_archive' or 'GEFS_reforecast'

## load landslide data
fname = path_to_data + 'downloads/Landslide_Data.csv'
df = pd.read_csv(fname)
df = df.set_index(pd.to_datetime(df['Day_min']))
print(len(df.index.unique()))
if model_name == 'GEFS_reforecast':
    ## subset to 2000-2019
    idx = (df.index >= '2000-01-01') & (df.index <= '2019-12-31')
else:
    ## subset to 2020-2024
    idx = (df.index >= '2020-01-01') & (df.index <= '2024-12-31')
df = df.loc[idx]

## get unique dates - these are the impact dates
unique_dates = df.index.unique()
unique_dates.sort_values()

102


DatetimeIndex(['2020-02-27', '2020-08-17', '2020-10-24', '2020-10-26',
               '2020-10-31', '2020-11-01', '2020-11-28', '2020-11-29',
               '2020-11-30', '2020-12-01', '2020-12-02', '2021-01-18',
               '2021-06-06', '2021-08-13', '2021-12-14', '2022-01-12',
               '2022-01-19', '2022-01-21', '2022-01-22', '2022-02-09',
               '2022-02-17', '2022-04-06', '2022-04-10', '2022-04-11',
               '2022-06-25', '2022-07-11', '2022-09-25', '2022-09-26',
               '2022-10-01', '2022-10-31', '2023-01-25', '2023-08-12',
               '2023-09-14', '2023-09-21', '2023-09-24', '2023-10-06',
               '2023-10-13', '2023-11-20', '2023-12-23', '2024-01-28',
               '2024-01-29', '2024-02-01', '2024-07-14', '2024-08-25'],
              dtype='datetime64[ns]', name='Day_min', freq=None)

In [4]:
## create list of init dates we need 
## we will run mclimate based on these dates and lead times
## for each impact date
## for initialization date 1-7 days before impact date
## F000, F024, F072, ...
date_lst = []
F_lst = []
impact_date_lst = []
model_lst = []
for i, date in enumerate(unique_dates):
    ## skip 2 events - 20200227, 20200817
    ## the data from GEFS was too hard to download for these dates
    if (date.strftime("%Y%m%d") == '20200227') | (date.strftime("%Y%m%d") == '20200817'):
        pass
    else:
        for j, init_lead in enumerate(np.arange(1, 8)):
            init_date = date - pd.to_timedelta(init_lead, unit='D')
            F_lst.append(init_lead*24)
            date_lst.append(init_date)
            impact_date_lst.append(date)
    
            if init_date.year < 2020:
                model_name = 'GEFSv12_reforecast'
            else:
                model_name = 'GEFS_archive'
    
            model_lst.append(model_name)

d = {'impact_date': impact_date_lst, 'init_date': date_lst, 'F': F_lst, 'model_name': model_lst}
df = pd.DataFrame(d)
df

Unnamed: 0,impact_date,init_date,F,model_name
0,2020-12-02,2020-12-01,24,GEFS_archive
1,2020-12-02,2020-11-30,48,GEFS_archive
2,2020-12-02,2020-11-29,72,GEFS_archive
3,2020-12-02,2020-11-28,96,GEFS_archive
4,2020-12-02,2020-11-27,120,GEFS_archive
...,...,...,...,...
289,2024-02-01,2024-01-29,72,GEFS_archive
290,2024-02-01,2024-01-28,96,GEFS_archive
291,2024-02-01,2024-01-27,120,GEFS_archive
292,2024-02-01,2024-01-26,144,GEFS_archive


In [5]:
def find_max_value_for_lead_time(fdate, F, model, impact_date):
    ## run mclimate comparison

    ## save maximum percentile rank value for 
    ## IVT, freezing level, 1000 hPa wind mag
    
    ## table / CSV rows
    ## impact date, F, IVT, freezing level, 1000 hPa wind
    # each impact date will have 7 rows
    
    ####################################
    ### COMPARE FORECAST TO MCLIMATE ###
    ####################################
    var_lst = ['ivt', 'freezing_level', 'uv1000']
    ds_lst = []
    ds_lst2 = []
    fc_lst = []
    for i, varname in enumerate(var_lst):
        forecast, ds = mclim_func.run_compare_mclimate_forecast(varname, fdate, model, server='expanse', F=F)
        fc_lst.append(forecast)
        ds_lst.append(ds)
    
        if varname == 'uv1000':
                ds = ds.rename({'mclimate': 'uv'})
        else:
            ds = ds.rename({'mclimate': varname})
        ds_lst2.append(ds)
        
    ### merge the datasets
    ds3 = xr.merge(ds_lst2)
    ds3 = ds3.sortby('lat')
    
    fc = xr.merge(fc_lst)
    fc = fc.sortby('lat')
    
    ## get maximum value within Southeast AK domain
    ext=[-141., -130., 54., 60.]
    tmp = ds3.sel(lat=slice(ext[2], ext[3]), lon=slice(ext[0], ext[1]))
    maxval = tmp.max(dim=['lat', 'lon']).fillna(0)

    ## put all that back into a dataframe
    d = {'impact_date': impact_date, 'init_date': fdate, 'F': F, 'model_name': model,
         'IVT': maxval.ivt.values, 'Z0': maxval.freezing_level.values, 'UV': maxval.uv.values}
    df = pd.DataFrame(d)
    
    return df

In [6]:
df_lst = []
for index, row in df.iterrows():
    fdate = row.init_date.strftime('%Y%m%d')
    F = row.F
    model = row.model_name
    impact_date = row.impact_date

    max_df = find_max_value_for_lead_time(fdate, F, model, impact_date)
    df_lst.append(max_df)

## merge df_lst
df_vertical = pd.concat(df_lst)
df_vertical

12 01
12 01
12 01
11 30
11 30
11 30
11 29
11 29
11 29
11 28
11 28
11 28
11 27
11 27
11 27
11 26
11 26
11 26
11 25
11 25
11 25
11 30
11 30
11 30
11 29
11 29
11 29
11 28
11 28
11 28
11 27
11 27
11 27
11 26
11 26
11 26
11 25
11 25
11 25
11 24
11 24
11 24
09 24
09 24
09 24
09 23
09 23
09 23
09 22
09 22
09 22
09 21
09 21
09 21
09 20
09 20
09 20
09 19
09 19
09 19
09 18
09 18
09 18
01 17
01 17
01 17
01 16
01 16
01 16
01 15
01 15
01 15
01 14
01 14
01 14
01 13
01 13
01 13
01 12
01 12
01 12
01 11
01 11
01 11
01 27
01 27
01 27
01 26
01 26
01 26
01 25
01 25
01 25
01 24
01 24
01 24
01 23
01 23
01 23
01 22
01 22
01 22
01 21
01 21
01 21
01 28
01 28
01 28
01 27
01 27
01 27
01 26
01 26
01 26
01 25
01 25
01 25
01 24
01 24
01 24
01 23
01 23
01 23
01 22
01 22
01 22
12 22
12 22
12 22
12 21
12 21
12 21
12 20
12 20
12 20
12 19
12 19
12 19
12 18
12 18
12 18
12 17
12 17
12 17
12 16
12 16
12 16
10 12
10 12
10 12
10 11
10 11
10 11
10 10
10 10
10 10
10 09
10 09
10 09
10 08
10 08
10 08
10 07
10 07
10 07
10 06
10 0

Unnamed: 0,impact_date,init_date,F,model_name,IVT,Z0,UV
0,2020-12-02,20201201,24,GEFS_archive,0.99,0.96,0.99
0,2020-12-02,20201130,48,GEFS_archive,0.99,0.96,0.99
0,2020-12-02,20201129,72,GEFS_archive,0.99,0.96,0.99
0,2020-12-02,20201128,96,GEFS_archive,0.99,0.96,0.99
0,2020-12-02,20201127,120,GEFS_archive,0.99,0.95,0.99
...,...,...,...,...,...,...,...
0,2024-02-01,20240129,72,GEFS_archive,0.96,0.75,0.97
0,2024-02-01,20240128,96,GEFS_archive,0.95,0.75,0.95
0,2024-02-01,20240127,120,GEFS_archive,0.90,0.00,0.90
0,2024-02-01,20240126,144,GEFS_archive,0.75,0.00,0.75


In [7]:
# Save as CSV
if model_name == 'GEFS_reforecast':
    df_vertical.to_csv(path_to_out+'box_whisker_2000-2019.csv', index=False)
else:
    df_vertical.to_csv(path_to_out+'box_whisker_2020-2024.csv', index=False)