In [1]:
%matplotlib inline

import subprocess
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import warnings

warnings.simplefilter('ignore')

# WATExR: Forecast workflow

## 1. Get seasonal forecast 

Downloads and bias-corrects (without cross-validation) the 15-member seasonal forecast for the specified year and season.

**Note:** Takes several minutes to complete.

In [2]:
# Forecast year (passed as a string to get_seasonal_forecast.R)
year = 2000

# Season ('winter', 'spring', 'early_summer' or 'late_summer')
# Must be one of the keys of 'months_dict'
season = 'winter'

In [3]:
# Map each season name to the comma-separated month numbers that are
# handed to the R script as its second argument
months_dict = {'winter':      '11,12,1',
               'spring':      '2,3,4',
               'early_summer':'5,6,7',
               'late_summer': '8,9,10',
              }

# Fail early (before the slow R step) if an unknown season was chosen
assert season in months_dict, "'season' must be one of ('winter', 'spring', 'early_summer', 'late_summer')."

In [None]:
# Process seasonal data using R
# This takes a while! check_call raises CalledProcessError if the
# script exits with a non-zero status.
r_command = ['Rscript', '--vanilla', 'get_seasonal_forecast.R',
             str(year), months_dict[season]]
res = subprocess.check_call(r_command)

## 2. Compare forecast to historic record

In [4]:
def get_season(row):
    """Return the season name for a row, based on the month of row['date'].

    Args:
        row: Mapping-like object (e.g. a DataFrame row) whose 'date' entry
             exposes a '.month' attribute.

    Returns:
        'winter' (Nov-Jan), 'spring' (Feb-Apr), 'early_summer' (May-Jul) or
        'late_summer' (Aug-Oct); np.nan if the month matches none of them.
    """
    month = row['date'].month

    # Season name paired with the month numbers it covers
    season_months = (('winter', (11, 12, 1)),
                     ('spring', (2, 3, 4)),
                     ('early_summer', (5, 6, 7)),
                     ('late_summer', (8, 9, 10)))

    for name, months in season_months:
        if month in months:
            return name

    return np.nan

### 2.1. Read EWEMBI dataset (1981 - 2010)

**Note:** The code in Section 2.1 only needs to be re-run if the EWEMBI dataset changes. Quantiles are saved in

    WATExR/Norway_Morsa/Data/Meteorological/01_ewembi_obs/ewembi_obs_quantiles_1981-2010.csv

In [5]:
# Read EWEMBI for 1981 - 2010
ewembi_path = (r'/home/jovyan/projects/watexr/WATExR/Norway_Morsa/Data'
               r'/Meteorological/01_ewembi_obs/ewembi_obs_1981-2010.dat')

# The input is identical for every season, so read and pre-process it once
# rather than re-reading the file on each pass through the loop
ewembi_df = pd.read_csv(ewembi_path, sep='\t', encoding='utf-8',
                        names=['date', 'time', 'uas', 'vas', 'ps', 'tas', 'pr', 'hurs', 'petH'])

# Parse dates
ewembi_df['date'] = pd.to_datetime(ewembi_df['date'], format='%Y-%m-%d')
ewembi_df['month'] = ewembi_df['date'].dt.month
ewembi_df['year'] = ewembi_df['date'].dt.year
ewembi_df['season'] = ewembi_df.apply(get_season, axis=1)

# January should always be associated with the previous year.
# A single .loc assignment is used because chained indexing
# (df['year'][mask] = ...) may write to a temporary copy and is a silent
# no-op when pandas copy-on-write is active.
mask = ewembi_df['month'] == 1
ewembi_df.loc[mask, 'year'] = ewembi_df.loc[mask, 'year'] - 1

# Get vars of interest (temp, precip and wind)
# NOTE: wind seems to be reported as vectors with E-W and N-S components
# estimate total of (u**2 + v**2)**0.5, but CHECK THIS!
ewembi_df['wind'] = (ewembi_df['uas']**2 + ewembi_df['vas']**2)**0.5

# 1980 not valid (as only have Jan from 1981)
ewembi_df = ewembi_df[ewembi_df['year'] != 1980]

# Loop over seasons
df_list = []
for seas in months_dict.keys():
    # Filter to season and numeric cols of interest. The string 'season'
    # column is dropped before averaging, because mean() raises a TypeError
    # on non-numeric columns in pandas >= 2.0.
    obs_df = ewembi_df.loc[ewembi_df['season'] == seas, ['year', 'tas', 'pr', 'wind']]

    # Groupby year (one seasonal mean per year)
    obs_df = obs_df.groupby('year').mean()

    # Calculate quantiles of the yearly seasonal means
    quant_df = obs_df.quantile([0.05, 0.33, 0.67, 0.95])
    quant_df['quantile'] = quant_df.index
    quant_df['season'] = seas

    df_list.append(quant_df)

# Combine (the quantile level is already stored in the 'quantile' column,
# so the old index can be discarded)
quant_df = pd.concat(df_list).reset_index(drop=True)
quant_df = quant_df[['season', 'quantile', 'tas', 'pr', 'wind']]

# Save
quant_path = (r'/home/jovyan/projects/watexr/WATExR/Norway_Morsa/Data'
            r'/Meteorological/01_ewembi_obs/ewembi_obs_quantiles_1981-2010.csv')
quant_df.to_csv(quant_path, encoding='utf-8', index=False)

quant_df

Unnamed: 0,season,quantile,tas,pr,wind
0,winter,0.05,-4.833879,1.039551,2.806734
1,winter,0.33,-1.487421,2.353131,3.164223
2,winter,0.67,0.114942,2.890104,3.619029
3,winter,0.95,1.372469,4.345663,4.017649
4,spring,0.05,-1.358771,0.950261,2.625764
5,spring,0.33,0.336734,1.736817,2.944407
6,spring,0.67,2.09986,2.37815,3.228665
7,spring,0.95,3.44689,3.229093,3.931998
8,early_summer,0.05,12.50353,1.604949,2.264006
9,early_summer,0.33,13.480366,1.965119,2.649072


### 2.2. Read forecasts for coming season

In [6]:
# List of output from 15-member ensemble
base_path = (r'/home/jovyan/projects/watexr/WATExR/Norway_Morsa/Data'
             r'/Meteorological/05_temporary_forecast_data/Morsa/CLIMATE')

s4_dirs = os.listdir(base_path)

# Loop over ensemble
df_list = []
for dir_path in s4_dirs:
    # Get member: last two characters of the 7th underscore-separated token
    # of the directory name (presumably the member number - verify against
    # the naming used by the R script)
    member = dir_path.split('_')[6][-2:]

    fpath = os.path.join(base_path, dir_path, 'meteo_file.dat')

    member_df = pd.read_csv(fpath, sep='\t', encoding='utf-8',
                            names=['date', 'time', 'uas', 'vas', 'ps', 'tas', 'pr', 'hurs', 'petH'])

    # Parse dates
    member_df['date'] = pd.to_datetime(member_df['date'], format='%Y-%m-%d')
    member_df['month'] = member_df['date'].dt.month
    member_df['year'] = member_df['date'].dt.year
    member_df['season'] = member_df.apply(get_season, axis=1)

    # January should always be associated with the previous year.
    # A single .loc assignment is used because chained indexing
    # (df['year'][mask] = ...) may write to a temporary copy and is a silent
    # no-op when pandas copy-on-write is active.
    mask = member_df['month'] == 1
    member_df.loc[mask, 'year'] = member_df.loc[mask, 'year'] - 1

    # Get vars of interest (temp, precip and wind)
    # NOTE: wind seems to be reported as vectors with E-W and N-S components
    # estimate total of (u**2 + v**2)**0.5, but CHECK THIS!
    member_df['wind'] = (member_df['uas']**2 + member_df['vas']**2)**0.5

    # Filter to season and numeric cols of interest. The string 'season'
    # column is dropped before averaging, because mean() raises a TypeError
    # on non-numeric columns in pandas >= 2.0.
    member_df = member_df.loc[member_df['season'] == season, ['year', 'tas', 'pr', 'wind']]
    member_df = member_df[member_df['year'] != 1980] # 1980 not valid (as only have Jan from 1981)

    # Groupby year
    member_df = member_df.groupby('year').mean()

    # Each member's file should cover exactly one forecast season
    assert len(member_df) == 1, (
        "Expected exactly one seasonal mean for '%s', got %d." % (dir_path, len(member_df)))

    # Tidy
    member_df = member_df.reset_index(drop=True)
    member_df['member'] = member

    df_list.append(member_df)

# Combine: one row per ensemble member, indexed and sorted by member
mod_df = pd.concat(df_list)
mod_df = mod_df[['member', 'tas', 'pr', 'wind']]
mod_df = mod_df.sort_values('member').set_index('member')

mod_df

Unnamed: 0_level_0,tas,pr,wind
member,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.239281,1.985818,3.596937
2,1.5819,2.336534,4.290225
3,-1.966635,2.654936,3.354824
4,-0.512703,4.082073,3.69784
5,1.350164,1.965141,3.828533
6,-1.52244,2.201166,4.175351
7,0.307385,1.960699,3.94125
8,-2.088796,1.100484,3.734602
9,2.620509,3.945526,4.49569
10,-1.240333,1.442105,3.663107


In [16]:
# Read quantiles
quant_path = (r'/home/jovyan/projects/watexr/WATExR/Norway_Morsa/Data'
            r'/Meteorological/01_ewembi_obs/ewembi_obs_quantiles_1981-2010.csv')
quant_df = pd.read_csv(quant_path)

# Get season and index by quantile level, so that bins can be looked up
# with .loc (the 'quantile' column itself is kept)
quant_df = quant_df.query('season == @season').set_index('quantile', drop=False)

# Labels for terciles and extremes
terc_labels = ['Below normal', 'Near normal', 'Above normal']
ext_labels = ['Extreme low', 'Not extreme', 'Extreme high']

# Probabilities are the share of ensemble members in a class. Use the actual
# ensemble size instead of a hard-coded 15, so the percentages stay correct
# if the number of members changes.
n_members = len(mod_df)

# Loop over variables
res_dict = {}
for par in ['tas', 'pr', 'wind']:
    # Build bins for terciles: (-inf, q33], (q33, q67], (q67, inf)
    terc_bins = np.concatenate(([-np.inf],
                                quant_df.loc[[0.33, 0.67], par].values,
                                [np.inf]))

    # Build bins for extremes: (-inf, q05], (q05, q95], (q95, inf)
    ext_bins = np.concatenate(([-np.inf],
                               quant_df.loc[[0.05, 0.95], par].values,
                               [np.inf]))

    # Get terciles: count members per class, then report the most frequent
    terc_df = pd.cut(mod_df[par],
                     bins=terc_bins,
                     labels=terc_labels)
    terc_counts = terc_df.value_counts()

    terc = terc_counts.idxmax()
    terc_prob = 100 * terc_counts.max() / n_members

    # Get extremes
    ext_df = pd.cut(mod_df[par],
                    bins=ext_bins,
                    labels=ext_labels)
    ext_counts = ext_df.value_counts()

    ext = ext_counts.idxmax()
    ext_prob = 100 * ext_counts.max() / n_members

    # Add to results
    res_dict[par] = {'tercile':terc,
                     'tercile_prob':terc_prob,
                     'extreme':ext,
                     'extreme_prob':ext_prob}

In [17]:
# Show, per variable, the most frequent tercile/extreme class among the
# ensemble members and its probability (%)
res_dict

{'tas': {'tercile': 'Above normal',
  'tercile_prob': 66.66666666666667,
  'extreme': 'Not extreme',
  'extreme_prob': 66.66666666666667},
 'pr': {'tercile': 'Below normal',
  'tercile_prob': 53.333333333333336,
  'extreme': 'Not extreme',
  'extreme_prob': 100.0},
 'wind': {'tercile': 'Above normal',
  'tercile_prob': 86.66666666666667,
  'extreme': 'Extreme high',
  'extreme_prob': 53.333333333333336}}

In [None]:
# NOTE(review): 'a' is not defined in any cell visible here, so this looks
# like a stray leftover cell that would raise NameError on Restart & Run All
# - confirm and remove
a