# A reanalysis of corrections of NP drifting station precipitation
_This notebook attempts to reproduce the precipitation corrections applied to gauge measurements from the Russian North Pole drifting stations.  The correction methods were originally described in Yang (1999) and Yang et al. (1995)._

In [1]:
%matplotlib inline

In [2]:
import sys
sys.path.append('../source')

import glob
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import calendar

import readers.npsnow as npsnow
import trajectory
from merge_npsnow_data import get_station_list, merge_one_station
from constants import DATADIR

## Merge data for all stations beyond NP-4 and excluding NP-14

1. Load data for one station
2. Drop rows with missing wind speed and air temperature values
3. Calculate monthly data
4. Calculate annual data

In [3]:
def monthly_dataframe(df):
    """Aggregate daily station records into calendar-month statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Daily records on a DatetimeIndex with the columns ``PRECIP``,
        ``TAIR``, ``Ug``, ``Ptrace``, ``Pwind``, ``Pcorr`` and ``Psnow``.

    Returns
    -------
    pandas.DataFrame
        One row per month, labelled by the first day of the month, with:

        - ``ND``    number of days with a non-missing ``PRECIP`` value
        - ``Tmn``   mean of ``TAIR``
        - ``Ug``    mean of ``Ug``
        - ``DP``    number of days with ``PRECIP > 0``
        - ``Dtc``   number of days with trace precipitation (``Ptrace > 0``)
        - ``Pg``    sum of ``PRECIP`` over days with ``PRECIP > 0``
        - ``Ptc``   sum of ``Ptrace``
        - ``Pwind`` sum of ``Pwind``
        - ``Pcorr`` sum of ``Pcorr``
        - ``Psnow`` sum of ``Psnow``

        Months without any qualifying day are NaN in ``DP``, ``Dtc`` and
        ``Pg`` because those series are filtered before resampling.
    """
    measurable = df.PRECIP[df.PRECIP > 0.]
    # Trace days are taken from Ptrace, not from PRECIP == 0: callers reset
    # the no-precipitation code to zero before calling this function, so
    # PRECIP == 0 would count every dry day as a trace day.
    trace_days = df.Ptrace[df.Ptrace > 0.]

    dfMon = pd.DataFrame({
        'ND': df.PRECIP.resample('MS').count(),
        'Tmn': df.TAIR.resample('MS', label='left').mean(),
        'Ug': df.Ug.resample('MS', label='left').mean(),
        'DP': measurable.resample('MS').count(),
        'Dtc': trace_days.resample('MS').count(),
        'Pg': measurable.resample('MS').sum(),
        'Ptc': df.Ptrace.resample('MS').sum(),
        'Pwind': df.Pwind.resample('MS').sum(),
        'Pcorr': df.Pcorr.resample('MS').sum(),
        'Psnow': df.Psnow.resample('MS').sum(),
        })
    return dfMon

def annual_dataframe(df):
    """Aggregate monthly statistics into calendar-year statistics.

    Parameters
    ----------
    df : pandas.DataFrame
        Monthly records on a DatetimeIndex with the columns ``ND``, ``Tmn``,
        ``Ug``, ``DP``, ``Dtc``, ``Pg``, ``Ptc``, ``Pwind``, ``Pcorr`` and
        ``Psnow`` (the layout produced by ``monthly_dataframe``).

    Returns
    -------
    pandas.DataFrame
        One row per year, labelled by 1 January.  ``Tmn`` and ``Ug`` are
        means of the monthly values; every other column is a sum that is NaN
        unless all 12 months have a value (``min_count=12``).
    """
    # 'YS' (year start) is the same offset as the deprecated 'AS' alias.
    yearly = 'YS'
    dfAnn = pd.DataFrame({
        'ND': df.ND.resample(yearly).sum(min_count=12),
        'Tmn': df.Tmn.resample(yearly).mean(),
        'Ug': df.Ug.resample(yearly).mean(),
        'DP': df.DP.resample(yearly).sum(min_count=12),
        'Dtc': df.Dtc.resample(yearly).sum(min_count=12),
        'Pg': df.Pg.resample(yearly).sum(min_count=12),
        'Ptc': df.Ptc.resample(yearly).sum(min_count=12),
        # Sum the wind correction itself; this previously summed Ptc.
        'Pwind': df.Pwind.resample(yearly).sum(min_count=12),
        'Pcorr': df.Pcorr.resample(yearly).sum(min_count=12),
        'Psnow': df.Psnow.resample(yearly).sum(min_count=12),
        })
    return dfAnn

def process_station(sid):
    """Build the annual summary table for one drifting station.

    Loads the merged record for ``sid`` with ``merge_one_station``, drops
    rows missing any of WSPD, TAIR, PRECIP or PTYPE, flags trace days
    (PRECIP == 0 with PTYPE > 0) as 0.1 in ``Ptrace``, resets non-positive
    PRECIP and PTYPE values to zero, then aggregates to months and years.

    Returns the annual table with incomplete rows removed, a ``StationID``
    column holding ``int(sid)``, and the date index moved into a column.

    NOTE(review): ``monthly_dataframe`` reads ``Pwind``, ``Pcorr`` and
    ``Psnow``, which are not created here - confirm that
    ``merge_one_station`` supplies them, otherwise this raises.
    """
    required = ['WSPD', 'TAIR', 'PRECIP', 'PTYPE']
    df = merge_one_station(sid, set_noprecip=False)
    df = df.dropna(axis=0, subset=required)

    # Trace must be flagged before PRECIP is clipped to zero below.
    is_trace = (df['PRECIP'] == 0.) & (df['PTYPE'] > 0.)
    df['Ptrace'] = np.where(is_trace, 0.1, 0.)
    df['PRECIP'] = df['PRECIP'].where(df['PRECIP'] > 0., 0.)
    df['PTYPE'] = df['PTYPE'].where(df['PTYPE'] > 0., 0.)

    station_number = int(sid)

    dfMon = monthly_dataframe(df)
    dfMon['StationID'] = station_number

    dfAnn = annual_dataframe(dfMon)
    dfAnn['StationID'] = station_number

    return dfAnn.dropna(axis=0).reset_index()

# Add dropna to proc
# Add station number
# Reset index 

def plot_trajectory(lon, lat, lon2, lat2):
    """Draw two drift tracks on a north polar stereographic map.

    ``lon``/``lat`` are plotted with the legend label 'Raw' and
    ``lon2``/``lat2`` with the label 'Daily'.  Coordinates are treated as
    PlateCarree longitude/latitude and projected before plotting; the map
    covers latitudes 72 to 90.

    Presumably the inputs are 1-D numpy arrays, as
    ``transform_points`` requires - verify against callers.
    """
    geodetic = ccrs.PlateCarree()
    map_proj = ccrs.NorthPolarStereo()

    def to_map_xy(lons, lats):
        # Project lon/lat pairs into map coordinates and split the columns.
        projected = map_proj.transform_points(geodetic, lons, lats)
        return projected[:, 0], projected[:, 1]

    fig = plt.figure(figsize=(10,10))
    ax = plt.subplot(projection=map_proj)
    ax.set_extent([-180., 180., 72., 90.], geodetic)
    for feature in (cfeature.LAND, cfeature.COASTLINE):
        ax.add_feature(feature)

    tracks = (
        (lon, lat, 'Raw'),
        (lon2, lat2, 'Daily'),
    )
    for track_lon, track_lat, track_label in tracks:
        xs, ys = to_map_xy(track_lon, track_lat)
        ax.plot(xs, ys, label=track_label)

    ax.legend()

## Data
I use raw data from the NP drifting stations.  I have produced my own combined files that combine precipitation and meteorological observations.  These files are stored in /home/apbarret/Data/NPSNOW/my_combined_met

- TAIR - 2m air temperature
- RH - Relative humidity at 2m
- SLP - Sea level pressure
- WDIR - wind direction
- WSPD - wind speed m/s
- TOTCLD - total cloud cover
- LOWCLD - low cloud cover
- TSURF - surface temperature
- PRECIP - precipitation amount in mm; a value of 0 denotes trace precipitation when PTYPE != 0
- PTYPE - precipitation type
  - 1 - solid precipitation
  - 2 - mixed phase (rain/snow)
  - 3 - rain
- SDEPTH - snow depth

## Reproduce table 1 from Yang (1999)

In [63]:
# Load the first combined met/precipitation file (in sorted file-name order).
filelist = sorted(glob.glob(os.path.join(DATADIR, 'my_combined_met', 'npmet*.csv')))
if not filelist:
    raise FileNotFoundError(
        f"No npmet*.csv files found in {os.path.join(DATADIR, 'my_combined_met')}")
df = pd.read_csv(filelist[0], index_col=0, header=0, parse_dates=[0])

# Keep only days that have every field needed by the corrections.
df = df.dropna(axis=0, subset=['WSPD', 'TMIN', 'TMAX', 'PRECIP', 'PTYPE'])

# Trace precipitation: a precipitation type was reported but the amount is 0.
df['Ptrace'] = np.where((df['PRECIP'] == 0.) & (df['PTYPE'] > 0.), 0.1, 0.)

# Reset non-positive PTYPE codes to 0.  Assign the result back instead of
# calling .where(..., inplace=True) on df['PTYPE']: that chained in-place call
# acts on a temporary and leaves df unchanged under pandas Copy-on-Write.
df['PTYPE'] = df['PTYPE'].where(df['PTYPE'] > 0, 0)

df[['Station_ID', 'WSPD', 'TMIN', 'TMAX', 'Ug', 'PRECIP', 'PTYPE', 'Ptrace']]

Unnamed: 0,Station_ID,WSPD,TMIN,TMAX,Ug,PRECIP,PTYPE,Ptrace
1954-05-01 12:00:00,3.0,2.125,-18.8,-15.1,1.557382,-9.9,0.0,0.0
1954-05-02 12:00:00,3.0,2.500,-18.0,-17.0,1.832214,-9.9,0.0,0.0
1954-05-03 12:00:00,3.0,2.500,-20.3,-16.8,1.832214,-9.9,0.0,0.0
1954-05-04 12:00:00,3.0,2.000,-19.9,-16.8,1.465771,-9.9,0.0,0.0
1954-05-05 12:00:00,3.0,4.125,-19.4,-18.0,3.023153,-9.9,0.0,0.0
1954-05-06 12:00:00,3.0,4.500,-17.2,-14.8,3.297986,-9.9,0.0,0.0
1954-05-07 12:00:00,3.0,3.750,-17.0,-14.8,2.748321,-9.9,0.0,0.0
1954-05-08 12:00:00,3.0,2.500,-16.3,-14.8,1.832214,-9.9,0.0,0.0
1954-05-09 12:00:00,3.0,3.875,-18.0,-16.6,2.839932,-9.9,0.0,0.0
1954-05-10 12:00:00,3.0,2.250,-15.4,-13.6,1.648993,-9.9,0.0,0.0


In [45]:
# Monthly summary in the layout of Table 1 of Yang (1999).
# Day counts are taken from filtered series, so a month with no matching
# day shows NaN rather than 0.
# (A stray '' entry that made this dict literal a SyntaxError was removed.)
pd.DataFrame({
    'NP': df.Station_ID.resample('MS').first(),
    'ND': df.PRECIP.resample('MS').count(),
    'Tmn': df.TAIR.resample('MS', label='left').mean(),
    'Ug': df.Ug.resample('MS', label='left').mean(),
    'DP': df.PRECIP[df.PRECIP > 0.].resample('MS').count(),
    'Dtc': df.PRECIP[df.PRECIP == 0.].resample('MS').count(),
    'Dsnow': df.PTYPE[df.PTYPE == 1].resample('MS').count(),
    'Dmix': df.PTYPE[df.PTYPE == 2].resample('MS').count(),
    'Drain': df.PTYPE[df.PTYPE == 3].resample('MS').count(),
    'Pg': df.PRECIP[df.PRECIP > 0].resample('MS').sum(),
})

Unnamed: 0,NP,ND,Tmn,Ug,DP,Dtc,Dsnow,Dmix,Drain,Pg
1954-04-01,3.0,0,-16.83375,,,,,,,
1954-05-01,3.0,31,-11.327419,2.600927,4.0,15.0,17.0,2.0,,0.9
1954-06-01,3.0,30,-1.953333,2.239954,1.0,12.0,5.0,7.0,1.0,0.1
1954-07-01,3.0,31,-0.345161,3.284056,17.0,12.0,9.0,18.0,2.0,21.2
1954-08-01,3.0,31,-1.490323,2.827793,12.0,8.0,3.0,12.0,4.0,22.7
1954-09-01,3.0,30,-9.543333,3.210101,22.0,5.0,18.0,9.0,,9.5
1954-10-01,3.0,31,-14.126613,3.645928,20.0,8.0,22.0,6.0,,7.2
1954-11-01,3.0,30,-27.563631,2.316946,14.0,11.0,25.0,,,4.5
1954-12-01,3.0,31,-30.299597,2.262582,16.0,7.0,23.0,,,8.0
1955-01-01,3.0,31,-33.710887,2.98725,8.0,16.0,24.0,,,1.2


## Identify days with trace precipitation
Yang sets daily trace precipitation to 0.1 mm

In [None]:
# Trace days: a precipitation type was recorded but the amount is exactly 0.
# This must be evaluated before PRECIP is cleaned below.
is_trace = (df['PRECIP'] == 0.) & (df['PTYPE'] > 0.)
df['Ptrace'] = np.where(is_trace, 0.1, 0.)

# Replace non-positive codes (e.g. -9.9) with 0 = no precipitation, but keep
# NaN so that missing values stay missing.
keep_precip = df['PRECIP'].isna() | (df['PRECIP'] > 0.)
df['PRECIP'] = df['PRECIP'].where(keep_precip, 0.)
keep_ptype = df['PTYPE'].isna() | (df['PTYPE'] > 0.)
df['PTYPE'] = df['PTYPE'].where(keep_ptype, 0.)

# Snow amount: measured precipitation on days typed as solid (PTYPE == 1).
df['Psnow'] = df['PRECIP'].where(df['PTYPE'] == 1, 0.)

## Calculate wind correction

### Catch ratios from Yang et al (1995)
Snow
$$R = 103.10 - 8.67 W_s + 0.30 T_{max}$$
Snow and Rain
$$R = 98.56 - 6.19 W_s + 0.90 T_{max}$$
Rain and Snow
$$R = 98.13 - 3.17 W_s + 0.60 T_{min}$$
Rain
$$R = 99.99 - 4.77 W_s^{0.56}$$

In [None]:
def cr_snow(x):
    """Catch ratio (percent) for snow, from ``x.Ug`` and ``x.TMAX``.

    NOTE(review): the markdown above gives the intercept as 103.10 while
    this code uses 103.11 - confirm against Yang et al. (1995).
    """
    return 103.11 - 8.67*x.Ug + 0.3*x.TMAX

def cr_mixed(x):
    """Catch ratio (percent) for mixed precipitation.

    Uses ``x.Ug``, ``x.TMAX`` and ``x.TMIN``.

    NOTE(review): the markdown above lists two separate mixed-phase
    relations with different coefficients - confirm which one is intended.
    """
    return 96.99 -4.46*x.Ug + 0.88*x.TMAX + 0.22*x.TMIN

def cr_rain(x):
    """Catch ratio (percent) for rain, from ``x.Ug`` only."""
    return 99.99 - 4.77*(x.Ug**0.56)

def cr_none(x):
    """Placeholder catch ratio that always returns NaN."""
    return np.nan

# PTYPE code -> catch-ratio function.  The entry for 0 is a plain number, not
# a function; wind_correction reports it as an unexpected type if a row has
# PRECIP > 0 with PTYPE == 0.
catch_ratio = {
    1: cr_snow,
    2: cr_mixed,
    3: cr_rain,
    0: 0.
}

def wind_correction(x):
    """Return the wind-induced undercatch correction for one daily record.

    Parameters
    ----------
    x : pandas.Series
        A row with the fields ``PRECIP``, ``PTYPE``, ``Ug``, ``TMAX`` and
        ``TMIN``.

    Returns
    -------
    float
        ``PRECIP * (100 / CR - 1)``, where ``CR`` is the catch ratio for
        the row's ``PTYPE``.  Returns NaN when any input field is missing,
        0 when ``PRECIP`` is 0, and NaN (after printing a message) when
        ``PTYPE`` has no catch-ratio function.

    NOTE(review): the catch ratio is not checked for being positive; at
    high ``Ug`` the linear relations can reach zero or go negative.
    """
    if x[['PRECIP', 'PTYPE', 'Ug', 'TMAX', 'TMIN']].isna().any():
        return np.nan
    if x.PRECIP == 0.:
        return 0.
    #if x.Ug > 6.:
    #    return 0.  # Yang does not apply correction for wind speeds above 6 m/s
    cr_function = catch_ratio.get(x['PTYPE'], None)
    if not callable(cr_function):
        # Previously execution fell through to an unbound `cr` and raised
        # UnboundLocalError; return NaN so DataFrame.apply can continue.
        print (f'Unexpected PTYPE {x.PTYPE}')
        return np.nan
    cr = cr_function(x)
    k = 100./cr
    return x.PRECIP * (k - 1.)
    

## Test wind correction function

# Synthetic rows, one per branch of wind_correction:
#   0: missing PRECIP and PTYPE      1: zero precipitation
#   2: snow with missing Ug          3: mixed precipitation
#   4: rain                          5: PTYPE (-9) absent from catch_ratio
test_columns = {
    'PRECIP': [np.nan, 0.0, 0.2, 3.0, 1.6, 2.0],
    'PTYPE': [np.nan, 0, 1, 2, 3, -9],
    'Ug': [4.5, 2.0, np.nan, 3.0, 4.0, 7.0],
    'TMAX': [-15., -10., -1., 1., -20., -23.],
    'TMIN': [-20., -18., -9., -5., -29., -30.],
}
df_test = pd.DataFrame(test_columns)
df_test.apply(wind_correction, axis=1)

# Hand calculation of the snow catch ratio and correction factor for the
# last test row (Ug = 7 m/s, TMAX = -23).
Ug = 7.0
TMAX = -23.
TMIN = -30.
cr = 103.11 - 8.67*Ug + 0.3*TMAX
k = 100./cr
print(cr, k, k-1)

## Apply to merged DataFrame

In [None]:
# Wind correction is evaluated row by row because the catch-ratio function
# depends on each day's PTYPE.
daily_wind_correction = df.apply(wind_correction, axis=1)
df['Pwind'] = daily_wind_correction

# Corrected precipitation = gauge amount + trace allowance + wind correction.
df['Pcorr'] = df['PRECIP'].add(df['Ptrace']).add(df['Pwind'])

In [None]:
# Preview the first rows to check the Pwind and Pcorr columns.
df.head()

## Calculate monthly data

In [None]:
# Monthly statistics, restricted to months with a PRECIP value on every
# calendar day (ND equals the length of the month).
dfMon = monthly_dataframe(df)
is_complete = dfMon['ND'].to_numpy() == dfMon.index.days_in_month.to_numpy()
dfMon = dfMon.loc[is_complete].copy()

# Fraction of the measured monthly precipitation that fell as snow.
dfMon['Fsnow'] = dfMon['Psnow'].div(dfMon['Pg'])
dfMon

In [None]:
# Annual statistics from the complete months, with the snow fraction of the
# measured annual precipitation.
dfAnn = annual_dataframe(dfMon)
dfAnn['Fsnow'] = dfAnn['Psnow'].div(dfAnn['Pg'])

# Display only years in which every column has a value.
dfAnn.dropna()

## Compare with Yang

In [None]:
# Load Yang's published monthly corrections for the same station.
# NOTE(review): this is an absolute, user-specific path; the notebook imports
# DATADIR for the other inputs - consider deriving this directory from it.
yang_diri = '/home/apbarret/Data/NPSNOW/yang_precip'
# NOTE(review): `sid` is not assigned in any cell above (it is only a
# parameter of process_station), so this line fails on a fresh kernel unless
# `sid` is defined first - confirm the expected value and format.
yangMon = npsnow.read_yang_updated(os.path.join(yang_diri, f'yang_np_precip_updated_coords_{sid}.csv'))
# Index the table by its Date column so it can be joined with dfMon.
yangMon.index = yangMon.Date
#yangMon.index = yangMon.index.shift(12, freq='H')
yangMon = yangMon.drop('Date', axis=1)
yangMon

In [None]:
# Left-join the recomputed monthly values onto Yang's table by date index.
# Columns of dfMon whose names already exist in yangMon get the suffix '_new'.
x = yangMon.join(dfMon, rsuffix='_new')
x

In [None]:
# List the joined column names to pick the pairs compared below.
x.columns

In [None]:
# One scatter panel per quantity: first-named column on the x-axis, its
# counterpart on the y-axis, with a grey 1:1 line for reference.
# Presumably the x-axis columns are Yang's values and the y-axis columns the
# values recomputed in this notebook - verify against x.columns.
fig, ax = plt.subplots(2, 2, figsize=(15,15))

x_columns = ['Pg', 'traceC', 'windC', 'Pc']
y_columns = ['Pg_new', 'Ptc', 'Pwind', 'Pcorr']

for iax, xname, yname in zip(ax.flatten(), x_columns, y_columns):
    x.plot(kind='scatter', x=xname, y=yname, ax=iax)

    # Use the same upper limit on both axes so the 1:1 line is the diagonal.
    xmax = x[[xname, yname]].max().max()
    iax.set_xlim(0, xmax)
    iax.set_ylim(0.,xmax)
    iax.set_aspect('equal')
    iax.plot([0.,xmax], [0.,xmax], c='0.5')