# Canadian Extreme Precipitation: precipitation statistics

Calculates the maximum, 99th and 95th percentile statistics.

In [15]:
import sys
sys.path.append('../canadian_extreme_precip/')

import numpy as np
import pandas as pd

from reader import read_combined_file
from filepath import combined_station_filepath

In [18]:
# Station names to analyse; each name is passed to load_precipitation().
stations_list = [
    'cape dyer', 'resolute bay', 'eureka', 'alert', 'clyde river',
    'cambridge bay', 'hall beach', 'sachs harbour', 'inuvik', 'pond inlet',
]

# Only values strictly greater than this threshold enter the statistics.
PRECIP_THRESHOLD = 0.0

In [19]:
def load_precipitation(station):
    """Return the 'TOTAL_PRECIPITATION' column for one station.

    Parameters
    ----------
    station : str
        Station name; it is handed to ``combined_station_filepath`` to
        locate the station's combined file.

    Returns
    -------
    The 'TOTAL_PRECIPITATION' column of whatever ``read_combined_file``
    returns (presumably a date-indexed pandas Series -- confirm against the
    reader module).
    """
    station_path = combined_station_filepath(station)
    return read_combined_file(station_path)['TOTAL_PRECIPITATION']

In [20]:
# For every station, print the index label and the value of the largest
# precipitation amount among values above PRECIP_THRESHOLD.
for station in stations_list:
    precip = load_precipitation(station)
    above_threshold = precip > PRECIP_THRESHOLD
    df_pr = precip.loc[above_threshold]
    print(f"{station:15s}, {df_pr.idxmax()}, {df_pr.max()}")

cape dyer      , 1980-01-22 00:00:00, 90.2
resolute bay   , 2004-11-17 00:00:00, 35.0
eureka         , 1953-08-17 00:00:00, 41.7
alert          , 2008-04-06 00:00:00, 44.3
clyde river    , 1977-04-07 00:00:00, 41.0
cambridge bay  , 1988-07-24 00:00:00, 35.8
hall beach     , 1980-08-27 00:00:00, 52.6
sachs harbour  , 1990-04-26 00:00:00, 26.0
inuvik         , 1995-08-29 00:00:00, 52.2
pond inlet     , 2002-07-26 00:00:00, 30.5


In [30]:
# Quantile table (one row per station, one column per quantile) computed from
# the values strictly above PRECIP_THRESHOLD.
result = {}
quantiles = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0]
for station in stations_list:
    df = load_precipitation(station)
    df_pr = df[df > PRECIP_THRESHOLD]
    # interpolation='lower' yields an observed value instead of a value
    # interpolated between two observations.
    result[station] = df_pr.quantile(quantiles, interpolation='lower')
rdf = pd.DataFrame(result).T
# Use round(), not int(): q * 100 is inexact in floating point (for example
# 0.29 * 100 == 28.999999999999996), so truncation could mislabel a column.
rdf.columns = [f"p{round(q * 100):02d}" for q in quantiles]
rdf

Unnamed: 0,p01,p05,p10,p25,p50,p75,p90,p95,p99,p100
cape dyer,0.2,0.2,0.3,0.5,1.6,5.1,12.2,18.8,38.1,90.2
resolute bay,0.2,0.2,0.2,0.3,0.6,1.5,3.3,5.3,11.9,35.0
eureka,0.2,0.2,0.2,0.3,0.5,1.2,2.6,4.0,8.0,41.7
alert,0.2,0.2,0.2,0.4,0.7,1.5,3.3,4.8,9.7,44.3
clyde river,0.2,0.2,0.2,0.4,0.9,2.3,5.0,7.6,16.5,41.0
cambridge bay,0.2,0.2,0.2,0.3,0.6,1.5,3.6,5.6,11.9,35.8
hall beach,0.2,0.2,0.2,0.4,0.8,2.2,4.8,7.2,15.4,52.6
sachs harbour,0.2,0.2,0.2,0.3,0.6,1.5,3.3,5.1,11.4,26.0
inuvik,0.2,0.2,0.3,0.5,1.0,2.2,4.6,7.1,13.7,52.2
pond inlet,0.2,0.2,0.2,0.5,1.0,2.3,5.0,7.6,15.5,30.5


In [31]:
# Write the rdf quantile table to a CSV file.
stats_csv_path = '../data/station_precip_statistics.csv'
rdf.to_csv(stats_csv_path)

In [32]:
# Quantile table (one row per station, one column per quantile) computed from
# values of at least 1. (the comparison is >=, so 1. itself is included).
result = {}
quantiles = [0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 1.0]
for station in stations_list:
    df = load_precipitation(station)
    df_pr = df[df >= 1.]
    # interpolation='lower' yields an observed value instead of a value
    # interpolated between two observations.
    result[station] = df_pr.quantile(quantiles, interpolation='lower')
rdf1 = pd.DataFrame(result).T
# Use round(), not int(): q * 100 is inexact in floating point (for example
# 0.29 * 100 == 28.999999999999996), so truncation could mislabel a column.
rdf1.columns = [f"p{round(q * 100):02d}" for q in quantiles]
rdf1

Unnamed: 0,p01,p05,p10,p25,p50,p75,p90,p95,p99,p100
cape dyer,1.0,1.0,1.2,1.8,3.6,8.1,16.4,23.4,43.7,90.2
resolute bay,1.0,1.0,1.0,1.3,2.0,3.4,6.2,8.6,15.6,35.0
eureka,1.0,1.0,1.0,1.2,1.8,3.0,4.8,6.6,11.8,41.7
alert,1.0,1.0,1.0,1.3,1.8,3.1,5.3,7.1,12.4,44.3
clyde river,1.0,1.0,1.0,1.4,2.3,4.2,7.6,10.8,20.4,41.0
cambridge bay,1.0,1.0,1.0,1.3,2.0,3.6,6.4,8.8,17.5,35.8
hall beach,1.0,1.0,1.0,1.4,2.2,4.1,7.3,10.5,19.4,52.6
sachs harbour,1.0,1.0,1.0,1.3,2.0,3.3,5.8,8.4,15.7,26.0
inuvik,1.0,1.0,1.0,1.3,2.0,3.8,6.9,9.4,17.8,52.2
pond inlet,1.0,1.0,1.0,1.2,2.0,3.8,6.6,10.5,19.6,30.5


Find the percentile rank of 1 mm daily precipitation among wet days at each station

In [None]:
# Percentile rank of 1 mm among the values above 0 for every station.
# The original cell ended in a dangling "df_pr.sort_values()." (a SyntaxError);
# it is completed here with the empirical CDF evaluated at 1 mm.
pct_rank_1mm = {}
for station in stations_list:
    df = load_precipitation(station)
    df_pr = df[df > 0.]
    # Fraction of retained values that are <= 1. This equals
    # rank(method='max', pct=True) for an observation of exactly 1, and is
    # still defined when no observation equals 1. NaN if df_pr is empty.
    pct_rank_1mm[station] = (df_pr <= 1.).mean()
pd.Series(pct_rank_1mm, name='pct_rank_1mm')

In [39]:
# Toy example showing how rank(method='max') treats tied values: every tied
# entry receives the highest rank of its group.
toy_precip = [0.2, 0.2, 0.4, 0.6, 0.7, 1.0, 1.0, 1.0, 4.5, 7.8, 90.]
df = pd.DataFrame({'precip': toy_precip}).sort_values(by='precip')
df = df.assign(
    precip_rank=df['precip'].rank(method='max'),
    precip_rank_pct=df['precip'].rank(method='max', pct=True),
)
df

Unnamed: 0,precip,precip_rank,precip_rank_pct
0,0.2,2.0,0.181818
1,0.2,2.0,0.181818
2,0.4,3.0,0.272727
3,0.6,4.0,0.363636
4,0.7,5.0,0.454545
5,1.0,8.0,0.727273
6,1.0,8.0,0.727273
7,1.0,8.0,0.727273
8,4.5,9.0,0.818182
9,7.8,10.0,0.909091


In [47]:
# Percentile rank of the rows whose precip equals 1 (last matching row).
df.loc[df['precip'] == 1, 'precip_rank_pct'].iloc[-1]

0.7272727272727273