# Extreme precipitation in the Canadian Arctic
Here, I process, clean, and explore the data used in the study of extreme precipitation in the Canadian Arctic.

Data are obtained from https://climatedata.ca/download/#station-download.  This is not the official ECCC site but collates and serves daily data.

_I need to check what they do_

Many stations have had site and instrumentation changes.  Where these have occurred and sites are co-located, records have been combined.

#### To Do
- Make plot of record span for each station, including periods of missing data  
- Make map of stations with symbols showing length of record
- Make table showing length of record and completeness of record by year  
- _Tair and Precip statistics for combined records_  
- Plot heatmap showing timing and amount of precipitation for each station
- Calculate seasonal cycle of precipitation (precip days, average P, phase)  


In [106]:
from pathlib import Path
import re
import pprint

import pandas as pd

# Directory that is searched for the station CSV files.  This is a hard-coded,
# machine-specific absolute path; edit it when running elsewhere.
DATAPATH = Path('/').joinpath(
    'home', 'apbarret', 'Data', 'Rain_on_snow', 'Canada_extreme_precip', 'From_Mark',
)

In [74]:
# Map a human-readable station name to the CSV file it was parsed from.
#
# The station name is taken from the file name: everything before "_Combined"
# if that marker is present, otherwise everything before the ".csv" extension.
# Raw strings are used so that "\." reaches the regex engine as written rather
# than being treated as an (invalid) string escape sequence.
p1 = re.compile(r'.*(?=_Combined)')
p2 = re.compile(r'.*(?=\.+csv)')
data = {}
for fp in DATAPATH.glob('*.csv'):
    # Match objects are always truthy, so `or` falls through to the second
    # pattern only when the first one did not match.
    match = p1.search(fp.name) or p2.search(fp.name)
    if match is None:
        print(f'{fp.name} No match!')
        continue
    # Underscores in file names stand in for spaces in station names.
    station_name = match.group(0).replace('_', ' ')
    # The greedy ".*" stops at the last dot before "csv", so a name written
    # with a doubled dot (e.g. "X..csv") keeps one trailing dot; drop it.
    if station_name.endswith('.'):
        station_name = station_name[:-1]
    data[station_name] = fp

In [75]:
# Display the station names parsed from the file names (iterating a dict
# yields its keys).
list(data)

['Alert',
 'Eureka',
 'Cambridge Bay',
 'Hall Beach',
 'Inuvik A',
 'Sachs Harbour',
 'Cape-Dyer A',
 'Resolute Bay',
 'Clyde River',
 'Pond Inlet A']

In [81]:
def load_data(filepath):
    '''Read a station CSV file into a DataFrame.

    The first two lines of the file are skipped before the header row is
    read, and rows in which every column is missing are discarded.

    Parameters
    ----------
    filepath : path or file-like object accepted by ``pandas.read_csv``

    Returns
    -------
    pandas.DataFrame
        The file contents without all-NaN rows.  The original row index is
        kept, so it may contain gaps where rows were dropped.
    '''
    return pd.read_csv(filepath, skiprows=2).dropna(how='all')


def get_station_metadata(df):
    '''Summarise each CLIMATE_IDENTIFIER found in a station DataFrame.

    Rows are grouped by ``CLIMATE_IDENTIFIER``.  For every group the first
    value of ``x``, ``y``, ``STATION_NAME``, ``LOCAL_DATE`` and
    ``PROVINCE_CODE`` and the last value of ``LOCAL_DATE`` are collected.

    Note: pandas ``GroupBy.first()`` / ``GroupBy.last()`` return the first /
    last *non-null* entry of each column, following the row order of ``df``.
    START_DATE and END_DATE are therefore the earliest and latest dates only
    if ``df`` is in chronological order.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns named above.

    Returns
    -------
    pandas.DataFrame
        Indexed by integer climate identifier, with columns ``x``, ``y``,
        ``STATION_NAME``, ``START_DATE``, ``PROVINCE_CODE`` and ``END_DATE``;
        the two date columns are converted with ``pandas.to_datetime``.
    '''
    by_station = df.groupby(df.CLIMATE_IDENTIFIER)
    wanted = ['x', 'y', 'STATION_NAME', 'LOCAL_DATE', 'PROVINCE_CODE']

    # Name the two date columns before joining so that no suffix is needed.
    opening = by_station.first()[wanted].rename(columns={'LOCAL_DATE': 'START_DATE'})
    closing = by_station.last()['LOCAL_DATE'].rename('END_DATE')

    metadata = opening.join(closing)
    metadata.index = metadata.index.astype(int)
    for column in ('START_DATE', 'END_DATE'):
        metadata[column] = pd.to_datetime(metadata[column])
    return metadata
    

In [108]:
# Build a "recipe" for every station: for each climate identifier found in the
# station's file, the start and end date (as YYYY-MM-DD strings) reported by
# get_station_metadata.  Recipes are keyed by the lower-cased station name.
combine_recipes = {}
for station_name, fp in data.items():

    df = load_data(fp)
    try:
        metadata = get_station_metadata(df)
    except Exception as error:
        # Best effort: a file whose metadata cannot be extracted is skipped.
        # Report which station/file failed so the message can be traced back.
        print(f'{station_name} ({fp.name}): {error}')
        continue

    combine_recipes[station_name.lower()] = {
        climate_id: {
            'start_date': record.START_DATE.strftime('%Y-%m-%d'),
            'end_date': record.END_DATE.strftime('%Y-%m-%d'),
        }
        for climate_id, record in metadata.iterrows()
    }

pprint.pprint(combine_recipes)

  after removing the cwd from sys.path.
  after removing the cwd from sys.path.
  after removing the cwd from sys.path.


'DataFrame' object has no attribute 'CLIMATE_IDENTIFIER'
{'alert': {2400300: {'end_date': '2006-10-02', 'start_date': '1950-07-01'},
           2400305: {'end_date': '2021-10-03', 'start_date': '2006-10-03'}},
 'cambridge bay': {2400600: {'end_date': '2015-02-11',
                             'start_date': '1929-01-01'},
                   2400601: {'end_date': '2021-09-22',
                             'start_date': '2015-02-12'}},
 'cape-dyer a': {2400654: {'end_date': '1993-09-30',
                           'start_date': '1959-08-01'}},
 'eureka': {2401199: {'end_date': '2021-09-23', 'start_date': '2016-02-25'},
            2401200: {'end_date': '2016-02-24', 'start_date': '1947-05-01'}},
 'hall beach': {2402350: {'end_date': '2014-12-31', 'start_date': '1957-01-01'},
                2402354: {'end_date': '2021-09-23',
                          'start_date': '2015-01-01'}},
 'inuvik a': {2202570: {'end_date': '2006-03-31', 'start_date': '1957-03-01'}},
 'pond inlet a': {2403201: {'