In [2]:
# Select matplotlib's interactive 'notebook' backend for this session.
%matplotlib notebook

In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime as dt
import os
import glob
import calendar
import re

In [4]:
def read_precip(fili):
    """
    Read one NPSNOW station precipitation file into a date-indexed DataFrame.

    In NPSNOW precip files, dry days have amount=-9.9 and type=-9.
    These are set to 0.0 and 0 respectively.  Dry days are therefore distinct
    from days with trace precipitation, which have amount 0.0 and a non-zero
    type.

    Rows whose year/month/day fields do not form a real calendar date
    (for example 30 February) are dropped.

    Arguments
    ---------
    fili - file path

    Returns
    -------
    Pandas dataframe containing precipitation data for one station, indexed
    by date, with columns 'statid', 'amount' and 'type'.
    """

    # sep=r'\s+' is the non-deprecated spelling of delim_whitespace=True
    raw = pd.read_csv(fili, header=None, sep=r'\s+',
                      names=['statid', 'month', 'day', 'year', 'amount', 'type'])

    # Build every date in one vectorised call; impossible dates become NaT
    dates = pd.to_datetime(raw[['year', 'month', 'day']], errors='coerce')

    # Range-check month and day explicitly as well, so an out-of-range field
    # can never be reinterpreted as part of a different, valid date.
    isday = (dates.notna()
             & raw['month'].between(1, 12)
             & raw['day'].between(1, 31))

    # Only keep rows with a valid date; copy so the index can be replaced safely
    df = raw.loc[isday, ['statid', 'amount', 'type']].copy()
    df.index = pd.DatetimeIndex(dates[isday].to_numpy())  # Reset index to date

    # Assumes zero precipitation/dry days are marked as -9.9 (type -9);
    # every non-positive value is set to zero
    df = df.where(df > 0., 0.0)

    return df


Get the list of files containing precipitation data.  Following Bogdanova et al. (2006), I exclude NP 4, 5 and 14.

In [5]:
# Directory holding the NPSNOW precipitation files.  Set the NPSNOW_PRECIP_DIR
# environment variable to read from another location; otherwise the original
# local path is used.
dirpath = os.environ.get(
    'NPSNOW_PRECIP_DIR',
    r'C:\Users\apbarret\Documents\data\SnowOnSeaIce\NPSNOW\precip')

# glob returns matches in arbitrary order; sort so the files are always
# processed in the same order.
filelist = sorted(glob.glob(os.path.join(dirpath, 'np_??_??.pre')))

# Drop excluded stations.  Match on the file name only, so that a directory
# name containing one of the patterns cannot exclude every file.
# NOTE(review): the accompanying text says NP 4, 5 and 14 are excluded, but this
# pattern drops stations 03, 04 and 14 -- confirm which list is intended.
filelist = [f for f in filelist
            if not re.search('np_03|np_04|np_14', os.path.basename(f))]

In [6]:
# Read every station file and stack the per-station frames into one long frame
df = pd.concat(map(read_precip, filelist))
df.head()

Unnamed: 0,statid,amount,type
1955-05-01,5,0.0,0
1955-05-02,5,0.0,0
1955-05-03,5,0.0,0
1955-05-04,5,0.0,0
1955-05-05,5,0.0,0


In [8]:
# Arrange the daily amounts as a date-by-station grid, total them per calendar
# month ('MS' labels each month by its first day), then replace every
# non-positive monthly total with NaN.
table = (
    pd.pivot_table(df, values='amount', index=df.index, columns='statid')
    .resample('MS')
    .sum()
    .pipe(lambda monthly: monthly.where(monthly > 0))
)
table.head()

statid,5,6,7,8,9,10,11,12,13,15,...,22,23,24,25,26,27,28,29,30,31
1955-05-01,1.3,,,,,,,,,,...,,,,,,,,,,
1955-06-01,7.2,,,,,,,,,,...,,,,,,,,,,
1955-07-01,13.7,,,,,,,,,,...,,,,,,,,,,
1955-08-01,27.6,,,,,,,,,,...,,,,,,,,,,
1955-09-01,12.3,,,,,,,,,,...,,,,,,,,,,


In [19]:
# For each month: 'Parch' is the mean of the station totals and 'n' is the
# number of stations with a non-NaN total.
month_table = table.mean(axis=1).to_frame('Parch').assign(n=table.count(axis=1))

# Annual sums; years with fewer than 12 non-NaN monthly values sum to NaN
# and are dropped.
(
    month_table
    .resample('Y')
    .sum(min_count=12)
    .dropna()
)

Unnamed: 0,Parch,n
1956-12-31,158.3,16.0
1957-12-31,177.65,20.0
1958-12-31,149.2,24.0
1962-12-31,121.55,23.0
1963-12-31,125.85,24.0
1964-12-31,119.6,24.0
1965-12-31,104.25,16.0
1966-12-31,174.7,20.0
1967-12-31,135.45,16.0
1969-12-31,190.283333,26.0


In [9]:
# Number of non-NaN station-month totals in each calendar year
table.notna().sum(axis=1).resample('Y').sum()

1955-12-31     8
1956-12-31    16
1957-12-31    20
1958-12-31    24
1959-12-31    11
1960-12-31     8
1961-12-31    13
1962-12-31    23
1963-12-31    24
1964-12-31    24
1965-12-31    16
1966-12-31    20
1967-12-31    16
1968-12-31    19
1969-12-31    26
1970-12-31    33
1971-12-31    39
1972-12-31    28
1973-12-31    18
1974-12-31    16
1975-12-31    12
1976-12-31    23
1977-12-31    24
1978-12-31    29
1979-12-31    24
1980-12-31    22
1981-12-31    19
1982-12-31    15
1983-12-31    19
1984-12-31    21
1985-12-31    12
1986-12-31    19
1987-12-31    24
1988-12-31    29
1989-12-31    24
1990-12-31    24
1991-12-31     5
Freq: A-DEC, dtype: int64