In [217]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import time 
import datetime

In [249]:
# preset time variables
season_start, season_end = '03-01-', '11-30-'
year = 2008
dateparse = lambda dates: [datetime.datetime.strptime(d, "%Y%m%d%H") for d in dates] # dateparsing method to be used in pd.read_fwf
season_start, season_end = '03-01-', '11-30-' # setting a pretty borad range for growing season

# setting up np.read_fwf arguments
colnames = ['time', 'temp', 'dew_temp', 'precip', 'precip_time', 'precip_depth', 'precip_condition', 'precip_quality', 'rh']
colspecs = [(15,25), (87,92), (93,98), (105,8193)]

# reading in file
name = '/home/disk/eos8/ach315/data/ISH/2008/722010-12836-2008'
df = pd.read_fwf(name, names=colnames, colspecs=colspecs, header=None, index_col='time',
                 encoding='latin_1', dtype={'temp':int, 'precip':str}, 
                 parse_dates=True, date_parser=dateparse)

# remove duplicated hours, keeping only the first measurement per hour
df = df[df.index.duplicated(keep='first') == False]

# add in missing time values (corrects for leap years) and keeps only growing season
times = pd.date_range(season_start + str(year), season_end + str(year), freq='1H')
df = df.reindex(times, fill_value=np.nan)

In [250]:
df.head()

Unnamed: 0,temp,dew_temp,precip,precip_time,precip_depth,precip_condition,precip_quality,rh
2008-03-01 00:00:00,211,139,ADDGA1001+999999999GF100991001001999999001001K...,,,,,
2008-03-01 01:00:00,206,144,ADDAA101000095GF100991999999999999999999MA1102...,,,,,
2008-03-01 02:00:00,206,144,ADDAA101000095GF100991999999999999999999MA1102...,,,,,
2008-03-01 03:00:00,206,144,ADDGA1001+999999999GF100991001001999999001001M...,,,,,
2008-03-01 04:00:00,200,150,ADDAA101000095GF100991999999999999999999MA1102...,,,,,


In [261]:
#df.precip_time = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(0,2).astype(float)
#df.precip_depth = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(2, 6).astype(float)
#df.precip_condition = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(6,7)
df.precip_quality = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(7,8)

In [258]:
df.precip_quality = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(7,8)


In [260]:
set(df.precip_quality)

{'1', '2', '5', '6', '9', 'P', nan}

In [247]:
df[df['precip'].str.find('ADDAA1')!=-1]['precip']

1991-03-01 00:00:00    ADDAA106000091AG10000GD12015+0300019GD22015+06...
1991-03-01 03:00:00    ADDAA199000091AG10000GD12015+0300019GF10199599...
1991-03-01 06:00:00    ADDAA106000091AG10000GF10099599999999999999999...
1991-03-01 09:00:00    ADDAA199000091AG10000GF10099599999999999999999...
1991-03-01 12:00:00    ADDAA106000091AG10000GD13991+0750013GF10699599...
                                             ...                        
1991-11-29 12:00:00    ADDAA106000091AG10000GD12991+0150019GD23991+02...
1991-11-29 15:00:00    ADDAA199000091AG10000GD12991+0210019GD23991+06...
1991-11-29 18:00:00    ADDAA106000091AG14000GD14991+0750019GF10899599...
1991-11-29 21:00:00    ADDAA199000091AG10000GD13991+0750013GF10599599...
1991-11-30 00:00:00    ADDAA106000091AG14000GD13991+0750019GF10599599...
Name: precip, Length: 2086, dtype: object

In [240]:
df[df['precip'].str.find('ADDAA1')!=-1]['precip']

1991-03-01 00:00:00    ADDAA106000091AG10000GD12015+0300019GD22015+06...
1991-03-01 03:00:00    ADDAA199000091AG10000GD12015+0300019GF10199599...
1991-03-01 06:00:00    ADDAA106000091AG10000GF10099599999999999999999...
1991-03-01 09:00:00    ADDAA199000091AG10000GF10099599999999999999999...
1991-03-01 12:00:00    ADDAA106000091AG10000GD13991+0750013GF10699599...
                                             ...                        
1991-11-29 12:00:00    ADDAA106000091AG10000GD12991+0150019GD23991+02...
1991-11-29 15:00:00    ADDAA199000091AG10000GD12991+0210019GD23991+06...
1991-11-29 18:00:00    ADDAA106000091AG14000GD14991+0750019GF10899599...
1991-11-29 21:00:00    ADDAA199000091AG10000GD13991+0750013GF10599599...
1991-11-30 00:00:00    ADDAA106000091AG14000GD13991+0750019GF10599599...
Name: precip, Length: 2086, dtype: object

In [213]:
flag = (df.precip_quality > 1) & (df.precip_quality < 9)
df.precip_depth[flag]

Series([], Freq: H, Name: precip_depth, dtype: float64)