In [3]:
# Creating Clausius-Clapeyron function

def CC(temp, temp_dew):
    """
    Relative humidity from air temperature and dew point temperature.

    Both vapor pressures come from the same Clausius-Clapeyron expression:
    evaluated at the dew point it gives the actual vapor pressure, evaluated
    at the air temperature it gives the saturation vapor pressure. Their
    ratio is returned as a fraction (not percent), rounded to 4 decimals.

    temperature input units: ˚C (scalars or array-likes such as pandas Series)
    """
    # constant parameters
    t_ref = 273.15         # reference temperature (K); also the ˚C -> K offset
    e_ref = 6.11           # saturation vapor pressure at the reference temperature
    latent_heat = 2.5e+06  # latent heat of vaporization (J/kg)
    gas_const = 461        # gas constant used in the relation (J/(kg K)); value matches water vapor

    def vapor_pressure(t_celsius):
        # Clausius-Clapeyron relation, with the input converted to Kelvin first
        t_kelvin = t_celsius + t_ref
        return e_ref*np.exp((latent_heat/gas_const)*(1/t_ref - 1/t_kelvin))

    actual = vapor_pressure(temp_dew)
    saturation = vapor_pressure(temp)

    return round(actual/saturation, 4)

## Example 
Why, after taking `precip_time` into account and filling in the values, did I end up with slightly fewer valid site-years?

In [397]:
# Single site-year walkthrough of the cleaning steps applied to the raw ISH files.
# NOTE: this cell relies on `colnames`, `colspecs`, `dateparse`, `season_start` and
# `season_end`, which are defined in the site-year loop cell further down the notebook.
name = '/home/disk/eos8/ach315/data/ISH/1995/723140-13881-1995'
year = 1995
times = pd.date_range(season_start + str(year), season_end + str(year) + ' 23:00:00', freq='1H')

# Only the first len(colspecs) names correspond to fixed-width fields in the file; the
# remaining names are derived columns (newer pandas rejects names/colspecs of unequal length).
# Quality codes are read as strings: if they are inferred as numbers, the comparisons
# with '3'/'7' below never match and the quality filter silently does nothing.
df = pd.read_fwf(name, names=colnames[:len(colspecs)], colspecs=colspecs, header=None, index_col='time',
                 encoding='latin_1',
                 dtype={'temp': int, 'temp_quality': str, 'dtemp_quality': str, 'precip': str},
                 parse_dates=True, date_parser=dateparse)
# remove duplicated hours, keeping only the first measurement per hour
df = df[~df.index.duplicated(keep='first')]

# add in missing time values (corrects for leap years) and keeps only growing season;
# the derived columns (everything in colnames after 'time') are created here as NaN
df = df.reindex(index=times, columns=colnames[1:])
# make the measurement columns float so they can hold NaN
df = df.astype({'temp': float, 'dew_temp': float})

# finding precip data: text that follows the 'ADDAA1' marker in the remainder of the record
has_aa1 = df['precip'].str.find('ADDAA1') != -1
aa1 = df.loc[has_aa1, 'precip'].str.split('ADDAA1').str.get(1)
df['precip_time'] = aa1.str.slice(0, 2).astype(float)   # characters 0-1
df['precip_depth'] = aa1.str.slice(2, 6).astype(float)  # characters 2-5
df['precip_quality'] = aa1.str.slice(7, 8)              # character 7

# filtering out weather data based on quality code (data manual p.26)
# removing data with code 3 (Erroneous) or 7 (Erroneous, data originate from an NCEI data source)
erroneous = ['3', '7']
# - temp
df.loc[df['temp_quality'].isin(erroneous), 'temp'] = np.nan
# - dew temp
df.loc[df['dtemp_quality'].isin(erroneous), 'dew_temp'] = np.nan
# - precip: blank the parsed depth (the raw `precip` string is not used after parsing)
df.loc[df['precip_quality'].isin(erroneous), 'precip_depth'] = np.nan

# replacing missing values with NANs
df['temp'] = df['temp'].replace({9999: np.nan})
df['dew_temp'] = df['dew_temp'].replace({9999: np.nan})
df['precip_time'] = df['precip_time'].replace({99: np.nan})
df['precip_depth'] = df['precip_depth'].replace({9999: np.nan})


df.head()

  res_values = method(rvalues)


Unnamed: 0,temp,temp_quality,dew_temp,dtemp_quality,precip,precip_time,precip_depth,precip_quality,precip_perhr,rh
1995-03-01 00:00:00,156.0,5,144.0,5,ADDAA101000095GA1999+001525075GA2999+022865045...,1.0,0.0,5,,
1995-03-01 01:00:00,150.0,5,144.0,5,ADDAA101000395GA1999+001835075GA2999+021345045...,1.0,3.0,5,,
1995-03-01 02:00:00,150.0,5,144.0,5,ADDAA101000595GA1999+001835075GA2999+006715065...,1.0,5.0,5,,
1995-03-01 03:00:00,144.0,5,144.0,5,ADDAA101000895AA206002091AG10002GA1999+0018350...,1.0,8.0,5,,
1995-03-01 04:00:00,139.0,5,139.0,5,ADDAA101000595AG10999GA1999+001835075GA2999+01...,1.0,5.0,5,,


In [398]:
# records whose precipitation value covers more than one hour
df.loc[df['precip_time'] > 1]

Unnamed: 0,temp,temp_quality,dew_temp,dtemp_quality,precip,precip_time,precip_depth,precip_quality,precip_perhr,rh
1995-05-01,239.0,5,139.0,5,ADDAA106999929AC11C9AG10004AY181061AY221061GA1...,6.0,,9,,


In [402]:
# hours with no usable precipitation depth
df.loc[df['precip_depth'].isna()]

Unnamed: 0,temp,temp_quality,dew_temp,dtemp_quality,precip,precip_time,precip_depth,precip_quality,precip_perhr,rh
1995-03-05 23:00:00,,9,,9,ADDAG10999GD14991+0051019GF1089919999999999999...,,,,,
1995-05-01 00:00:00,239.0,5,139.0,5,ADDAA106999929AC11C9AG10004AY181061AY221061GA1...,6.0,,9.0,,
1995-06-30 04:00:00,,9,,9,ADDAG10999GD13991+0018019GD23991+0042019GF1079...,,,,,


In [408]:
# hours leading up to (and just after) the 6-hour record at 1995-05-01 00:00
df.loc['1995-04-30 18:00:00':'1995-05-01 01:00:00']

Unnamed: 0,temp,temp_quality,dew_temp,dtemp_quality,precip,precip_time,precip_depth,precip_quality,precip_perhr,rh
1995-04-30 18:00:00,256.0,5,122.0,5,ADDAA101000095AA206999929AC10C9AG10002AY181061...,1.0,0.0,5,,
1995-04-30 19:00:00,256.0,5,122.0,5,ADDAA101000095GA1999+015245085GA2999+030485035...,1.0,0.0,5,,
1995-04-30 20:00:00,261.0,5,139.0,5,ADDAA101000095GA1999+015245085GA2999+076205025...,1.0,0.0,5,,
1995-04-30 21:00:00,256.0,5,133.0,5,ADDAA101000095AG10000GA1999+015245085GA2999+07...,1.0,0.0,5,,
1995-04-30 22:00:00,261.0,5,133.0,5,ADDAA101000095AG10999GA1999+015245085GA2999+07...,1.0,0.0,5,,
1995-04-30 23:00:00,244.0,5,139.0,5,ADDAA101000095AG10999GA1999+015245085GA2999+07...,1.0,0.0,5,,
1995-05-01 00:00:00,239.0,5,139.0,5,ADDAA106999929AC11C9AG10004AY181061AY221061GA1...,6.0,,9,,
1995-05-01 01:00:00,167.0,5,156.0,5,ADDAA101011495AG10999ED136U18299GA1999+0030550...,1.0,114.0,5,,


### 1991-2010

In [None]:
# USAF station metadata, taken from the solar radiation dataset
df_sites = pd.read_csv('/home/disk/eos8/ach315/upscale/weadata/stations_info_9110.csv')
df_sites.head()  # not rendered: only a cell's last expression is displayed

# keep class 1 stations only (see NSRDB manual p.7-8 for more details)
is_class1 = df_sites['CLASS'] == 1
df_class1 = df_sites[is_class1]
sites_class1 = list(df_class1['USAF'])  # USAF IDs of the class 1 quality stations

In [392]:
%%time

# timing related settings
years = np.arange(1991, 2011) # timeframe in which we have weather data
dateparse = lambda dates: [datetime.datetime.strptime(d, "%Y%m%d%H") for d in dates] # dateparsing method to be used in pd.read_fwf
season_start, season_end = '03-01-', '11-30-' # setting a pretty borad range for growing season

# setting up np.read_fwf arguments
colnames = ['time', 'temp', 'temp_quality', 'dew_temp', 'dtemp_quality', 'precip', 
            'precip_time', 'precip_depth', 'precip_quality', 'precip_perhr', 'rh']
colspecs = [(15,25), (87,92), (92,93), (93,98), (98,99), (105,8193)]

# empty dataframes to store data from all site-years
df_temp_all = pd.DataFrame()
df_rh_all = pd.DataFrame()
df_precip_all = pd.DataFrame()

# reading in all weather data and storing as dataframe
for year in years:
    print(year) # output to track code progress
    times = pd.date_range(season_start + str(year), season_end + str(year) + ' 23:00:00', freq='1H')
    
    # creating dataframes to store all site data for an individual year
    df_temp_sites = pd.DataFrame(index=times)
    df_rh_sites = pd.DataFrame(index=times)
    df_precip_sites = pd.DataFrame(index=times)
    
    for site in sites_class1:
        # selecting for file associated with specified site
        file = glob.glob('/home/disk/eos8/ach315/data/ISH/' + str(year) + '/' + str(site) + '-*')
        
        if len(file) == 0: # when specified site does not exist for current year
            continue # skip the following code and move on to the next site in the for loop
        elif len(file) == 1:
            name = file[0]
        else: # when specified USAF site has more than one WBAN ID, resulting in more than one unique site
            print('choosing from multiple files: ', file)
            name = glob.glob('/home/disk/eos8/ach315/data/ISH/' + str(year) + '/' + str(site) + '-99999-*')[0]
            # for cases when a USAF station ID is linked to two WBAN IDs, select the one in which WBAN is listed as 99999
            
        # reading in raw weather data as fixed-width data format
        df = pd.read_fwf(name, names=colnames, colspecs=colspecs, header=None, index_col='time',
                         encoding='latin_1', dtype={'temp':int, 'precip':str}, 
                         parse_dates=True, date_parser=dateparse)
        # remove duplicated hours, keeping only the first measurement per hour
        df = df[df.index.duplicated(keep='first') == False]

        # add in missing time values (corrects for leap years) and keeps only growing season
        df = df.reindex(times, fill_value=np.nan)

        # finding precip data
        df.precip_time = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(0,2).astype(float)
        df.precip_depth = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(2, 6).astype(float)
        df.precip_quality = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(7,8)
                
        # filtering out weather data based on quality code (data manual p.26)
        # removing data with code 3 (Erroneous) or 7 (Erroneous, data originate from an NCEI data source)
        # - temp
        quality_temp = (df.temp_quality=='3') | (df.temp_quality=='7')
        rows_temp = df[quality_temp].index
        df.loc[rows_temp, 'temp'] = np.nan
        # - dew temp
        quality_dtemp = (df.dtemp_quality=='3') | (df.dtemp_quality=='7')
        rows_dtemp = df[quality_dtemp].index
        df.loc[rows_dtemp, 'dew_temp'] = np.nan
        # - precip
        quality_precip = (df.precip_quality=='3') | (df.precip_quality=='7')
        rows_precip = df[quality_precip].index
        df.loc[rows_precip, 'precip'] = np.nan

        # replacing missing values with NANs                    
        df.temp = df.temp.replace({9999: np.nan})
        df.dew_temp = df.dew_temp.replace({9999: np.nan})
        df.precip_time = df.precip_time.replace({99: np.nan})
        df.precip_depth = df.precip_depth.replace({9999: np.nan})

        # calculating hourly precip depth
        df.precip_perhr = df.precip_depth/df.precip_time
        
        # filling in precip data
        precip_tofill = df[df.precip_time>1].shape[0]
        if precip_tofill > 0:
            print(name, df[df.precip_time>1].shape[0], df[df.precip_depth.isna()].shape[0])
            for i in np.arange(precip_tofill):
                start = df[df.precip_time>1].index[i] - timedelta(hours=df[df.precip_time>1].precip_time[i]-1)
                end = df[df.precip_time>1].index[i]
                df.loc[start:end].precip_depth = df[df.precip_time>1].precip_perhr[i]
            print(df[df.precip_depth.isna()].shape[0])

        # converting units 
        df.temp = df.temp/10
        df.dew_temp = df.dew_temp/10
        df.precip_depth = df.precip_depth/10

        # calculating RH through Clausius Clapeyron
        df.rh = CC(df.temp, df.dew_temp)*100
        if df.loc[df.rh>100].rh.shape[0] > 0:
            print('rh > 100: ', year, name)        
            
        # combining weather data into individual dataframes
        df_temp = pd.DataFrame({site: df.temp}, index= times)
        df_rh = pd.DataFrame({site: df.rh}, index=times)
        df_precip = pd.DataFrame({site: df.precip_depth}, index=times)

        df_temp_sites = pd.concat([df_temp_sites, df_temp], axis= 1, sort=True)
        df_rh_sites = pd.concat([df_rh_sites, df_rh], axis=1, sort=True)
        df_precip_sites = pd.concat([df_precip_sites, df_precip], axis=1, sort=True)       

    # combining all site-years data together
    df_temp_all = pd.concat([df_temp_all, df_temp_sites], sort=True)
    df_rh_all = pd.concat([df_rh_all, df_rh_sites], sort=True)
    df_precip_all = pd.concat([df_precip_all, df_precip_sites], sort=True)

#df_temp_all.to_csv('/home/disk/eos8/ach315/upscale/weadata/temp_9110_class1.csv')
#df_precip_all.to_csv('/home/disk/eos8/ach315/upscale/weadata/precip_9110_class1.csv')
#df_rh_all.to_csv('/home/disk/eos8/ach315/upscale/weadata/rh_9110_class1.csv')

1991
/home/disk/eos8/ach315/data/ISH/1991/723100-13883-1991 1103 4571
548
/home/disk/eos8/ach315/data/ISH/1991/723403-13963-1991 1077 4629
576
/home/disk/eos8/ach315/data/ISH/1991/723723-23184-1991 1071 4546
417
/home/disk/eos8/ach315/data/ISH/1991/723815-23161-1991 1062 4513
273
/home/disk/eos8/ach315/data/ISH/1991/723870-03160-1991 1082 4514
307
/home/disk/eos8/ach315/data/ISH/1991/723925-23190-1991 799 5106
1831
/home/disk/eos8/ach315/data/ISH/1991/723940-23273-1991 550 5431
3234
/home/disk/eos8/ach315/data/ISH/1991/724030-93738-1991 1111 4538
481
/home/disk/eos8/ach315/data/ISH/1991/724280-14821-1991 1099 4623
650
/home/disk/eos8/ach315/data/ISH/1991/724345-03966-1991 1077 4621
667
/home/disk/eos8/ach315/data/ISH/1991/724450-03945-1991 1 1
0
/home/disk/eos8/ach315/data/ISH/1991/724585-93997-1991 1077 4595
618
/home/disk/eos8/ach315/data/ISH/1991/724620-23061-1991 470 5720
3956
/home/disk/eos8/ach315/data/ISH/1991/724665-93010-1991 48 198
12
/home/disk/eos8/ach315/data/ISH/1991/7247

In [333]:
#df_precip_all.to_csv('/home/disk/eos8/ach315/upscale/weadata/precip_9110_class1_filled.csv')