In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import xarray as xr
import datetime
import time 
from timezonefinder import TimezoneFinder
import pytz
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import cartopy.io.shapereader as shpreader
import os

from datetime import timedelta
from palettable.colorbrewer.sequential import OrRd_6
from palettable.colorbrewer.sequential import YlGn_9
from palettable.colorbrewer.sequential import YlGnBu_8
from palettable.colorbrewer.sequential import RdPu_5

In [9]:
# Creating Clausius-Clapeyron function

def CC(temp, temp_dew):
    """
    function that calculates relative humidity with temperature and dew point temperature
    temperautre input units: ˚C
    """
    # constant parameters
    Tref = 273.15  # reference temperature
    Es_Tref = 6.11 # saturation vapor pressure at reference temperature
    Lv = 2.5e+06   # latent heat of vaporation (J/kg)
    Rv = 461       # gas constant for moist air (J/kg)
    
    # transformed temperature inputs
    Tair = temp + Tref
    Tdew = temp_dew + Tref
    
    # Clausius-Clapeyron relation
    es = Es_Tref*np.exp((Lv/Rv)*(1/Tref - 1/Tair))
    e = Es_Tref*np.exp((Lv/Rv)*(1/Tref - 1/Tdew))
    rh = round(e/es,4)
    
    return(rh)

## 1961-1990

In [194]:
%%time 
# run time ~30 mins

# timing related settings
years = np.arange(1961, 1991) # timeframe in which we have weather data
dateparse = lambda dates: [datetime.datetime.strptime(d, "%Y%m%d%H") for d in dates] # dateparsing method to be used in pd.read_fwf
season_start, season_end = '03-01-', '11-30-' # setting a pretty borad range for growing season

# setting up np.read_fwf arguments
colnames = ['time', 'temp', 'temp_quality', 'dew_temp', 'dtemp_quality', 'precip', 
            'precip_time', 'precip_depth', 'precip_quality', 'precip_perhr', 'rh']
colspecs = [(15,25), (87,92), (92,93), (93,98), (98,99), (105,8193)]


# empty dataframes to store data from all site-years
df_temp_all = pd.DataFrame()
df_rh_all = pd.DataFrame()
df_precip_all = pd.DataFrame()

# reading in all weather data and storing as dataframe
for year in years:
    print(year) # output to track code progress
    times = pd.date_range(season_start + str(year), season_end + str(year) + ' 23:00:00', freq='1H')
    fnames = glob.glob('/home/disk/eos8/ach315/data/ISH/' + str(year) + '/*')
    
    # creating dataframes to store all site data for an individual year
    df_temp_sites = pd.DataFrame(index=times)
    df_rh_sites = pd.DataFrame(index=times)
    df_precip_sites = pd.DataFrame(index=times)
    
    for name in fnames:
        # WBAN site name 
        site_id = name.split('/')[-1].split('-')[-2]
        
        # read in individual files
        df = pd.read_fwf(name, names=colnames, colspecs=colspecs, header=None, index_col='time',
                         encoding='latin_1', dtype={'temp':int, 'precip':str}, 
                         parse_dates=True, date_parser=dateparse)
    
        # remove duplicated hours, keeping only the first measurement per hour
        df = df[df.index.duplicated(keep='first') == False]
        
        # add in missing time values (corrects for leap years) and keeps only growing season
        df = df.reindex(times, fill_value=np.nan)
        
        # finding precip data
        df.precip_time = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(0,2).astype(float)
        df.precip_depth = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(2, 6).astype(float)
        df.precip_quality = df[df['precip'].str.find('ADDAA1')!=-1]['precip'].str.split('ADDAA1').str.get(1).str.slice(7,8).astype(float)
                    
        # filtering out weather data based on quality code (data manual p.26)
        # removing data with code 3 (Erroneous) or 7 (Erroneous, data originate from an NCEI data source)
        # - temp
        quality_temp = (df.temp_quality=='3') | (df.temp_quality=='7')
        rows_temp = df[quality_temp].index
        df.loc[rows_temp, 'temp'] = np.nan
        # - dew temp
        quality_dtemp = (df.dtemp_quality=='3') | (df.dtemp_quality=='7')
        rows_dtemp = df[quality_dtemp].index
        df.loc[rows_dtemp, 'dew_temp'] = np.nan
        # - precip
        quality_precip = (df.precip_quality=='3') | (df.precip_quality=='7')
        rows_precip = df[quality_precip].index
        df.loc[rows_precip, 'precip'] = np.nan        

        # replacing missing data with NaN
        df.temp = df.temp.replace({9999: np.nan})
        df.dew_temp = df.dew_temp.replace({9999: np.nan})
        df.precip_time = df.precip_time.replace({99: np.nan})
        df.precip_depth = df.precip_depth.replace({9999: np.nan})
        
        # calculating hourly precip depth
        df.precip_perhr = df.precip_depth/df.precip_time
        
        # filling in precip data
        precip_tofill = df[df.precip_time>1].shape[0]
        if precip_tofill > 0:
            print(name, df[df.precip_time>1].shape[0], df[df.precip_depth.isna()].shape[0])
            for i in np.arange(precip_tofill):
                start = df[df.precip_time>1].index[i] - timedelta(hours=df[df.precip_time>1].precip_time[i]-1)
                end = df[df.precip_time>1].index[i]
                df.loc[start:end].precip_depth = df[df.precip_time>1].precip_perhr[i]
            print(df[df.precip_depth.isna()].shape[0])
        
        # converting units 
        df.temp = df.temp/10
        df.dew_temp = df.dew_temp/10
        df.precip_depth = df.precip_depth/10
        
        # calculating RH through Clausius Clapeyron
        df.rh = CC(df.temp, df.dew_temp)*100
        if df.loc[df.rh>100].rh.shape[0] > 0:
            print('rh > 100: ', year, name)        

        # combining weather data into individual dataframes
        df_temp = pd.DataFrame({site_id: df.temp}, index= times)
        df_rh = pd.DataFrame({site_id: df.rh}, index=times)
        df_precip = pd.DataFrame({site_id: df.precip_depth}, index=times)
        
        df_temp_sites = pd.concat([df_temp_sites, df_temp], axis= 1, sort=True)
        df_rh_sites = pd.concat([df_rh_sites, df_rh], axis=1, sort=True)
        df_precip_sites = pd.concat([df_precip_sites, df_precip], axis=1, sort=True)

    # combining all site-years data together
    df_temp_all = pd.concat([df_temp_all, df_temp_sites], sort=True)
    df_rh_all = pd.concat([df_rh_all, df_rh_sites], sort=True)
    df_precip_all = pd.concat([df_precip_all, df_precip_sites], sort=True)

#df_temp_all.to_csv('/home/disk/eos8/ach315/upscale/weadata/temp_6190.csv')
#df_precip_all.to_csv('/home/disk/eos8/ach315/upscale/weadata/precip_6190.csv')
#df_rh_all.to_csv('/home/disk/eos8/ach315/upscale/weadata/rh_6190.csv')

1961


  res_values = method(rvalues)


/home/disk/eos8/ach315/data/ISH/1961/911820-22521-1961 275 6452


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


3048
/home/disk/eos8/ach315/data/ISH/1961/724660-93037-1961 275 6600
6600
/home/disk/eos8/ach315/data/ISH/1961/725300-94846-1961 275 6362
888
/home/disk/eos8/ach315/data/ISH/1961/727930-24233-1961 275 6366
995
1962
/home/disk/eos8/ach315/data/ISH/1962/911820-22521-1962 214 5034
2446
/home/disk/eos8/ach315/data/ISH/1962/727930-24233-1962 275 6365
971
/home/disk/eos8/ach315/data/ISH/1962/723815-23161-1962 275 6328
83
/home/disk/eos8/ach315/data/ISH/1962/722060-13889-1962 2 2
1
/home/disk/eos8/ach315/data/ISH/1962/724660-93037-1962 275 6600
6600
/home/disk/eos8/ach315/data/ISH/1962/725300-94846-1962 92 2138
367
1963
/home/disk/eos8/ach315/data/ISH/1963/723815-23161-1963 275 6333
203
1964
/home/disk/eos8/ach315/data/ISH/1964/724660-93037-1964 275 6600
6600
/home/disk/eos8/ach315/data/ISH/1964/723815-23161-1964 275 6336
275
/home/disk/eos8/ach315/data/ISH/1964/727930-24233-1964 275 6375
1211
1965
1966
1967
1968
1969
1970
1971
1972
1973
/home/disk/eos8/ach315/data/ISH/1973/725020-14734-1973 