In [19]:
import os
import urllib
import numpy as np
import pandas as pd

In [20]:
# Working directory for this notebook; every file written below lives here.
path = r'C:\Users\student\Documents\Alistair CoAgMET Projects\Pythonstuff\QC\auto_qc'
os.chdir(path)

# Locations of the CSV file and of the QA/QC text report inside the working
# directory. The report is opened in 'w+' mode, so an existing file is truncated.
csv = path + r'\csv.csv'
txt = path + r'\qaqc_report.txt'
q = open(txt, mode='w+')

# Comma-separated element abbreviations, in the order they are requested.
all_fm_data = ','.join([
    'tmean', 'rh', 'vp', 'sr', 'ws', 'wind_vec', 'wind_std',
    'pp', 'st5', 'st15', 'gust', 'gusttm', 'gustdir',
])

In [21]:
# Expected spacing between consecutive records for each supported temporal type.
# Timedelta objects are used instead of string aliases so the spacing does not
# depend on which frequency aliases the installed pandas version accepts.
_SAMPLING_INTERVALS = {
    'hourly': pd.Timedelta(hours=1),
    'five_minute': pd.Timedelta(minutes=5),
    'daily': pd.Timedelta(days=1),
}

# Elements checked for missing values, paired with the wording used in the report.
_REPORTED_ELEMENTS = (
    ('tmean', 'temperature'),
    ('rh', 'relative humidity'),
    ('vp', 'vapor pressure'),
    ('sr', 'solar radiation'),
    ('ws', 'wind speed'),
)


def _write_missing_report(data, report, fallback_station):
    """Writes one line to `report` for every missing value in the checked element columns."""
    report.write('QC report for whatever drainage basin we want!\n')

    # Only elements that were actually requested can be checked.
    checked = [(column, label) for column, label in _REPORTED_ELEMENTS
               if column in data.columns]

    for timestamp, row in data.iterrows():
        # Rows created by the reindex have no station value of their own, so the
        # requested station string is used to label them in the report.
        station = row['station'] if pd.notnull(row['station']) else fallback_station
        for column, label in checked:
            if pd.isnull(row[column]):
                report.write(str(station) + ' is missing ' + label +
                             ' data at ' + str(timestamp) + '\n')

    # Push the text to disk now; the report handle is opened once at module level
    # and is not closed by this function.
    report.flush()


def _replace_negative(data, column, replacement):
    """Overwrites negative values of `column` with `replacement` if the column exists."""
    if column in data.columns:
        data.loc[data[column] < 0, column] = replacement


def datapull(a,b,c,d,e):
    """
    Returns an easy to understand dataframe which includes missing data which would
    normally be skipped over.
    a = Temporal frequency of data
        -(Use 'daily' for daily data, 'hourly' for hourly data, and 'five_minute' for five minute data)
    b = Station(s) from which you would like to request data
        -(Use the five character station ID(s) which can be found at
        https://coagmet.colostate.edu/station_index.php)
    c = Starting date of the time period from which you would like to request data
        -(All dates must be entered in 'yyyy-mm-dd' format)
    d = Ending date of the time period from which you would like to request data
        -(All dates must be entered in 'yyyy-mm-dd' format)
    e = Abbreviations of data elements you would like to request, separated by commas
        -(Abbreviations for each element can be found at
        https://coagmet.colostate.edu/cgi-bin/web_services.pl)

    Each of the above arguments must be entered into the function as a STRING

    The download is saved to the module-level path `csv`, and a line for every
    missing temperature, relative humidity, vapor pressure, solar radiation or
    wind speed value is written to the module-level report file `q`.

    Returns a dataframe indexed by 'date' with a 'station' column followed by one
    column per requested element. Timestamps absent from the download appear as
    rows filled with NaN.

    Raises ValueError if `a` is not one of the supported temporal frequencies.
    """
    # `import urllib` alone does not guarantee the `request` submodule is loaded.
    import urllib.request

    # Reject an unsupported temporal type before anything is downloaded.
    if a not in _SAMPLING_INTERVALS:
        raise ValueError("a must be one of 'daily', 'hourly' or 'five_minute', got " + repr(a))

    # Pulls raw data from the CoAgMET web services page and loads into a csv
    urllib.request.urlretrieve('http://coagmet.colostate.edu/cgi-bin/web_services.pl?' +
                               'type=' + a +
                               '&sids=' + b +
                               '&sdate=' + c +
                               '&edate=' + d +
                               '&elems=' + e,
                               filename=csv)

    # Reads the csv into a pandas dataframe.
    # NOTE(review): read_csv is called with its default header handling, so the first
    # line of the download is consumed as a header row before the columns are renamed
    # below — confirm this matches the web service's output format.
    data = pd.read_csv(csv)
    # Moves the index produced by read_csv into an ordinary column
    data = data.reset_index()

    # Column names: 'station' and 'date' followed by the requested elements in order
    data.columns = ['station', 'date'] + e.split(',')

    # Tells pandas to recognize values in the 'date' column as datetimes
    data['date'] = pd.to_datetime(data.date)

    # Creates the complete set of timestamps expected for the requested period. This
    # is compared against the timestamps provided by CoAgMET web services to identify
    # any records that have been entirely skipped over by said web services
    r = pd.date_range(start=c, end=d, freq=_SAMPLING_INTERVALS[a])

    # Creates rows for timestamps that were skipped over by the web services request.
    # All missing data will be filled in with the value np.NaN
    data = data.set_index('date').reindex(r).rename_axis('date')

    # Finds and reports missing data to the text file
    _write_missing_report(data, q, b)

    # Replaces any negative values in the gust and wind direction columns with 360°
    _replace_negative(data, 'gustdir', 360)
    _replace_negative(data, 'wind_vec', 360)

    # Replaces any negative solar radiation values with 0
    _replace_negative(data, 'sr', 0)

    return data

In [23]:
# Request five-minute data for station avn01 from 2020-01-12 to 2020-01-13 and
# write the returned frame to before_wind_correct.csv in the working directory.
df = datapull('five_minute', 'avn01', '2020-01-12', '2020-01-13', all_fm_data)
df.to_csv(path_or_buf=path + r'\before_wind_correct.csv')

In [24]:
# Select the rows whose gust direction is negative and overwrite them with 360.
column_name = 'gustdir'
mask = df[column_name] < 0
df.loc[mask, column_name] = 360

In [25]:
# Write the current state of the frame to after_wind_correct.csv.
df.to_csv(path_or_buf=path + r'\after_wind_correct.csv')