In [25]:
import os
import urllib
import urllib.parse
import urllib.request
from datetime import datetime,timedelta

import numpy as np
import pandas as pd

In [26]:
# Working directory for this notebook. The process changes into it, and the
# scratch file below lives inside it.
path = r'C:\Users\student\Documents\Alistair CoAgMET Projects\Pythonstuff\QC\data_pull'
os.chdir(path)

# Scratch file that datapull() overwrites with each raw web-service response.
csv = os.path.join(path, 'csv.csv')

# Read the clock once so that 'today' and 'yesterday' are always exactly one
# day apart, even if this cell happens to run across midnight.
_now = datetime.now()
today = _now.strftime('%Y-%m-%d')
yesterday = (_now - timedelta(1)).strftime('%Y-%m-%d')

In [29]:
def datapull(a,b,c,d,e):
    """
    Downloads CoAgMET observations and returns them as a dataframe indexed by a
    complete, evenly spaced series of timestamps, so that records the web service
    skipped entirely show up as rows of NaN instead of being silently absent.

    a = Temporal frequency of data
        -(Use 'daily' for daily data, 'hourly' for hourly data, and 'five_minute' for five minute data)
    b = Station(s) from which you would like to request data
        -(Use the five character station ID(s) which can be found at
        https://coagmet.colostate.edu/station_index.php)
    c = Starting date of the time period from which you would like to request data
        -(All dates must be entered in 'yyyy-mm-dd' format)
    d = Ending date of the time period from which you would like to request data
        -(All dates must be entered in 'yyyy-mm-dd' format)
    e = Abbreviations of data elements you would like to request, separated by commas
        -(Abbreviations for each element can be found at
        https://coagmet.colostate.edu/cgi-bin/web_services.pl)

    Each of the above arguments must be entered into the function as a STRING

    Returns the reindexed dataframe (index named 'date', columns 'station' followed
    by the requested elements).

    Side effects: overwrites the scratch file named by the module-level `csv`
    variable with the raw response, and writes the finished dataframe to
    '<b>_<c>-<d>.csv' inside the module-level `path` directory.

    Raises ValueError if `a` is not one of the three supported frequencies. Errors
    from the download or from pandas parsing are not caught and propagate as-is.
    """

    # Spacing of the expected timestamps for each supported request type. The hourly
    # step is given as a Timedelta rather than the 'H' alias, which recent pandas
    # releases flag as deprecated.
    frequencies = {
        'hourly': pd.Timedelta(hours=1),
        'five_minute': '5min',
        'daily': 'D',
    }
    # Reject an unsupported frequency before any download or file write takes place
    if a not in frequencies:
        raise ValueError(
            "a must be one of 'daily', 'hourly' or 'five_minute', got {!r}".format(a))

    # Element names double as column headers, so stray spaces around the commas are removed
    elements = [name.strip() for name in e.split(',')]

    # Builds the query string with proper escaping. Commas are kept literal because they
    # separate the station IDs and element names in the request.
    query = urllib.parse.urlencode(
        [('type', a),
         ('sids', b),
         ('sdate', c),
         ('edate', d),
         ('elems', ','.join(elements))],
        safe=',')

    # Pulls raw data from the CoAgMET web services page and loads into a csv
    urllib.request.urlretrieve('http://coagmet.colostate.edu/cgi-bin/web_services.pl?' + query,
                               filename=csv)

    # Reads the csv into a pandas dataframe
    data = pd.read_csv(csv)
    # Moves whatever read_csv used as the index back into an ordinary first column.
    # NOTE(review): presumably read_csv takes the station column as the index for this
    # response layout -- confirm against a raw response. The column count after this step
    # must equal len(headers) or the assignment below raises.
    data = data.reset_index()

    # Column headers: 'station' first, 'date' second, then the requested elements in order
    headers = ['station', 'date'] + elements
    data.columns = headers

    # Tells pandas to recognize values in the 'date' column as datetimes
    data['date'] = pd.to_datetime(data['date'])

    # Creates the full set of timestamps expected for the requested period. This index is
    # compared against the timestamps provided by CoAgMET web services to identify any
    # records that have been entirely skipped over by said web services.
    # NOTE(review): `end=d` is midnight at the start of the end date, so for hourly and
    # five minute requests any later rows on that date are dropped by the reindex below
    # -- confirm this is intended.
    r = pd.date_range(start=c, end=d, freq=frequencies[a])

    # Aligns the data to the expected timestamps. Timestamps with no record become rows
    # filled with np.NaN; records whose timestamp is not in `r` are discarded.
    # NOTE(review): reindex raises if the same timestamp occurs more than once, which is
    # presumably the case when several stations are requested together -- verify.
    data = data.set_index('date').reindex(r).rename_axis('date')

    # Saves the result inside the `path` directory (joined with a separator so the file
    # does not end up beside the directory with the directory name glued onto it)
    data.to_csv(os.path.join(path, b + '_' + c + '-' + d + '.csv'))
    return data


In [31]:
# Five minute observations for station gyp01, 4-8 January 2020, with every
# element listed in the final argument.
df = datapull(
    'five_minute',
    'gyp01',
    '2020-01-04',
    '2020-01-08',
    'tmean,rh,vp,sr,ws,wind_vec,wind_std,pp,st5,st15,gust,gusttm,gustdir',
)

df

Unnamed: 0_level_0,station,tmean,rh,vp,sr,ws,wind_vec,wind_std,pp,st5,st15,gust,gusttm,gustdir
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2020-01-04 00:00:00,gyp01,-14.54,0.898,0.178,0.000,0.945,191.5,14.260,0.0,-3.606,-2.166,1.450,23:55:00,205.8
2020-01-04 00:05:00,gyp01,-14.51,0.883,0.176,0.000,0.837,140.7,4.865,0.0,-3.611,-2.165,1.117,00:03:00,139.1
2020-01-04 00:10:00,gyp01,-15.06,0.872,0.166,0.001,0.783,169.3,8.500,0.0,-3.625,-2.164,1.274,00:09:00,179.5
2020-01-04 00:15:00,gyp01,-15.60,0.867,0.158,0.000,0.864,186.9,20.110,0.0,-3.629,-2.163,1.225,00:12:00,204.4
2020-01-04 00:20:00,gyp01,-15.43,0.880,0.162,0.000,0.757,151.5,19.270,0.0,-3.633,-2.162,1.098,00:18:00,144.9
2020-01-04 00:25:00,gyp01,-15.29,0.887,0.166,0.000,0.931,187.3,3.104,0.0,-3.647,-2.161,1.196,00:20:00,186.3
2020-01-04 00:30:00,gyp01,-15.11,0.882,0.167,0.001,0.513,192.0,11.020,0.0,-3.652,-2.162,0.784,00:25:00,183.1
2020-01-04 00:35:00,gyp01,-15.05,0.883,0.168,0.001,0.655,257.9,16.730,0.0,-3.664,-2.168,0.902,00:34:00,255.5
2020-01-04 00:40:00,gyp01,-14.86,0.881,0.170,0.001,0.644,196.0,25.620,0.0,-3.672,-2.170,0.862,00:35:00,250.9
2020-01-04 00:45:00,gyp01,-15.04,0.875,0.167,0.000,0.333,211.5,30.240,0.0,-3.679,-2.169,0.715,00:45:00,276.6
