This notebook downloads weather data for Chicago at four-hour intervals (six observations per day); the full download is spread over several days.

The dataset will begin on 1 Jan 2018 and go through 31 Dec 2021.

In [3]:
import numpy as np
import pandas as pd
import json

In [4]:
# get API tokens
# The env file holds one `name = value` pair per line; only the two names
# matched below are picked up, every other line is ignored.
with open('.env.development') as f:
    data = f.read()

for line in data.splitlines():
    # Split on the first '=' and trim both sides: this accepts `name=value`
    # as well as `name = value`, and keeps stray spaces out of a token that
    # is later concatenated into a request URL.
    head, sep, tail = line.partition('=')
    head, tail = head.strip(), tail.strip()

    if head == 'crashAPIkey':
        crashAPIkey = tail
    elif head == 'weatherAPIkey':
        weatherAPIkey = tail

# The tokens are deliberately not printed: cell output is saved with the notebook.

In [9]:
# Resume point: read back what earlier runs saved and continue after the
# newest date found there.
with open('rawData.json', 'r') as raw_file:
    RawData = json.load(raw_file)

with open('processedData.json', 'r') as processed_file:
    ProcessedData = json.load(processed_file)

# numpy's datetime64 arithmetic takes care of leap years and month lengths,
# so none of that calendar logic is written by hand here
lastDate = ProcessedData[-1]['date']  # date string of the final saved record

# downloading starts on the day after the last saved date
one_day = np.timedelta64(1, 'D')
d_start = np.datetime64(lastDate) + one_day

# downloading stops at the first day of the month three months past lastDate's month:
#   1) lastDate[:7] is the YYYY-MM part, parsed as a month-resolution datetime64
#   2) adding three months keeps the value at month resolution
#   3) casting to day resolution yields the first day of that month
end_month = np.datetime64(lastDate[:7]) + np.timedelta64(3, 'M')
d_end = end_month.astype('datetime64[D]')

d_list = pd.date_range(start=d_start, end=d_end, freq='D')

In [10]:
def weatherLookup(apiQuery):
    """Request `apiQuery` and return the decoded JSON response.

    Parameters
    ----------
    apiQuery : str
        Complete request URL (the caller builds it, API key included).

    Returns
    -------
    The object produced by `json.loads` on the UTF-8 decoded response body.

    Raises
    ------
    SystemExit
        After printing the error details, when the server answers with an
        HTTP error status or the URL cannot be opened at all.
    """
    # Import the submodules explicitly: a bare `import urllib` does not
    # guarantee that `urllib.request` and `urllib.error` are loaded.
    import json
    import sys
    import urllib.error
    import urllib.request

    try:
        # Use the argument, not a same-named global left over from the caller.
        with urllib.request.urlopen(apiQuery) as response:
            html = response.read()
    except urllib.error.HTTPError as e:
        # HTTPError carries both a status code and a readable response body.
        ErrorInfo = e.read().decode()
        print('Error code: ', e.code, ErrorInfo)
        sys.exit()
    except urllib.error.URLError as e:
        # A plain URLError has no status code or body, only a reason.
        print('Error reason: ', e.reason)
        sys.exit()

    weather = json.loads(html.decode('utf-8'))
    return weather

In [11]:
# Latitude and longitude used in the weather request; kept as strings because
# they are concatenated directly into the query URL.
lat, lon = '41.89529666462941', '-87.67682552741503'

In [8]:
# create a list of days for which to request weather data
# (a fixed three-month window beginning on the first day of year/month)
# NOTE: this cell assigns d_start, d_end and d_list, replacing any values an
# earlier cell derived from the saved processedData.json.

import datetime as dt
import pandas as pd


def monthStart(year, month, offset=0):
    """Return midnight on the first day of the month `offset` months after year/month.

    The month arithmetic carries into the year, so a start month of 10-12
    with a three-month offset lands in the following year instead of handing
    an out-of-range month to `datetime`.
    """
    carry, zero_based_month = divmod(month - 1 + offset, 12)
    return dt.datetime(year + carry, zero_based_month + 1, 1)


year = 2018
month = 1
d_start = monthStart(year, month)
d_end = monthStart(year, month, 3)
d_list = pd.date_range(d_start,d_end,freq="D")



In [12]:
# Request one observation every four hours for each day in d_list, keeping
# both the API response as received and a flattened per-observation record.
# NOTE: both lists start empty and the save cells write only these lists, so
# records previously loaded into RawData / ProcessedData are not carried over
# into the rewritten files.
rawData, processedData = [], []
# Every entry is zero-padded HH:MM:SS so all request timestamps share one format.
times = ['00:00:01','04:00:01','08:00:01','12:00:01','16:00:01','20:00:01']
# The last entry of d_list is skipped: it is treated as an exclusive end date.
for d in d_list[:-1]:
    for t in times:
        dateTimeS = f'{d.strftime("%Y-%m-%d")}T{t}'
        ApiQuery = 'https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/'+\
            lat+'%2C'+lon+'/'+dateTimeS+'?unitGroup=us&key='+weatherAPIkey+'&include=current&contentType=json'
        
        rawResponse = weatherLookup(ApiQuery)
        rawData.append(rawResponse)
        # Copy before adding fields, so the response stored in rawData is
        # left exactly as the API returned it.
        dd = dict(rawResponse['currentConditions'])
        dd['date'] = rawResponse['days'][0]['datetime']
        dd['time'] = dd['datetime']
        processedData.append(dd)

        # progress output: one line per completed request
        print(d)

2018-04-02 00:00:00
2018-04-02 00:00:00
2018-04-02 00:00:00
2018-04-02 00:00:00
2018-04-02 00:00:00
2018-04-02 00:00:00
2018-04-03 00:00:00
2018-04-03 00:00:00
2018-04-03 00:00:00
2018-04-03 00:00:00
2018-04-03 00:00:00
2018-04-03 00:00:00
2018-04-04 00:00:00
2018-04-04 00:00:00
2018-04-04 00:00:00
2018-04-04 00:00:00
2018-04-04 00:00:00
2018-04-04 00:00:00
2018-04-05 00:00:00
2018-04-05 00:00:00
2018-04-05 00:00:00
2018-04-05 00:00:00
2018-04-05 00:00:00
2018-04-05 00:00:00
2018-04-06 00:00:00
2018-04-06 00:00:00
2018-04-06 00:00:00
2018-04-06 00:00:00
2018-04-06 00:00:00
2018-04-06 00:00:00
2018-04-07 00:00:00
2018-04-07 00:00:00
2018-04-07 00:00:00
2018-04-07 00:00:00
2018-04-07 00:00:00
2018-04-07 00:00:00
2018-04-08 00:00:00
2018-04-08 00:00:00
2018-04-08 00:00:00
2018-04-08 00:00:00
2018-04-08 00:00:00
2018-04-08 00:00:00
2018-04-09 00:00:00
2018-04-09 00:00:00
2018-04-09 00:00:00
2018-04-09 00:00:00
2018-04-09 00:00:00
2018-04-09 00:00:00
2018-04-10 00:00:00
2018-04-10 00:00:00


In [13]:
import json

# Write the collected raw API responses to disk as one JSON document,
# replacing the previous contents of the file.
with open('rawData.json', 'w') as raw_out:
    json.dump(rawData, fp=raw_out)

In [14]:
# Write the flattened observation records to disk as one JSON document,
# replacing the previous contents of the file.
with open('processedData.json', 'w') as processed_out:
    json.dump(processedData, fp=processed_out)

In [9]:
# Date string stored on the newest processed record.
newest_record = processedData[-1]
lastDate = newest_record['date']

In [10]:
import numpy as np
# Parse only the YYYY-MM prefix, giving a month-resolution datetime64.
np.datetime64(lastDate[:7])

numpy.datetime64('2018-04')

In [12]:
# The day after lastDate.
np.timedelta64(1, 'D') + np.datetime64(lastDate)

numpy.datetime64('2018-04-02')

In [16]:
# Work out the next download window from the last saved date.
# Start: the day following lastDate.
nextDate = np.datetime64(lastDate) + np.timedelta64(1, 'D')
d_start = nextDate

# End: the first day of the month three months after lastDate's month.
# The YYYY-MM prefix is advanced at month resolution, then cast to days.
month_after = np.datetime64(lastDate[:7]) + np.timedelta64(3, 'M')
d_end = month_after.astype('datetime64[D]')
print(d_start, d_end)


2018-04-02 2018-07-01


In [17]:
# One entry per calendar day from d_start to d_end.
d_list = pd.date_range(start=d_start, end=d_end, freq='D')

In [22]:
# Check how a single entry renders as a YYYY-MM-DD string.
sixth_day = d_list[5]
sixth_day.strftime('%Y-%m-%d')

'2018-04-07'