# Nenana Ice Classic Data Gathering - Weather Data
This notebook gathers the daily weather data used in this project.
## Data Source
* Weather information: <a href="https://darksky.net/poweredby/">Powered By DarkSky</a>

In [1]:
# imports
import calendar
import datetime as dt
import gc
import json
import os

import numpy as np
import pandas as pd
import requests

In [2]:
def get_keys(path):
    """Read a JSON file and return its parsed contents.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the file (a dict for a JSON object).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; decode explicitly instead of relying on the
    # platform's locale encoding, which differs between machines.
    with open(path, encoding="utf-8") as f:
        return json.load(f)

# Load the DarkSky API key from a JSON secrets file.
# The location can be overridden with the API_KEYS_PATH environment variable;
# the default is resolved under the current user's home directory so the
# notebook is not tied to one machine's absolute path.
keys_path = os.environ.get(
    "API_KEYS_PATH",
    os.path.join(os.path.expanduser("~"), ".secret", "api_keys.json"),
)
keys = get_keys(keys_path)
api_key = keys['darksky_api_key']

In [3]:
# Date iterators used to build the list of query dates.
# Years 1989 through 2019 inclusive.
year_list = list(range(1989, 2020))

# month -> (start, stop) bounds handed to range(); February carries two
# candidate bounds: [common-year, leap-year].
month_dict = {
    1: (1, 32),
    2: [(1, 29), (1, 30)],
    3: (1, 32),
    4: (1, 31),
    5: (1, 32),
}

In [4]:
# make list of dates for API query
# Produces one ISO-8601 date string (YYYY-MM-DD) per day, for every year in
# year_list and every month in month_dict.
query_dates = []
for year in year_list:
    for month, day_range in month_dict.items():
        if month == 2:
            # February stores two candidate bounds: [common-year, leap-year].
            common_range, leap_range = day_range
            # calendar.isleap applies the full Gregorian rule; `year % 4 == 0`
            # alone misclassifies century years such as 1900 or 2100.
            start_day, end_day = leap_range if calendar.isleap(year) else common_range
        else:
            start_day, end_day = day_range
        # end_day is an exclusive bound, as expected by range()
        query_dates.extend(
            dt.date(year, month, day).isoformat() for day in range(start_day, end_day)
        )

In [5]:
# peek at the last five query dates
query_dates[-5:]

['2019-05-27', '2019-05-28', '2019-05-29', '2019-05-30', '2019-05-31']

In [6]:
# total number of dates in the query list
len(query_dates)

4688

In [7]:
# Drop the date-iterator helpers, then trigger a garbage-collection pass.
del year_list
del month_dict
gc.collect()

5

In [8]:
# empty container that will map each queried date string to its API response
weather_data = dict()

In [9]:
# Nenana, Alaska as a "latitude,longitude" string for the DarkSky request URL
nenana_latitude = '64.558056'
nenana_longitude = '-149.090556'
nenana_coords = f'{nenana_latitude},{nenana_longitude}'

In [10]:
# test API call: request one day's data and display the parsed JSON
test_url = (
    f'https://api.darksky.net/forecast/{api_key}/{nenana_coords},2009-04-02T23:59:59'
    '?exclude=currently,hourly,minutely,alerts,flags'
)
# requests has no default timeout; without one the cell can hang forever
# if the server never answers.
response = requests.get(test_url, timeout=30)
response.json()

{'latitude': 64.558056,
 'longitude': -149.090556,
 'timezone': 'America/Anchorage',
 'daily': {'data': [{'time': 1238659200,
    'summary': 'Partly cloudy throughout the day.',
    'icon': 'partly-cloudy-day',
    'sunriseTime': 1238684940,
    'sunsetTime': 1238734380,
    'moonPhase': 0.27,
    'precipIntensity': 0,
    'precipIntensityMax': 0,
    'precipProbability': 0,
    'temperatureHigh': 24.25,
    'temperatureHighTime': 1238725380,
    'temperatureLow': -7.03,
    'temperatureLowTime': 1238773200,
    'apparentTemperatureHigh': 18.23,
    'apparentTemperatureHighTime': 1238727600,
    'apparentTemperatureLow': -6.54,
    'apparentTemperatureLowTime': 1238773200,
    'dewPoint': -5.47,
    'humidity': 0.58,
    'pressure': 1019.5,
    'windSpeed': 3.44,
    'windGust': 11.84,
    'windGustTime': 1238706420,
    'windBearing': 76,
    'cloudCover': 0.39,
    'uvIndex': 2,
    'uvIndexTime': 1238706240,
    'visibility': 9.997,
    'temperatureMin': -13.76,
    'temperatureMinT

In [11]:
# get weather data from DarkSky for dates in query_dates
# One request is issued per date. Successful responses are stored in
# weather_data keyed by the date string; failures are reported and skipped.
with requests.Session() as session:  # reuse one connection across all requests
    for date in query_dates:
        if date in weather_data:
            # already fetched -- lets the cell resume after an interruption
            continue
        url = (
            f'https://api.darksky.net/forecast/{api_key}/{nenana_coords},{date}T23:59:59'
            '?exclude=currently,hourly,minutely,alerts,flags'
        )
        try:
            # requests has no default timeout; set one so a stalled
            # connection cannot hang the whole run
            response = session.get(url, timeout=30)
        except requests.RequestException as err:
            # Report only the exception type: the full message includes the
            # request URL, which embeds the API key.
            print('Request failed (' + type(err).__name__ + ') for date: ' + date)
            continue
        if response.status_code == requests.codes.ok:
            weather_data[date] = response.json()
        else:
            print('Status returned: ' + str(response.status_code) + ' for date: ' + date)

In [12]:
# number of dates for which a response was stored
len(weather_data)

4688

In [13]:
# persist the raw API responses as JSON so they survive a kernel restart
with open('../data/weather_data_1989-2020.json', 'w') as json_out:
    serialized = json.dumps(weather_data)
    json_out.write(serialized)

In [None]:
# reload the saved API responses from the JSON file, then show one entry
with open('../data/weather_data_1989-2020.json', 'r') as json_in:
    raw_text = json_in.read()
weather_data = json.loads(raw_text)
weather_data['2019-05-31']

In [14]:
# first date in the query list
query_dates[0]

'1989-01-01'

In [15]:
# number of entries currently held in weather_data
len(weather_data)

4688

In [16]:
# inspect the stored response for a single date
weather_data['1989-04-07']

{'latitude': 64.558056,
 'longitude': -149.090556,
 'timezone': 'America/Anchorage',
 'offset': -8}

In [17]:
# Split the raw responses: keep the first daily record where a 'daily' block
# exists, otherwise remember the date as missing.
weather_dict = {}
missing_daily_data = []
for date_key, payload in weather_data.items():
    if 'daily' not in payload:
        missing_daily_data.append(date_key)
        continue
    weather_dict[date_key] = payload['daily']['data'][0]

In [18]:
# report how many dates have daily data and how many do not
n_found = len(weather_dict)
n_missing = len(missing_daily_data)
print('weather dict length:', n_found, '\n# of missing entries:', n_missing)

weather dict length: 4456 
# of missing entries: 232


In [19]:
# list the dates whose response had no 'daily' block
missing_daily_data

['1989-04-07',
 '1989-04-14',
 '1989-04-15',
 '1989-04-16',
 '1989-04-18',
 '1989-04-19',
 '1989-04-20',
 '1989-04-21',
 '1989-04-22',
 '1989-04-23',
 '1989-04-24',
 '1989-04-26',
 '1990-05-12',
 '1990-05-13',
 '1991-01-09',
 '1991-01-18',
 '1991-01-20',
 '1991-01-21',
 '1991-02-03',
 '1991-03-31',
 '1991-04-04',
 '1991-04-26',
 '1992-01-27',
 '1992-01-30',
 '1992-02-01',
 '1992-02-10',
 '1992-02-18',
 '1992-02-19',
 '1992-02-20',
 '1992-02-21',
 '1992-03-01',
 '1993-03-23',
 '1993-03-24',
 '1994-01-31',
 '1994-02-01',
 '1994-05-06',
 '1995-03-29',
 '1995-03-30',
 '1995-03-31',
 '1995-04-01',
 '1995-04-02',
 '1995-04-03',
 '1995-04-04',
 '1995-04-06',
 '1995-04-07',
 '1995-04-08',
 '1995-04-09',
 '1995-04-10',
 '1995-04-11',
 '1995-04-13',
 '1995-04-14',
 '1995-04-16',
 '1995-04-17',
 '1995-04-18',
 '1995-04-19',
 '1995-04-20',
 '1995-04-21',
 '1995-04-22',
 '1995-04-23',
 '1995-04-24',
 '1995-04-25',
 '1995-04-26',
 '1995-04-27',
 '1995-04-28',
 '1995-04-29',
 '1995-04-30',
 '1995-05-

In [24]:
# verify data is missing at DarkSky, not locally
# Re-request every date in missing_daily_data and overwrite its entry in
# weather_data with the fresh response; failures are reported and skipped.
with requests.Session() as session:  # reuse one connection across all requests
    for date in missing_daily_data:
        url = (
            f'https://api.darksky.net/forecast/{api_key}/{nenana_coords},{date}T23:59:59'
            '?exclude=currently,hourly,minutely,alerts,flags'
        )
        try:
            # requests has no default timeout; set one so a stalled
            # connection cannot hang the loop
            response = session.get(url, timeout=30)
        except requests.RequestException as err:
            # Report only the exception type: the full message includes the
            # request URL, which embeds the API key.
            print('Request failed (' + type(err).__name__ + ') for date: ' + date)
            continue
        if response.status_code == requests.codes.ok:
            weather_data[date] = response.json()
        else:
            print('Status returned: ' + str(response.status_code) + ' for date: ' + date)

In [25]:
# Rebuild the missing-date list and add any dates that now have a 'daily'
# block to the existing weather_dict (which is not reset here).
missing_daily_data = []
for date_key, payload in weather_data.items():
    has_daily = 'daily' in payload
    if has_daily:
        weather_dict[date_key] = payload['daily']['data'][0]
    if not has_daily:
        missing_daily_data.append(date_key)

In [26]:
# report the counts again after the re-query
n_found = len(weather_dict)
n_missing = len(missing_daily_data)
print('weather dict length:', n_found, '\n# of missing entries:', n_missing)

weather dict length: 4456 
# of missing entries: 232


In [27]:
# list the dates that have no 'daily' block in weather_data
missing_daily_data

['1989-04-07',
 '1989-04-14',
 '1989-04-15',
 '1989-04-16',
 '1989-04-18',
 '1989-04-19',
 '1989-04-20',
 '1989-04-21',
 '1989-04-22',
 '1989-04-23',
 '1989-04-24',
 '1989-04-26',
 '1990-05-12',
 '1990-05-13',
 '1991-01-09',
 '1991-01-18',
 '1991-01-20',
 '1991-01-21',
 '1991-02-03',
 '1991-03-31',
 '1991-04-04',
 '1991-04-26',
 '1992-01-27',
 '1992-01-30',
 '1992-02-01',
 '1992-02-10',
 '1992-02-18',
 '1992-02-19',
 '1992-02-20',
 '1992-02-21',
 '1992-03-01',
 '1993-03-23',
 '1993-03-24',
 '1994-01-31',
 '1994-02-01',
 '1994-05-06',
 '1995-03-29',
 '1995-03-30',
 '1995-03-31',
 '1995-04-01',
 '1995-04-02',
 '1995-04-03',
 '1995-04-04',
 '1995-04-06',
 '1995-04-07',
 '1995-04-08',
 '1995-04-09',
 '1995-04-10',
 '1995-04-11',
 '1995-04-13',
 '1995-04-14',
 '1995-04-16',
 '1995-04-17',
 '1995-04-18',
 '1995-04-19',
 '1995-04-20',
 '1995-04-21',
 '1995-04-22',
 '1995-04-23',
 '1995-04-24',
 '1995-04-25',
 '1995-04-26',
 '1995-04-27',
 '1995-04-28',
 '1995-04-29',
 '1995-04-30',
 '1995-05-

In [28]:
# Build a table from the per-date records: the dict keys (dates) start out as
# columns, so transpose to get one row per date.
weather_by_column = pd.DataFrame(weather_dict)
weather_df = weather_by_column.transpose()

In [29]:
# preview the first rows of the weather table
weather_df.head()

Unnamed: 0,time,sunriseTime,sunsetTime,moonPhase,temperatureHigh,temperatureHighTime,temperatureLow,temperatureLowTime,apparentTemperatureHigh,apparentTemperatureHighTime,...,precipIntensity,precipIntensityMax,precipIntensityMaxTime,precipAccumulation,windGust,windGustTime,icon,precipProbability,summary,ozone
1989-01-01,599648000.0,599688000.0,599703000.0,0.81,15.77,599674000.0,10.7,599717000.0,13.85,599701000.0,...,,,,,,,,,,
1989-01-02,599735000.0,599774000.0,599789000.0,0.84,17.8,599789000.0,-10.28,599842000.0,15.37,599778000.0,...,,,,,,,,,,
1989-01-03,599821000.0,599860000.0,599876000.0,0.88,4.69,599879000.0,-16.58,599912000.0,3.28,599886000.0,...,,,,,,,,,,
1989-01-04,599908000.0,599947000.0,599962000.0,0.91,7.67,599940000.0,2.57,600009000.0,0.52,599940000.0,...,,,,,,,,,,
1989-01-05,599994000.0,600033000.0,600049000.0,0.94,14.7,600059000.0,10.64,600063000.0,5.23,600059000.0,...,,,,,,,,,,


In [30]:
# (rows, columns) of the weather table
weather_df.shape

(4456, 40)

In [31]:
# write the weather table to CSV, labelling the index column 'Date'
raw_csv_path = '../data/raw_weather_1989-2020.csv'
weather_df.to_csv(raw_csv_path, index_label='Date')