# Nenana Ice Classic Data Gathering - Weather Data
This notebook was used for gathering the weather data used for this project.
## Data Source
* Weather information <a href="https://darksky.net/poweredby/">Powered by Dark Sky</a>

In [1]:
# imports
import datetime as dt
import gc
import json
import os

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

In [2]:
def get_keys(path):
    """Load API credentials from a JSON file.

    Parameters
    ----------
    path : str or path-like
        Location of the JSON file to read.

    Returns
    -------
    object
        Whatever ``json.load`` decodes from the file (the caller indexes the
        result by key name, so a JSON object is expected).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file content is not valid JSON.
    """
    # Be explicit about the encoding so the result does not depend on the
    # platform's default text encoding.
    with open(path, encoding='utf-8') as f:
        return json.load(f)

# Resolve the key file relative to the current user's home directory rather
# than hardcoding one machine's absolute path, so the notebook is not tied to
# a single user account.
keys = get_keys(os.path.expanduser("~/.secret/api_keys.json"))
api_key = keys['darksky_api_key']

In [3]:
# candidate years, months and days-of-month used to build the query dates
years_list = list(range(1989, 2020))  # 1989 through 2019 inclusive
months_list = [3, 4, 5]               # March, April, May
days_list = list(range(1, 32))        # 1 through 31; invalid days are filtered later

In [4]:
# make date list
# Build every valid calendar date for the chosen years/months as an ISO
# 'YYYY-MM-DD' string. dt.date() raises ValueError for impossible dates
# (e.g. April 31), so those day numbers are skipped without a per-month
# special case, and zero-padding is handled by isoformat().
query_dates = []
for year in years_list:
    for month in months_list:
        for day in days_list:
            try:
                query_dates.append(dt.date(year, month, day).isoformat())
            except ValueError:
                # this day does not exist in this month/year
                continue

In [5]:
# sanity check: 31 years x 92 days (31 Mar + 30 Apr + 31 May) = 2852 dates expected
len(query_dates)

2852

In [6]:
# delete unneeded variables and collect garbage
# del years_list, months_list, days_list
# gc.collect()

In [7]:
# start with an empty mapping; it is filled with one API response per query date
weather_data = dict()

In [8]:
# Nenana, Alaska as a "latitude,longitude" string (interpolated into the request URL)
nenana_coords = "64.558056,-149.090556"

In [9]:
# single trial request for one fixed date, to inspect the shape of the response
response = requests.get(
    f'https://api.darksky.net/forecast/{api_key}/{nenana_coords},2009-04-02T23:59:59'
    '?exclude=currently,hourly,minutely,alerts,flags'
)
response.json()

{'latitude': 64.558056,
 'longitude': -149.090556,
 'timezone': 'America/Anchorage',
 'daily': {'data': [{'time': 1238659200,
    'summary': 'Partly cloudy throughout the day.',
    'icon': 'partly-cloudy-day',
    'sunriseTime': 1238684940,
    'sunsetTime': 1238734380,
    'moonPhase': 0.27,
    'precipIntensity': 0,
    'precipIntensityMax': 0,
    'precipProbability': 0,
    'temperatureHigh': 24.72,
    'temperatureHighTime': 1238726100,
    'temperatureLow': -5.6,
    'temperatureLowTime': 1238773740,
    'apparentTemperatureHigh': 19.24,
    'apparentTemperatureHighTime': 1238727360,
    'apparentTemperatureLow': -5.11,
    'apparentTemperatureLowTime': 1238773740,
    'dewPoint': -4.96,
    'humidity': 0.57,
    'pressure': 1019.5,
    'windSpeed': 2.97,
    'windGust': 11.84,
    'windGustTime': 1238706420,
    'windBearing': 90,
    'cloudCover': 0.39,
    'uvIndex': 2,
    'uvIndexTime': 1238706240,
    'visibility': 9.997,
    'temperatureMin': -12.43,
    'temperatureMinTi

In [10]:
#get weather data from DarkSky for dates in query_dates
%time
for date in query_dates:
    response = requests.get(f'https://api.darksky.net/forecast/{api_key}/{nenana_coords},{date}T23:59:59?exclude=currently,hourly,minutely,alerts,flags')
    if response.status_code == requests.codes.ok:
        weather_data[date] = response.json()
    else:
        print('Status returned: ' + str(response.status_code) + ' for date: ' + date)
        break

CPU times: user 15 µs, sys: 21 µs, total: 36 µs
Wall time: 58.9 µs


# Place Marker

In [11]:
# confirm a response was stored for every query date (should equal len(query_dates))
len(weather_data)

2852

In [12]:
# persist the raw API responses to disk so they survive a kernel restart
with open('../data/weather_data_1989-2020.json', 'w') as json_out:
    json_out.write(json.dumps(weather_data))

In [13]:
# reload the saved responses from disk, then display the entry for the
# last queried day as a spot check
with open('../data/weather_data_1989-2020.json', 'r') as json_in:
    weather_data = json.loads(json_in.read())
weather_data['2019-05-31']

{'latitude': 64.558056,
 'longitude': -149.090556,
 'timezone': 'America/Anchorage',
 'daily': {'data': [{'time': 1559289600,
    'summary': 'Light rain starting in the afternoon.',
    'icon': 'rain',
    'sunriseTime': 1559303220,
    'sunsetTime': 1559376240,
    'moonPhase': 0.92,
    'precipIntensity': 0.0067,
    'precipIntensityMax': 0.0279,
    'precipIntensityMaxTime': 1559366340,
    'precipProbability': 0.94,
    'precipType': 'rain',
    'temperatureHigh': 64.6,
    'temperatureHighTime': 1559342880,
    'temperatureLow': 51.41,
    'temperatureLowTime': 1559396700,
    'apparentTemperatureHigh': 64.1,
    'apparentTemperatureHighTime': 1559342880,
    'apparentTemperatureLow': 51.9,
    'apparentTemperatureLowTime': 1559396700,
    'dewPoint': 48.55,
    'humidity': 0.69,
    'pressure': 1005.3,
    'windSpeed': 2.29,
    'windGust': 8.64,
    'windGustTime': 1559358000,
    'windBearing': 343,
    'cloudCover': 0.91,
    'uvIndex': 3,
    'uvIndexTime': 1559339460,
    'v

In [14]:
# peek at the first query date to confirm the 'YYYY-MM-DD' key format
query_dates[0]

'1989-03-01'

In [None]:
# this is to read the data from the .json file, if needed
# temp_df = pd.read_json('../data/weather_data_1989-2020.json',
#                        orient = 'index')

In [26]:
# number of responses currently held in weather_data
len(weather_data)

2852

In [30]:
# example of a response that came back without a 'daily' section
weather_data['1989-04-07']

{'latitude': 64.558056,
 'longitude': -149.090556,
 'timezone': 'America/Anchorage',
 'offset': -8}

In [32]:
# Split the responses: keep the first daily record where a 'daily' section
# exists, otherwise remember the date so the gap can be inspected.
weather_dict = {}
missing_daily_data = []
for date_key, payload in weather_data.items():
    if 'daily' not in payload:
        missing_daily_data.append(date_key)
        continue
    weather_dict[date_key] = payload['daily']['data'][0]

In [34]:
# report how many dates produced a daily record and how many did not
parsed_count = len(weather_dict)
missing_count = len(missing_daily_data)
print('weather dict length:', parsed_count, '\n# of missing entries:', missing_count)

weather dict length: 2679 
# of missing entries: 173


In [36]:
# list the dates whose responses had no 'daily' section
missing_daily_data

['1989-04-07',
 '1989-04-14',
 '1989-04-15',
 '1989-04-16',
 '1989-04-18',
 '1989-04-19',
 '1989-04-20',
 '1989-04-21',
 '1989-04-22',
 '1989-04-23',
 '1989-04-24',
 '1989-04-26',
 '1990-05-12',
 '1990-05-13',
 '1991-03-31',
 '1991-04-04',
 '1991-04-26',
 '1992-03-01',
 '1993-03-23',
 '1993-03-24',
 '1994-05-06',
 '1995-03-29',
 '1995-03-30',
 '1995-03-31',
 '1995-04-01',
 '1995-04-02',
 '1995-04-03',
 '1995-04-04',
 '1995-04-06',
 '1995-04-07',
 '1995-04-08',
 '1995-04-09',
 '1995-04-10',
 '1995-04-11',
 '1995-04-13',
 '1995-04-14',
 '1995-04-16',
 '1995-04-17',
 '1995-04-18',
 '1995-04-19',
 '1995-04-20',
 '1995-04-21',
 '1995-04-22',
 '1995-04-23',
 '1995-04-24',
 '1995-04-25',
 '1995-04-26',
 '1995-04-27',
 '1995-04-28',
 '1995-04-29',
 '1995-04-30',
 '1995-05-01',
 '1995-05-02',
 '1995-05-03',
 '1995-05-04',
 '1995-05-05',
 '1995-05-06',
 '1995-05-07',
 '1995-05-08',
 '1995-05-09',
 '1995-05-10',
 '1995-05-11',
 '1995-05-12',
 '1995-05-13',
 '1995-05-14',
 '1995-05-17',
 '1995-05-

In [37]:
# One row per date, one column per weather field. orient='index' builds the
# rows directly from the outer (date) keys; building column-wise and then
# transposing a mixed-type frame leaves every column as dtype object.
weather_df = pd.DataFrame.from_dict(weather_dict, orient='index')

In [38]:
# preview the first rows of the per-date weather table
weather_df.head()

Unnamed: 0,time,sunriseTime,sunsetTime,moonPhase,temperatureHigh,temperatureHighTime,temperatureLow,temperatureLowTime,apparentTemperatureHigh,apparentTemperatureHighTime,...,windGust,windGustTime,icon,precipIntensity,precipIntensityMax,precipIntensityMaxTime,precipProbability,precipAccumulation,summary,ozone
1989-03-01,604746000.0,604775000.0,604812000.0,0.8,29.65,604814000.0,6.68,604861000.0,20.64,604814000.0,...,,,,,,,,,,
1989-03-02,604832000.0,604861000.0,604898000.0,0.83,11.69,604858000.0,-15.29,604926000.0,-0.54,604858000.0,...,,,,,,,,,,
1989-03-03,604919000.0,604947000.0,604985000.0,0.87,1.84,604970000.0,-26.23,605034000.0,-5.72,604958000.0,...,,,,,,,,,,
1989-03-04,605005000.0,605034000.0,605072000.0,0.9,4.69,605063000.0,-38.53,605119000.0,4.47,605062000.0,...,,,,,,,,,,
1989-03-05,605092000.0,605120000.0,605158000.0,0.94,0.74,605149000.0,-27.43,605189000.0,0.23,605149000.0,...,,,,,,,,,,


In [39]:
# (rows, columns) of the weather table
weather_df.shape

(2679, 40)

In [40]:
# write the table to CSV, labelling the date index column 'Date'
weather_df.to_csv(
    '../data/raw_weather_1989-2020.csv',
    index_label='Date',
)