# Testing requests

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests 
import time
import pprint
import openweathermapy.core as owm

# Import datetime for today's (execution) date
import datetime
from datetime import datetime as dt

# Import API key
from api_keys import api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

In [2]:
cities= ["Chicago","Atlanta","Boston","Los Angeles","Denver"]
settings = {"units": "Imperial", "appid": api_key}

weather_df = pd.DataFrame()
weather_df["City"] = cities
weather_df["Lat"] = ""
weather_df["Lng"] = ""
weather_df["Date"] = ""
weather_df["Max Temp"] = ""
weather_df["Min Temp"] = ""
weather_df["Humidity"] = ""
weather_df["Clouds"] = ""
weather_df["Wind Speed"] = ""
weather_df["Weather ID"] = ""

weather_df.head()

Unnamed: 0,City,Lat,Lng,Date,Max Temp,Min Temp,Humidity,Clouds,Wind Speed,Weather ID
0,Chicago,,,,,,,,,
1,Atlanta,,,,,,,,,
2,Boston,,,,,,,,,
3,Los Angeles,,,,,,,,,
4,Denver,,,,,,,,,


In [3]:
missedtot = 0
for i in range(len(cities)): 
    try:
        ctmp = weather_df["City"][i]
        tmp = owm.get_current(city = weather_df["City"][i], **settings)
        weather_df.loc[i, 'Country'] = tmp['sys']['country']        
        weather_df.loc[i, 'Lat'] = tmp['coord']['lat']
        weather_df.loc[i, 'Lng'] = tmp['coord']['lon']
        weather_df.loc[i, 'Date'] = tmp['dt']
        weather_df.loc[i, 'Max Temp'] = tmp['main']['temp_max']
        weather_df.loc[i, 'Min Temp'] = tmp['main']['temp_min']
        weather_df.loc[i, 'Humidity'] = tmp['main']['humidity']
        weather_df.loc[i, 'Clouds'] = tmp['clouds']['all']
        weather_df.loc[i, 'Wind Speed'] = tmp['wind']['speed']
        weather_df.loc[i, 'Weather ID'] = tmp['weather'][0]['id']
        time.sleep(1.0001) # Pull limit is 60/min
        print(f"{i} out of {len(cities)} | Found data for: {ctmp.title()}")
    except: 
        missedtot +=1
        print(f"No data for index ({i})... skipping.")       
#print(f"For {len(cities)}, dropped {missedtot} ({round(missedtot/len(cities)*100,1)}% lost)")
#print("Cleaning and saving dataframe...")

weather_df.head()

0 out of 5 | Found data for: Chicago
1 out of 5 | Found data for: Atlanta
2 out of 5 | Found data for: Boston
3 out of 5 | Found data for: Los Angeles
4 out of 5 | Found data for: Denver


Unnamed: 0,City,Lat,Lng,Date,Max Temp,Min Temp,Humidity,Clouds,Wind Speed,Weather ID,Country
0,Chicago,41.88,-87.62,1567725597,73.4,64.0,64,1,4.7,800,US
1,Atlanta,33.75,-84.39,1567725519,93.2,84.0,33,1,8.05,800,US
2,Boston,42.36,-71.06,1567725668,64.4,59.0,63,1,3.36,800,US
3,Los Angeles,34.05,-118.24,1567725513,98.6,78.01,39,1,9.17,800,US
4,Denver,39.74,-104.98,1567725525,91.4,79.0,28,75,9.17,501,US


# Using DarkSkyAPI

Powered by Dark Sky: https://darksky.net/

In [4]:
from api_keys import dsa_key

In [4]:
pprint.pprint(tmp)

{'base': 'stations',
 'clouds': {'all': 75},
 'cod': 200,
 'coord': {'lat': 39.74, 'lon': -104.98},
 'dt': 1567725525,
 'id': 5419384,
 'main': {'humidity': 28,
          'pressure': 1025,
          'temp': 84.97,
          'temp_max': 91.4,
          'temp_min': 79},
 'name': 'Denver',
 'sys': {'country': 'US',
         'id': 3958,
         'message': 0.0107,
         'sunrise': 1567686702,
         'sunset': 1567733162,
         'type': 1},
 'timezone': -21600,
 'visibility': 16093,
 'weather': [{'description': 'moderate rain',
              'icon': '10d',
              'id': 501,
              'main': 'Rain'}],
 'wind': {'deg': 260, 'gust': 21.92, 'speed': 9.17}}


In [5]:
# Calculating the number of pulls
x=0
x=x+365 # days
x=x*5 # years
x=x*5 # cities
print(f"Requires {x} pulls")
print(f"Cost: ${round(x*0.0001,2)}")

Requires 9125 pulls
Cost: $0.91


In [8]:
# Test Pull for Weather Type
tmp['hourly']['data'][0]['summary']

KeyError: 'hourly'

# Looping Across All Dates

In [9]:
# Defining Start and End Dates
start = dt.isoformat(dt.strptime("2014-01-01T00:00:00-05:00", '%Y-%m-%dT%H:%M:%S%z')) # Change these for each Time Zone
end = dt.isoformat(dt.strptime("2019-01-01T00:00:00-05:00", '%Y-%m-%dT%H:%M:%S%z'))

In [10]:
# Lat/Lng for Selected Cities
cities = ["Chicago","Atlanta","Boston","Los Angeles","Denver"]
latlng = ["41.88,-87.62","33.75,-84.39","42.36,-71.06","34.05,-118.24","39.74,-104.98"]
tz = ["-05:00","-04:00","-04:00","-07:00","-06:00"]


In [11]:
j = 0 # Hour Tracker
loss = 0 # Tracking lost data
#atl_df = pd.DataFrame(columns = ["Time","Weather"]) 
#lac_df = pd.DataFrame(columns = ["Time","Weather"])
#bos_df = pd.DataFrame(columns = ["Time","Weather"])
#den_df = pd.DataFrame(columns = ["Time","Weather"])
chi_df = pd.DataFrame(columns = ["Time","Weather"])


# Looping Across all Latitudes and Longitudes
ctyi= 0 # 0 is Chicago, 1 is Atlanta, 2 is Boston, 3 is LA, 4 is Denver
print(f"Collecting Data for {cities[ctyi]}.")

# Starting at Day 1 and Looping from there
date = start
while date != end:
    #print(f"Working on {date}")
    # Using api key, latlng, and date to make an API pull across every day
    query_url = (f"https://api.darksky.net/forecast/{dsa_key}/{latlng[ctyi]},{date}?exclude=currently,daily,minutely,alerts,flags")
    response = requests.get(query_url)
    tmp = response.json()
    for i in range(24):
        try:
            tmp_srs = [tmp['hourly']['data'][i]['time'],tmp['hourly']['data'][i]['summary']]
        except:
            tmp_srs = [f"Miss hr{i} @ {j}",np.nan]
            loss += 1
            #print(f"No Data found for hour {i}")
        chi_df.loc[j] = (tmp_srs)
        j+=1
    date = dt.isoformat(dt.strptime(date, '%Y-%m-%dT%H:%M:%S%z') + datetime.timedelta(days=1))
        
print(f"Filled {j} hours per day! If this number is 43824, then everything is accounted for, congrats!")
print(f"Lost {loss} out of {j}({round(loss/j,5)}% loss)")

Collecting Data for Chicago.
Filled 43824 hours per day! If this number is 43824, then everything is accounted for, congrats!
Lost 11 out of 43824(0.00025% loss)


In [12]:
#atl_df.to_csv("atl_api.csv")
#lac_df.to_csv("lac_api.csv")
#bos_df.to_csv("bos_api.csv")
#den_df.to_csv("den_api.csv")
#chi_df.to_csv("chi_api.csv")