# Pull and manipulate the API data

The point of this exercise is to try data enrichment with data from external APIs. We are going to take data about road kills in Vermont in 2006 and try to figure out the weather during the accident and how many bars there are in the area. We will work with one api that has a Python wrapper library and one that does not.



## Data

The data for this exercise can be found [here](https://drive.google.com/file/d/1Nk-5RZC1yzJl7HzOJd3tzUjzMENSSdJJ/view?usp=sharing).

Just run the cells below to get your data ready. Little help from us.


In [1]:
import pandas as pd
import numpy as np
from pandas.io.json import json_normalize  
from pprint import pprint
import xml.etree.ElementTree as ET
import requests 
import os
from re import search

In [2]:
mypath = '/mnt/d/lighthouse/lighthouse_data_notes/Week_2/Day_3/Other_data_types_exercise/'

In [3]:
data = pd.read_csv(mypath + "VT_VehicleAnimal_Collisions__2006.csv", 
                   na_values=['', ' '],
                   parse_dates=["DATE_"]
                  )
data["MONTH_"] = data.DATE_.dt.month
data.dropna(subset=['X', 'Y'], inplace=True)
# creation of variable with lon and lat together
data['ll'] = data['Y'].astype(str) + ',' + data['X'].astype(str)
print(data.shape)

(2244, 23)


In [4]:
data['date_string'] = data.DATE_.astype(str)

In [5]:
temp = data['date_string'].str.split("+",expand = True)

In [6]:
desired = temp[0]
desired = desired.str.replace(' ','T')
data['date'] = desired

In [7]:
data.head(2)

Unnamed: 0,X,Y,OBJECTID,MSRI_CODE,MSRI_DESCR,DATE_,TOWN_ID,ROUTE_DES,ROUTE,BEGIN_MM,...,RT_NUM,DAY_,MONTH_,YEAR_,REP_AGEN,LOCATION,YEAR_INT,ll,date_string,date
4,-72.989424,42.833309,5,Deer,Deer,2004-04-16 00:00:00+00:00,209.0,VT 8,8,0.35,...,,,4.0,2004,AOT,,2004,"42.83330874047745,-72.98942388725152",2004-04-16 00:00:00+00:00,2004-04-16T00:00:00
5,-72.375495,43.581005,6,Deer RK AB,"Deer, RoadKill, Adult Buck",2005-03-22 00:00:00+00:00,1409.0,US 5,5,5.0,...,,,3.0,2005,AOT,,2005,"43.581005107295645,-72.37549549789222",2005-03-22 00:00:00+00:00,2005-03-22T00:00:00


## Foursquare API

Foursquare API documentation is [here](https://developer.foursquare.com/)

1. Start a foursquare application and get your keys.
2. For each crash, pull number of of bars (category "Nightlife") in 5km radius.
3. Find a relationship between number of bars in the area and severity of the crash.

Hints:

* check out python package "foursquare"
* what happens if the code fails?
* what if you run out of requests? (check out [time](https://docs.python.org/2/library/time.html) package)

In [11]:
#set the keys
#Hotmail Key
foursquare_id = "FPZRTCK1PLAB5ZSBJBTVSAB0XLYWHVIFQOYS2OPQ1BDPKYSL"
foursquare_secret = "CEJPLMC55K4IGKB5XHTIZC2WPKQUTK3FRL1ZFBIVYLM44MOO"

#Google Key
#foursquare_id = "LHDPZ2TF3YUKTU4F4S5YY5BE3C20ZZRMQN3112X5OUWQMM1F"
#foursquare_secret = "VEV4YAZNK31DPPCOYCTZR3AFZWWEQTA3T5BB5HJEO05JJIX5"

In [12]:
# Install and load the library
import foursquare
from math import sin, cos, sqrt, atan2, radians
# Construct the client object
client = foursquare.Foursquare(client_id=foursquare_id, client_secret=foursquare_secret, redirect_uri='http://fondu.com/oauth/authorize')
# Build the authorization url for your app
auth_uri = client.oauth.auth_url()

In [14]:
category = client.venues.categories()
categories = json_normalize(category, record_path = 'categories')
nightlife_id = list(categories[categories['shortName'] == "Nightlife"]['id'])
nightlife_id = nightlife_id[0]

  categories = json_normalize(category, record_path = 'categories')


KeyError: 'shortName'

In [16]:
categories

Unnamed: 0,id,name,pluralName,shortName,categories,icon.prefix,icon.suffix
0,4d4b7104d754a06370d81259,Arts & Entertainment,Arts & Entertainment,Arts & Entertainment,"[{'id': '56aa371be4b08b9a8d5734db', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/arts_en...,.png
1,4d4b7105d754a06372d81259,College & University,Colleges & Universities,College & Education,"[{'id': '4bf58dd8d48988d198941735', 'name': 'C...",https://ss3.4sqi.net/img/categories_v2/educati...,.png
2,4d4b7105d754a06373d81259,Event,Events,Event,"[{'id': '52f2ab2ebcbc57f1066b8b3b', 'name': 'C...",https://ss3.4sqi.net/img/categories_v2/event/d...,.png
3,4d4b7105d754a06374d81259,Food,Food,Food,"[{'id': '503288ae91d4c4b30a586d67', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/food/de...,.png
4,4d4b7105d754a06376d81259,Nightlife Spot,Nightlife Spots,Nightlife,"[{'id': '4bf58dd8d48988d116941735', 'name': 'B...",https://ss3.4sqi.net/img/categories_v2/nightli...,.png
5,4d4b7105d754a06377d81259,Outdoors & Recreation,Outdoors & Recreation,Outdoors & Recreation,"[{'id': '4f4528bc4b90abdf24c9de85', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/parks_o...,.png
6,4d4b7105d754a06375d81259,Professional & Other Places,Professional & Other Places,Professional,"[{'id': '4e52d2d203646f7c19daa8ae', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/buildin...,.png
7,4e67e38e036454776db1fb3a,Residence,Residences,Residence,"[{'id': '5032891291d4c4b30a586d68', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/buildin...,.png
8,4d4b7105d754a06378d81259,Shop & Service,Shops & Services,Shops,"[{'id': '52f2ab2ebcbc57f1066b8b56', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/shops/d...,.png
9,4d4b7105d754a06379d81259,Travel & Transport,Travel & Transport,Travel,"[{'id': '4bf58dd8d48988d1ed931735', 'name': 'A...",https://ss3.4sqi.net/img/categories_v2/travel/...,.png


In [None]:
def get_venues(ll):
    venues = client.venues.search(params = {'categoryId':nightlife_id, 'll' : ll, 'radius':'5000'})
    if len(venues) == 1:
        num_bars = 0
    else:
        num_bars = len(venues['venues'])
    return num_bars

#for i in range(len(ll_list)):
#    bars.append(get_venues(str(ll_list[i])))

In [357]:
# Create the function get_venues, 
# that will pull bars in the radius of 5km around the crash

res = client.venues.search(params={'query': 'Nightlife', 'll': '40.7233000,-74.0030000'})
res2 = client.venues.search(params={'query': 'bar', 'll': '40.7233000,-74.0030000'})
res3 = client.venues('5c56242b898bdc002c589118')
# EXAMPLE
# get_venues('48.146394, 17.107969')

{'venues': [{'id': '5cdc3b21c824ae002c24f8aa',
   'name': 'NYC Office Of Nightlife',
   'location': {'address': '1 Centre St',
    'lat': 40.712938,
    'lng': -74.003656,
    'labeledLatLngs': [{'label': 'display',
      'lat': 40.712938,
      'lng': -74.003656}],
    'distance': 1154,
    'postalCode': '10007',
    'cc': 'US',
    'city': 'New York',
    'state': 'NY',
    'country': 'United States',
    'formattedAddress': ['1 Centre St',
     'New York, NY 10007',
     'United States']},
   'categories': [{'id': '4bf58dd8d48988d124941735',
     'name': 'Office',
     'pluralName': 'Offices',
     'shortName': 'Office',
     'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/building/default_',
      'suffix': '.png'},
     'primary': True}],
   'referralId': 'v-1596082593',
   'hasPerk': False},
  {'id': '52ad25e8498e65c5310e5da2',
   'name': 'NYC Premier Nightlife',
   'location': {'lat': 40.723024,
    'lng': -73.984239,
    'labeledLatLngs': [{'label': 'display',
      

In [421]:
bar_dict ={}

for index, row in data.iterrows():
    
    lat_1 = row['Y']
    lon_1 = row['X']
    
    res = client.venues.search(params={'query': 'bar', 'll': row['ll']})
    
    for x in res['venues']:
        lat_2 = x['location']['lat']
        lon_2 = x['location']['lng']
        if len(x['categories']) < 1:
            continue
        else:
            if (x['categories'][0]['icon']['prefix'].find('nightlife')>0 and distance(lat_1,lon_1,lat_2,lon_2) < 5):
                if row['OBJECTID'] not in bar_dict:
                    bar_dict[row['OBJECTID']] = 1
                else:
                    bar_dict[row['OBJECTID']] += 1

Unknown error. meta: {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5f225b900070593f9d206009'}
Unknown error. meta: {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5f225bd571a03639f39f6e55'}
Unknown error. meta: {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5f225cbbf598007f30b894a6'}


FoursquareException: Unknown error. meta: {'code': 429, 'errorType': 'quota_exceeded', 'errorDetail': 'Quota exceeded', 'requestId': '5f225cbbf598007f30b894a6'}

In [423]:
bar_dict

{}

In [420]:
print(len(res['venues'][2]['categories']))
print(len([]))
x

1
0


{'id': '5d19212f6d22da002f01f41a',
 'name': 'Caldonia Spirits Distillery & Bar',
 'location': {'address': '116 Gin Ln',
  'lat': 44.2503,
  'lng': -72.566933,
  'labeledLatLngs': [{'label': 'display', 'lat': 44.2503, 'lng': -72.566933}],
  'distance': 4267,
  'postalCode': '05602',
  'cc': 'US',
  'city': 'Montpelier',
  'state': 'VT',
  'country': 'United States',
  'formattedAddress': ['116 Gin Ln', 'Montpelier, VT 05602', 'United States']},
 'categories': [{'id': '4e0e22f5a56208c4ea9a85a0',
   'name': 'Distillery',
   'pluralName': 'Distilleries',
   'shortName': 'Distillery',
   'icon': {'prefix': 'https://ss3.4sqi.net/img/categories_v2/food/brewery_',
    'suffix': '.png'},
   'primary': True}],
 'referralId': 'v-1596086694',
 'hasPerk': False}

In [355]:
res['venues'][4]['categories'][0]['icon']['prefix']

'https://ss3.4sqi.net/img/categories_v2/nightlife/pub_'

In [363]:
res['venues'][4]['categories'][0]['icon']['prefix'].find(' ')

-1

In [364]:
# Formula o Calculate the distance given lat and long
def distance(lat_1, lon_1, lat_2, lon_2):
    R = 6373.0
    lat1 = radians(lat_1)
    lon1 = radians(lon_1)
    lat2 = radians(lat_2)
    lon2 = radians(lon_2)

    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c
    
    return(distance)

## Virtual Crossing API

Virtual Crossing API documentation is [here](https://www.visualcrossing.com/resources/documentation/)

1. Sign up for FREE api key if you haven't done that before.
2. For each crush, get the weather for the location and time.
3. Find a relationship between the weather and severity of the crash.

Hints:

* randomly sample only 500 or so (due to API limits), or pull weather only for each town, not every crash
* for sending HTTP requests check out "requests" library [here](http://docs.python-requests.org/en/master/)



In [264]:
import requests
import time
api_key = "QJCNV1F8U9FVG6KYA82ITA3L7"
#url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history?&aggregateHours=24&startDateTime="+time+"&endDateTime="+time+"&unitGroup=metric&contentType=json&dayStartTime=0:0:00&dayEndTime=0:0:00&location="+location+"&key="+api_key

In [265]:
weather_dict ={}

for index, row in data.iterrows():
    location = row['ll']
    time = row['date']
    url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history?&aggregateHours=24&startDateTime="+time+"&endDateTime="+time+"&unitGroup=metric&contentType=json&dayStartTime=0:0:00&dayEndTime=0:0:00&location="+location+"&key="+api_key
    
    res=requests.get(url)
    result = res.json()
    
    weather_dict[location] = result['locations'][location]['values'][0]

KeyError: 'locations'

In [298]:
weather_df = pd.DataFrame.from_dict(weather_dict)

In [299]:
weather_df = weather_df.T
#weather_df.head()

In [300]:
weather_dropped = weather_df.drop(columns=['wdir','visibility','datetime','wspd','precipcover','heatindex', 'weathertype','dew','snowdepth','sealevelpressure','wgust','info'])
#weather_dropped

In [301]:
weather_dropped = weather_dropped.reset_index()
weather_dropped = weather_dropped.rename(columns={'index':'ll'})

weather_temp = weather_dropped['datetimeStr'].str.split("T",expand = True)
#weather_temp  = weather_temp.iloc[:, :-1]
temp = weather_temp[1].str.split('-',expand = True)
weather_temp[1] = temp[0]
weather_temp['date_string'] = weather_temp[0] + "T" + weather_temp[1]
#weather_temp

In [302]:
weather_dropped['datetimeStr'] = weather_temp['date_string']

In [303]:
weather_dropped.head()

Unnamed: 0,ll,temp,maxt,datetimeStr,cloudcover,mint,precip,humidity,conditions,windchill
0,"42.83330874047745,-72.98942388725152",6.0,14.0,2004-04-16T00:00:00,0.0,-1.0,0.0,38.63,Clear,-4.1
1,"43.581005107295645,-72.37549549789222",23.4,30.6,2005-07-20T00:00:00,19.7,16.2,0.0,69.78,Clear,
2,"43.23502288333559,-72.8402568531839",1.2,12.1,2004-11-15T00:00:00,1.2,-5.3,0.0,71.68,Clear,-3.9
3,"44.78300220380285,-72.83099079061695",1.6,3.5,2004-10-18T00:00:00,29.0,-3.6,0.8,77.48,"Rain, Partially cloudy",-2.9
4,"44.24232301744294,-72.51459452575021",7.1,16.1,2005-04-22T00:00:00,7.9,-4.0,0.0,46.22,Clear,6.2


In [304]:
data.head()

Unnamed: 0,X,Y,OBJECTID,MSRI_CODE,MSRI_DESCR,DATE_,TOWN_ID,ROUTE_DES,ROUTE,BEGIN_MM,...,RT_NUM,DAY_,MONTH_,YEAR_,REP_AGEN,LOCATION,YEAR_INT,ll,date_string,date
4,-72.989424,42.833309,5,Deer,Deer,2004-04-16 00:00:00+00:00,209.0,VT 8,8,0.35,...,,,4.0,2004,AOT,,2004,"42.83330874047745,-72.98942388725152",2004-04-16 00:00:00+00:00,2004-04-16T00:00:00
5,-72.375495,43.581005,6,Deer RK AB,"Deer, RoadKill, Adult Buck",2005-03-22 00:00:00+00:00,1409.0,US 5,5,5.0,...,,,3.0,2005,AOT,,2005,"43.581005107295645,-72.37549549789222",2005-03-22 00:00:00+00:00,2005-03-22T00:00:00
6,-72.840257,43.235023,7,Lg Bird,"Large Bird (hawk, owl, turkey, waterfowl)",2004-11-15 00:00:00+00:00,1310.0,VT 11,11,0.5,...,,,11.0,2004,AOT,,2004,"43.23502288333559,-72.8402568531839",2004-11-15 00:00:00+00:00,2004-11-15T00:00:00
7,-72.830991,44.783002,8,Otter,Otter,2004-10-18 00:00:00+00:00,601.0,VT 36,36,1.0,...,,,10.0,2004,AOT,,2004,"44.78300220380285,-72.83099079061695",2004-10-18 00:00:00+00:00,2004-10-18T00:00:00
9,-72.514595,44.242323,10,Beaver,Beaver,2005-04-22 00:00:00+00:00,1207.0,US 2,2,0.19,...,,,4.0,2005,AOT,,2005,"44.24232301744294,-72.51459452575021",2005-04-22 00:00:00+00:00,2005-04-22T00:00:00


In [305]:
merged = data.merge(weather_dropped,left_on=['ll','date'],right_on=['ll','datetimeStr'])

In [307]:
merged

Unnamed: 0,X,Y,OBJECTID,MSRI_CODE,MSRI_DESCR,DATE_,TOWN_ID,ROUTE_DES,ROUTE,BEGIN_MM,...,date,temp,maxt,datetimeStr,cloudcover,mint,precip,humidity,conditions,windchill
0,-72.989424,42.833309,5,Deer,Deer,2004-04-16 00:00:00+00:00,209.0,VT 8,008,0.35,...,2004-04-16T00:00:00,6,14,2004-04-16T00:00:00,0,-1,0,38.63,Clear,-4.1
1,-72.840257,43.235023,7,Lg Bird,"Large Bird (hawk, owl, turkey, waterfowl)",2004-11-15 00:00:00+00:00,1310.0,VT 11,011,0.50,...,2004-11-15T00:00:00,1.2,12.1,2004-11-15T00:00:00,1.2,-5.3,0,71.68,Clear,-3.9
2,-72.830991,44.783002,8,Otter,Otter,2004-10-18 00:00:00+00:00,601.0,VT 36,036,1.00,...,2004-10-18T00:00:00,1.6,3.5,2004-10-18T00:00:00,29,-3.6,0.8,77.48,"Rain, Partially cloudy",-2.9
3,-72.514595,44.242323,10,Beaver,Beaver,2005-04-22 00:00:00+00:00,1207.0,US 2,002,0.19,...,2005-04-22T00:00:00,7.1,16.1,2005-04-22T00:00:00,7.9,-4,0,46.22,Clear,6.2
4,-73.195801,43.433986,12,Lg Bird,"Large Bird (hawk, owl, turkey, waterfowl)",2005-04-06 00:00:00+00:00,1126.0,VT 30,030,2.80,...,2005-04-06T00:00:00,11.1,18.9,2005-04-06T00:00:00,49.8,3.9,0,40.21,Partially cloudy,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,-72.718631,44.390957,447,Otter,Otter,2004-09-10 00:00:00+00:00,1218.0,VT 100,100,4.30,...,2004-09-10T00:00:00,15.1,20.4,2004-09-10T00:00:00,82.5,11.3,0.06,85.75,"Rain, Overcast",
175,-73.103761,43.089094,451,Bear,Bear,2005-10-24 00:00:00+00:00,215.0,US 7,007,4.80,...,2005-10-24T00:00:00,6.4,7.8,2005-10-24T00:00:00,100,4.2,1,80.91,"Rain, Overcast",4.4
176,-73.196794,44.969938,453,Other,Other,2004-09-24 00:00:00+00:00,615.0,VT 78,078,1.20,...,2004-09-24T00:00:00,10,17.9,2004-09-24T00:00:00,0,3.3,0,73.2,Clear,8
177,-73.196794,44.969938,1008,Otter,Otter,2004-09-24 00:00:00+00:00,615.0,VT 78,078,1.20,...,2004-09-24T00:00:00,10,17.9,2004-09-24T00:00:00,0,3.3,0,73.2,Clear,8


In [280]:
url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/weatherdata/history?&aggregateHours=24&startDateTime="+time+"&endDateTime="+time+"&unitGroup=metric&contentType=json&dayStartTime=0:0:00&dayEndTime=0:0:00&location="+location+"&key="+api_key

In [281]:
res=requests.get(url)
res

In [283]:
result = res.json()

In [284]:
result

{'errorCode': 999,
 'sessionId': '',
 'executionTime': -1,
 'message': 'You have exceeded the maximum number of daily requests for your account. See https://www.visualcrossing.com/weather-data-editions for information about our plans and how to get more weather data for free.'}

In [273]:
result['locations'][location]['values']

KeyError: 'locations'

In [310]:
result['locations'][location]['values'][0]

KeyError: 'locations'

In [139]:
d = {}
d[location] = result['locations'][location]['values'][0]
d['asdasd'] = result['locations'][location]['values'][0]