# Data Acquisition

Phase 1:
    
    - Get Top 10 sites to visit in Chicago
    - Get a list of restaurants for each of the top 10 sites
    - Get the Chicago Police Department Crime Data

In [43]:
# Import Pandas to provide DataFrame support
import pandas as pd
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Import Requests
import requests

# Import BeautifulSoup
from bs4 import BeautifulSoup

In [48]:
# Yaml is used to store some of the required configurations
import yaml

with open(".\\data\\config.txt", "r") as f:
    cfg = yaml.load(f, Loader=yaml.FullLoader)
 
search_params = {
    'client_id': cfg['client_id'],
    'client_secret': cfg['client_secret'],
    'intent': 'browse',
    'limit': 50,
    'v': cfg['version']
}

In [56]:
# Use the Requests get method to request the top sites in Chicago
page = requests.get(
    "https://foursquare.com/explore?mode=url&near=Chicago%2C%20IL%2C%20United%20States&nearGeoId=72057594042815334&q=Top%20Picks")

# Convert the HTML response into a BeautifulSoup Object
soup = BeautifulSoup(page.content, 'html.parser')

# Use the BeautifulSoup find_all method to extract each top site venue details.
top_venues = soup.find_all('div', class_='venueDetails')


In [81]:
top_venues[0].find(class_="venueName").h2.a['href'].split('/')[-1]

'4c23e840a852c928594ae26c'

In [78]:
# The column names for the top venues dataframe
venue_columns = ['id', 
                 'score', 
                 'category', 
                 'name', 
                 'address',
                 'postalcode',
                 'city',
                 'href', 
                 'latitude', 
                 'longitude']

# Create the empty top venues dataframe
df_top_venues = pd.DataFrame(columns=venue_columns)

# For each venue in the BeautifulSoup HTML object
for venue in top_venues:
    
    # Extract the available attributes
    venue_name = venue.find(target="_blank").get_text()
    venue_score = venue.find(class_="venueScore positive").get_text()
    venue_cat = venue.find(class_="categoryName").get_text()
    venue_href = venue.find(class_="venueName").h2.a['href']
    venue_id = venue_href.split('/')[-1]   

    if 'promotedTipId' in venue_id: 
        continue
        
    # Contruct the FourSquare venue API URL
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
        venue_id, 
        cfg['client_id'],
        cfg['client_secret'],
        cfg['version'])
    
    # Request the venue data
    result = requests.get(url).json()
    print(url)

https://api.foursquare.com/v2/venues/4c23e840a852c928594ae26c?client_id=I3GASL15SVBG55NYCBB1MUJTV2TY1SUNP13OENTMFQX3PRFU&client_secret=ZD0P1HKFZNWREIOXHZE3VCFRZLTAC3XARJFRCCXBRRAY0OYS&v=20180604
https://api.foursquare.com/v2/venues/42b75880f964a52090251fe3?client_id=I3GASL15SVBG55NYCBB1MUJTV2TY1SUNP13OENTMFQX3PRFU&client_secret=ZD0P1HKFZNWREIOXHZE3VCFRZLTAC3XARJFRCCXBRRAY0OYS&v=20180604
https://api.foursquare.com/v2/venues/4b9d15c5f964a520478e36e3?client_id=I3GASL15SVBG55NYCBB1MUJTV2TY1SUNP13OENTMFQX3PRFU&client_secret=ZD0P1HKFZNWREIOXHZE3VCFRZLTAC3XARJFRCCXBRRAY0OYS&v=20180604
https://api.foursquare.com/v2/venues/4b06c066f964a52097ef22e3?client_id=I3GASL15SVBG55NYCBB1MUJTV2TY1SUNP13OENTMFQX3PRFU&client_secret=ZD0P1HKFZNWREIOXHZE3VCFRZLTAC3XARJFRCCXBRRAY0OYS&v=20180604
https://api.foursquare.com/v2/venues/4f2a0d0ae4b0837d0c4c2bc3?client_id=I3GASL15SVBG55NYCBB1MUJTV2TY1SUNP13OENTMFQX3PRFU&client_secret=ZD0P1HKFZNWREIOXHZE3VCFRZLTAC3XARJFRCCXBRRAY0OYS&v=20180604
https://api.foursquare.co

In [74]:
top_venues[0].find(target="_blank")

<a href="/v/nature-boardwalk/4c23e840a852c928594ae26c" target="_blank">Nature Boardwalk</a>

In [75]:
top_venues[0].find(target="_blank").get_text()

'Nature Boardwalk'

In [59]:

    # Get the properly formatted address and the latitude and longitude
    venue_address = result['response']['venue']['location']['address']
    venue_postalcode = result['response']['venue']['location']['postalCode']
    venue_city = result['response']['venue']['location']['city']
    venue_latitude = result['response']['venue']['location']['lat']
    venue_longitude = result['response']['venue']['location']['lng']
    
    # Add the venue to the top venues dataframe
    df_top_venues = df_top_venues.append({'id': venue_id,
                                          'score': venue_score,
                                          'category': venue_cat,
                                          'name': venue_name,
                                          'address': venue_address,
                                          'postalcode': venue_postalcode,
                                          'city': venue_city,
                                          'href': venue_href,
                                          'latitude': venue_latitude,
                                          'longitude': venue_longitude}, ignore_index=True)

KeyError: 'venue'

In [55]:
result

{'meta': {'code': 429,
  'errorDetail': 'Quota exceeded',
  'errorType': 'quota_exceeded',
  'requestId': '5f00d0d0d2caaa488eee8b68'},
 'response': {}}

In [50]:
# The column names for the top venues dataframe
venue_columns = ['id', 
                 'score', 
                 'category', 
                 'name', 
                 'address',
                 'postalcode',
                 'city',
                 'href', 
                 'latitude', 
                 'longitude']

# Create the empty top venues dataframe
df_top_venues = pd.DataFrame(columns=venue_columns)

# For each venue in the BeautifulSoup HTML object
for venue in top_venues:
    
    # Extract the available attributes
    venue_name = venue.find(target="_blank").get_text()
    venue_score = venue.find(class_="venueScore positive").get_text()
    venue_cat = venue.find(class_="categoryName").get_text()
    venue_href = venue.find(class_="venueName").h2.a['href']
    venue_id = venue_href.split('/')[-1]

    if 'promotedTipId' in venue_id: 
        continue
        
    # Contruct the FourSquare venue API URL
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
        venue_id, 
        cfg['client_id'],
        cfg['client_secret'],
        cfg['version'])
    
    # Request the venue data
    result = requests.get(url).json()
        
    # Get the properly formatted address and the latitude and longitude
    venue_address = result['response']['venue']['location']['address']
    venue_postalcode = result['response']['venue']['location']['postalCode']
    venue_city = result['response']['venue']['location']['city']
    venue_latitude = result['response']['venue']['location']['lat']
    venue_longitude = result['response']['venue']['location']['lng']
    
    # Add the venue to the top venues dataframe
    df_top_venues = df_top_venues.append({'id': venue_id,
                                          'score': venue_score,
                                          'category': venue_cat,
                                          'name': venue_name,
                                          'address': venue_address,
                                          'postalcode': venue_postalcode,
                                          'city': venue_city,
                                          'href': venue_href,
                                          'latitude': venue_latitude,
                                          'longitude': venue_longitude}, ignore_index=True)

KeyError: 'venue'

In [41]:
type(top_venues)

bs4.element.ResultSet

In [42]:
# For each venue in the BeautifulSoup HTML object
for venue in top_venues:
    
    # Extract the available attributes
    venue_name = venue.find(target="_blank").get_text()
    venue_score = venue.find(class_="venueScore positive").get_text()
    venue_cat = venue.find(class_="categoryName").get_text()
    venue_href = venue.find(class_="venueName").h2.a['href']
    venue_id = venue_href.split('/')[-1]

    if 'promotedTipId' in venue_id: 
        continue
        
    # Contruct the FourSquare venue API URL
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&v={}'.format(
        venue_id, 
        cfg['client_id'],
        cfg['client_secret'],
        cfg['version'])
    
    # Request the venue data
    result = requests.get(url).json()
        
    # Get the properly formatted address and the latitude and longitude
    venue_address = result['response']['venue']['location']['address']
    venue_postalcode = result['response']['venue']['location']['postalCode']
    venue_city = result['response']['venue']['location']['city']
    venue_latitude = result['response']['venue']['location']['lat']
    venue_longitude = result['response']['venue']['location']['lng']
    
    # Add the venue to the top venues dataframe
    df_top_venues = df_top_venues.append({'id': venue_id,
                                          'score': venue_score,
                                          'category': venue_cat,
                                          'name': venue_name,
                                          'address': venue_address,
                                          'postalcode': venue_postalcode,
                                          'city': venue_city,
                                          'href': venue_href,
                                          'latitude': venue_latitude,
                                          'longitude': venue_longitude}, ignore_index=True)

KeyError: 'venue'