## Yelp API Calls

The API calls are used to gather restaurant and attraction information for the five boroughs of New York City (Manhattan, Staten Island, the Bronx, Queens, and Brooklyn) and for Paris.

Imports

In [1]:
import pandas as pd
import requests
import json
import time
from config import api_key
from config import client_id

# Display every column and never truncate cell contents when a DataFrame is shown.
for _display_option in ('display.max_columns', 'display.max_colwidth'):
    pd.set_option(_display_option, None)

Yelp API key and request parameters

In [2]:
# `api_key` (and `client_id`) come from the `config` module imported in the
# imports cell. They are deliberately NOT reassigned here: the placeholder
# assignments that used to sit in this cell were one-element sets, which
# clobbered the imported values and produced the header
# "bearer {'Your API Key here'}".
if not isinstance(api_key, str) or not api_key.strip():
    raise ValueError('api_key must be a non-empty string; set it in the config module')

# Yelp business-search endpoint used by every call in this notebook.
endpoint = 'https://api.yelp.com/v3/businesses/search'

# Authorization header sent with every request.
headers = {'Authorization': f'bearer {api_key}'}

# Query template for restaurant searches; 'location' is supplied per request.
restaurant_params = {
    'term' : 'restaurant',
    'limit' : 50,
    'radius' : 5000,
    'location' : None
}

# Query template for attraction searches; 'location' is supplied per request.
attraction_params = {
    'term' : 'Arts & Entertainment',
    # alternative search term, left disabled:
    # 'attraction',
    'limit' : 50,
    # radius filter left disabled for attractions:
    # 'radius' : 5000,
    'location' : None
}

Zip Codes

The Paris entries are strings of the form `Paris,<postal code>` so that the search resolves to Paris rather than to a US zip code.

In [3]:
# Search locations, one list per area. Each element is passed as the
# 'location' query parameter of a single API request.

# New York City boroughs: zip codes stored as integers.
manhattan = [10031, 10032, 10033, 10034, 10040, 10026, 10027, 10030, 10037, 10039, 10029, 10035, 10023, 10024, 10025, 10021, 10028, 10044, 10065, 10075, 10128, 10001, 10011, 10018, 10019, 10020, 10036, 10010, 10016, 10017, 10022, 10012, 10013, 10014, 10002, 10003, 10009, 10004, 10005, 10006, 10007, 10038, 10280] 
staten_island = [10302, 10303, 10310, 10306, 10307, 10308, 10309, 10312, 10301, 10304, 10305, 10314] 
bronx =[10463, 10471, 10466, 10469, 10470, 10475, 10453, 10457, 10460, 10458, 10467, 10468, 10461, 10462, 10464, 10465, 10472, 10473, 10451, 10452, 10456, 10454, 10455, 10459, 10474] 
queens =[11101, 11102, 11103, 11104, 11105, 11106, 11368, 11369, 11370, 11372, 11373, 11377, 11378, 11354, 11355, 11356, 11357, 11358, 11359, 11360, 11361, 11362, 11363, 11364, 11374, 11375, 11379, 11385, 11365, 11366, 11367, 11414, 11415, 11416, 11417, 11418, 11419, 11420, 11421, 11412, 11423, 11432, 11433, 11434, 11435, 11436, 11004, 11005, 11411, 11413, 11422, 11426, 11427, 11428, 11429, 11691, 11692, 11693, 11694, 11695, 11697] 
brooklyn =[11211, 11222, 11201, 11205, 11215, 11217, 11231, 11212, 11213, 11216, 11233, 11238, 11207, 11208, 11220, 11232, 11209, 11214, 11228, 11204, 11218, 11219, 11230, 11234, 11236, 11239, 11223, 11224, 11229, 11235, 11203, 11210, 11225, 11226, 11206, 11221, 11237] 
# Paris: 'Paris,<postal code>' strings rather than bare numbers.
paris=['Paris,75001', 'Paris,75002', 'Paris,75003', 'Paris,75004', 'Paris,75005', 'Paris,75006', 'Paris,75007', 'Paris,75008', 'Paris,75009', 'Paris,75010', 'Paris,75011', 'Paris,75012', 'Paris,75013', 'Paris,75014', 'Paris,75015', 'Paris,75016', 'Paris,75116', 'Paris,75017', 'Paris,75018', 'Paris,75019', 'Paris,75020']

## API Calls

The following are the API calls for each location's zip codes, starting with Staten Island. For every location the calls are made in this order:

- Restaurant API call
- Attraction API call

After each call, the list of responses is serialised to a JSON string and then flattened into a DataFrame so we can check that the correct information has come back.

**Note:** The same business can be returned by more than one call, so the DataFrames contain duplicates. These will be addressed when we read the data back in to perform our EDA.

Staten Island Restaurant Call

In [4]:
# Query the search endpoint once per Staten Island zip code and keep the
# decoded JSON body of every successful (HTTP 200) response.
all_si_rest_data = []
for zip_code in staten_island:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_si = requests.get(url=endpoint,
                                   params={**restaurant_params, 'location': zip_code},
                                   headers=headers,
                                   timeout=30)  # a stalled connection must not hang the run
        if response_si.status_code == 200:
            si_data = response_si.json()
            all_si_rest_data.append(si_data)
        else:
            print(f'API request failed with {response_si.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [5]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and decoding it later would yield a str instead of a list of responses.
if not isinstance(all_si_rest_data, str):
    all_si_rest_data = json.dumps(all_si_rest_data)

In [6]:
# Flatten the decoded responses: one row per entry of each response's
# 'businesses' list, with the response-level 'total' and 'region' values
# carried along on every row.
si_rest_df = pd.json_normalize(
    json.loads(all_si_rest_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [7]:
# si_rest_df.head(5)

In [8]:
# Used for checking Duplicates
# si_rest_df.duplicated(subset=['name']).value_counts()

Staten Island Attraction Call

In [9]:
# Query the search endpoint once per Staten Island zip code for attractions and
# keep the decoded JSON body of every successful (HTTP 200) response.
all_si_attract_data = []
for zip_code in staten_island:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_si_attract = requests.get(url=endpoint,
                                           params={**attraction_params, 'location': zip_code},
                                           headers=headers,
                                           timeout=30)  # a stalled connection must not hang the run
        if response_si_attract.status_code == 200:
            si_data_attract = response_si_attract.json()
            all_si_attract_data.append(si_data_attract)
        else:
            print(f'API request failed with {response_si_attract.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [10]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_si_attract_data, str):
    all_si_attract_data = json.dumps(all_si_attract_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
si_attract_df = pd.json_normalize(
    json.loads(all_si_attract_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [11]:
# si_attract_df.tail(5)

# Duplicate check: True counts rows whose 'name' repeats an earlier row,
# False counts first occurrences.
si_attract_df.duplicated(subset='name').value_counts()

True     389
False    211
dtype: int64

Manhattan Restaurant Call

In [13]:
# Query the search endpoint once per Manhattan zip code and keep the decoded
# JSON body of every successful (HTTP 200) response.
all_man_rest_data = []
for zip_code in manhattan:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_man = requests.get(url=endpoint,
                                    params={**restaurant_params, 'location': zip_code},
                                    headers=headers,
                                    timeout=30)  # a stalled connection must not hang the run
        if response_man.status_code == 200:
            man_data = response_man.json()
            all_man_rest_data.append(man_data)
        else:
            print(f'API request failed with {response_man.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [14]:
# Sanity Check
# all_man_rest_data

In [15]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_man_rest_data, str):
    all_man_rest_data = json.dumps(all_man_rest_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
man_rest_df = pd.json_normalize(
    json.loads(all_man_rest_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [16]:
# man_rest_df.head(5)

# Duplicate check: True counts rows whose 'name' repeats an earlier row,
# False counts first occurrences.
man_rest_df.duplicated(subset='name').value_counts()

True     1492
False     658
dtype: int64

Manhattan Attraction Call

In [17]:
# Query the search endpoint once per Manhattan zip code for attractions and keep
# the decoded JSON body of every successful (HTTP 200) response.
all_man_attract_data = []
for zip_code in manhattan:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_man_attract = requests.get(url=endpoint,
                                            params={**attraction_params, 'location': zip_code},
                                            headers=headers,
                                            timeout=30)  # a stalled connection must not hang the run
        if response_man_attract.status_code == 200:
            man_data_attract = response_man_attract.json()
            all_man_attract_data.append(man_data_attract)
        else:
            print(f'API request failed with {response_man_attract.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [18]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_man_attract_data, str):
    all_man_attract_data = json.dumps(all_man_attract_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
man_attract_df = pd.json_normalize(
    json.loads(all_man_attract_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [19]:
# man_attract_df.head(5)

# Used for checking Duplicates
# man_attract_df.duplicated(subset=['name']).value_counts()

Bronx Restaurant Call

In [20]:
# Query the search endpoint once per Bronx zip code and keep the decoded JSON
# body of every successful (HTTP 200) response.
all_bronx_rest_data = []
for zip_code in bronx:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_bronx = requests.get(url=endpoint,
                                      params={**restaurant_params, 'location': zip_code},
                                      headers=headers,
                                      timeout=30)  # a stalled connection must not hang the run
        if response_bronx.status_code == 200:
            bronx_data = response_bronx.json()
            all_bronx_rest_data.append(bronx_data)
        else:
            print(f'API request failed with {response_bronx.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [21]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_bronx_rest_data, str):
    all_bronx_rest_data = json.dumps(all_bronx_rest_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
bronx_rest_df = pd.json_normalize(
    json.loads(all_bronx_rest_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [22]:
# bronx_rest_df.head(5)

# Used for checking Duplicates
# bronx_rest_df.duplicated(subset=['name']).value_counts()

Bronx Attraction Call

In [23]:
# Query the search endpoint once per Bronx zip code for attractions and keep the
# decoded JSON body of every successful (HTTP 200) response.
all_bronx_attract_data = []
for zip_code in bronx:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_bronx_attract = requests.get(url=endpoint,
                                              params={**attraction_params, 'location': zip_code},
                                              headers=headers,
                                              timeout=30)  # a stalled connection must not hang the run
        if response_bronx_attract.status_code == 200:
            bronx_data_attract = response_bronx_attract.json()
            all_bronx_attract_data.append(bronx_data_attract)
        else:
            print(f'API request failed with {response_bronx_attract.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [24]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_bronx_attract_data, str):
    all_bronx_attract_data = json.dumps(all_bronx_attract_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
bronx_attract_df = pd.json_normalize(
    json.loads(all_bronx_attract_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [25]:
# bronx_attract_df.head(5)

# Used for checking Duplicates
# bronx_attract_df.duplicated(subset=['name']).value_counts()

Queens Restaurant Call


In [26]:
# Query the search endpoint once per Queens zip code and keep the decoded JSON
# body of every successful (HTTP 200) response.
all_queens_rest_data = []
for zip_code in queens:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_queens = requests.get(url=endpoint,
                                       params={**restaurant_params, 'location': zip_code},
                                       headers=headers,
                                       timeout=30)  # a stalled connection must not hang the run
        if response_queens.status_code == 200:
            queens_data = response_queens.json()
            all_queens_rest_data.append(queens_data)
        else:
            print(f'API request failed with {response_queens.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [27]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_queens_rest_data, str):
    all_queens_rest_data = json.dumps(all_queens_rest_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
queens_rest_df = pd.json_normalize(
    json.loads(all_queens_rest_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [28]:
# queens_rest_df.head(5)

# Used for checking Duplicates
# queens_rest_df.duplicated(subset=['name']).value_counts()

Queens Attraction Call

In [29]:
# Query the search endpoint once per Queens zip code for attractions and keep
# the decoded JSON body of every successful (HTTP 200) response.
all_queens_attract_data = []
for zip_code in queens:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_queens_attract = requests.get(url=endpoint,
                                               params={**attraction_params, 'location': zip_code},
                                               headers=headers,
                                               timeout=30)  # a stalled connection must not hang the run
        if response_queens_attract.status_code == 200:
            queens_data_attract = response_queens_attract.json()
            all_queens_attract_data.append(queens_data_attract)
        else:
            print(f'API request failed with {response_queens_attract.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [30]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_queens_attract_data, str):
    all_queens_attract_data = json.dumps(all_queens_attract_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
queens_attract_df = pd.json_normalize(
    json.loads(all_queens_attract_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [31]:
# queens_attract_df.tail(10)

# Used for checking Duplicates
# queens_attract_df.duplicated(subset=['name']).value_counts()

Brooklyn Restaurant Call

In [32]:
# Query the search endpoint once per Brooklyn zip code and keep the decoded JSON
# body of every successful (HTTP 200) response.
all_brook_rest_data = []
for zip_code in brooklyn:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_brook = requests.get(url=endpoint,
                                      params={**restaurant_params, 'location': zip_code},
                                      headers=headers,
                                      timeout=30)  # a stalled connection must not hang the run
        if response_brook.status_code == 200:
            brook_data = response_brook.json()
            all_brook_rest_data.append(brook_data)
        else:
            print(f'API request failed with {response_brook.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [33]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_brook_rest_data, str):
    all_brook_rest_data = json.dumps(all_brook_rest_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
brook_rest_df = pd.json_normalize(
    json.loads(all_brook_rest_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [34]:
# brook_rest_df.head(5)

# Used for checking Duplicates
# brook_rest_df.duplicated(subset=['name']).value_counts()

Brooklyn Attraction Call

In [35]:
# Query the search endpoint once per Brooklyn zip code for attractions and keep
# the decoded JSON body of every successful (HTTP 200) response.
all_brook_attract_data = []
for zip_code in brooklyn:  # named `zip_code` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_brook_attract = requests.get(url=endpoint,
                                              params={**attraction_params, 'location': zip_code},
                                              headers=headers,
                                              timeout=30)  # a stalled connection must not hang the run
        if response_brook_attract.status_code == 200:
            brook_data_attract = response_brook_attract.json()
            all_brook_attract_data.append(brook_data_attract)
        else:
            print(f'API request failed with {response_brook_attract.status_code} for location {zip_code}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {zip_code}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [36]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_brook_attract_data, str):
    all_brook_attract_data = json.dumps(all_brook_attract_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
brook_attract_df = pd.json_normalize(
    json.loads(all_brook_attract_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [37]:
# brook_attract_df.head(5)

# Used for checking Duplicates
# brook_attract_df.duplicated(subset=['name']).value_counts()

Paris Restaurant Call

In [38]:
# Query the search endpoint once per Paris location string and keep the decoded
# JSON body of every successful (HTTP 200) response.
all_paris_rest_data = []
for location in paris:  # named `location` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_paris = requests.get(url=endpoint,
                                      params={**restaurant_params, 'location': location},
                                      headers=headers,
                                      timeout=30)  # a stalled connection must not hang the run
        if response_paris.status_code == 200:
            paris_data = response_paris.json()
            all_paris_rest_data.append(paris_data)
        else:
            print(f'API request failed with {response_paris.status_code} for location {location}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {location}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [39]:
# all_paris_rest_data

In [40]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_paris_rest_data, str):
    all_paris_rest_data = json.dumps(all_paris_rest_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
paris_rest_df = pd.json_normalize(
    json.loads(all_paris_rest_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [41]:
# paris_rest_df.tail(5)

# Used for checking Duplicates
# paris_rest_df.duplicated(subset=['name']).value_counts()

Paris Attraction Call

In [42]:
# Query the search endpoint once per Paris location string for attractions and
# keep the decoded JSON body of every successful (HTTP 200) response.
all_paris_attract_data = []
for location in paris:  # named `location` so the builtin `zip` is not shadowed
    try:
        # Merge the location into a fresh dict instead of mutating the shared template.
        response_paris_attract = requests.get(url=endpoint,
                                              params={**attraction_params, 'location': location},
                                              headers=headers,
                                              timeout=30)  # a stalled connection must not hang the run
        if response_paris_attract.status_code == 200:
            paris_data_attract = response_paris_attract.json()
            all_paris_attract_data.append(paris_data_attract)
        else:
            print(f'API request failed with {response_paris_attract.status_code} for location {location}')
    except requests.exceptions.RequestException as e:
        print(f'API request failed for location {location}: {e}')

    # Throttle between attempts. Kept outside the try so a request that raised
    # is followed by the same pause as one that completed.
    time.sleep(10)

In [43]:
# Serialise the collected responses to a JSON string. The guard keeps this cell
# re-runnable: without it a second run would JSON-encode the already-encoded
# string, and json_normalize would then be handed a str instead of a list.
if not isinstance(all_paris_attract_data, str):
    all_paris_attract_data = json.dumps(all_paris_attract_data)

# One row per entry of each response's 'businesses' list; the response-level
# 'total' and 'region' values are carried along on every row.
paris_attract_df = pd.json_normalize(
    json.loads(all_paris_attract_data),
    record_path='businesses',
    meta=['total', 'region'],
)

In [44]:
# paris_attract_df.tail(5)

# Used for checking Duplicates
# paris_attract_df.duplicated(subset=['name']).value_counts()

In [45]:
# paris_attract_df['name'].value_counts()

## Adding Columns, Concatenating the DataFrames and Saving to CSVs

* A column to each Dataframe will be added to describe if the row is a restaurant or attraction
* The restaurant and attraction DataFrame will be concatenated for each borough and Paris, leaving us with 6 DataFrames instead of 12
* Each new DataFrame will be saved to a CSV so we can place it in our blob container and access it without having to make the API calls again

Adding a Column

In [46]:
# Tag every Staten Island restaurant row with its category.
_label_column = 'Restaurant or Attraction?'
si_rest_df[_label_column] = 'Restaurant'

In [47]:
# Tag every row of the remaining restaurant frames with its category.
for _rest_frame in (man_rest_df, bronx_rest_df, queens_rest_df, brook_rest_df, paris_rest_df):
    _rest_frame['Restaurant or Attraction?'] = 'Restaurant'

In [48]:
# Tag every row of the attraction frames with its category.
for _attract_frame in (
    si_attract_df,
    man_attract_df,
    bronx_attract_df,
    queens_attract_df,
    brook_attract_df,
    paris_attract_df,
):
    _attract_frame['Restaurant or Attraction?'] = 'Attraction'

Concatenating into one DataFrame for each location

In [49]:
# Stack each location's restaurant rows on top of its attraction rows.
# ignore_index=True gives every combined frame a fresh 0..n-1 index; without it
# both halves keep their own 0-based labels, so the same label would point at
# one restaurant row and one attraction row.
full_si = pd.concat([si_rest_df, si_attract_df], ignore_index=True)
full_man = pd.concat([man_rest_df, man_attract_df], ignore_index=True)
full_bronx = pd.concat([bronx_rest_df, bronx_attract_df], ignore_index=True)
full_queens = pd.concat([queens_rest_df, queens_attract_df], ignore_index=True)
full_brook = pd.concat([brook_rest_df, brook_attract_df], ignore_index=True)
full_paris = pd.concat([paris_rest_df, paris_attract_df], ignore_index=True)


In [50]:
# Sanity check: (rows, columns) of the combined Paris frame.
full_paris.shape

(2100, 27)

Saving into CSVs

In [51]:
# Write each combined frame to its own CSV, in the same order as before.
for _frame, _csv_path in (
    (full_si, '../data/staten_island.csv'),
    (full_man, '../data/manhatten.csv'),
    (full_bronx, '../data/bronx.csv'),
    (full_queens, '../data/queens.csv'),
    (full_brook, '../data/brooklyn.csv'),
    (full_paris, '../data/paris.csv'),
):
    _frame.to_csv(_csv_path)