## Final Project
## Jin Liu
### Find restaurants for users in NYC and show them on a map in an automated way
### API: Yelp API

### Data acquisition: This project helps users find restaurants in NYC based on the type of food they enter. The project also builds a map that shows the locations of these restaurants. First, I wrote an API client that obtains data from the Yelp API. Then, I wrote a for loop and a function to handle the API interaction. The responses are handled as JSON. 

In [6]:
import requests
import pandas as pd
import configparser
import config

# Credentials are read from the local ``config`` module.
api_key = config.api_key
client_id = config.client_id

# Yelp business-search endpoint
base_url = 'https://api.yelp.com/v3/businesses/search'

headers = {
    "Authorization": "Bearer " + api_key
}

# Base query; 'term' and 'location' are overwritten for every search below.
params = {
    'term': 'cafe',
    'location': 'New York',
    'limit': 50
}

# Every (term, location) pair to query: the initial cafe search in New York,
# followed by each venue type in each of the three areas.
searches = [(params['term'], params['location'])] + [
    (terms, place)
    for terms in ['restaurant', 'cocktailbar', 'bar']
    for place in ['New York', 'long island city', 'jersey city']
]

# One dataframe per search, in the order the searches were made
results = []

for terms, place in searches:
    params['term'] = terms
    params['location'] = place

    # A timeout keeps a stalled connection from hanging the notebook.
    response = requests.get(base_url, params=params, headers=headers, timeout=10)

    # Fail with the HTTP error (bad key, rate limit, ...) rather than a
    # KeyError on the missing 'businesses' field of an error payload.
    response.raise_for_status()

    # Get the list of businesses from the JSON response
    businesses = response.json()['businesses']
    results.append(pd.DataFrame(businesses))

# Combine everything in a single concat, most recent search first. Each
# response keeps its own 0..n-1 index, so index labels repeat across searches.
df = pd.concat(results[::-1])


In [7]:
# Preview the first rows of the combined search results
df.head()

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,geaAzPXHBqH48SopnWKqXA,the-junto-attic-bar-jersey-city,The Junto Attic Bar,https://s3-media3.fl.yelpcdn.com/bphoto/44ZMJC...,False,https://www.yelp.com/biz/the-junto-attic-bar-j...,33,"[{'alias': 'bars', 'title': 'Bars'}]",4.5,"{'latitude': 40.718909, 'longitude': -74.045386}",[],$$$,"{'address1': '68 Mercer St', 'address2': '', '...",,,1455.957344
1,_hvgCRW8Vw1n3Usr9RPoUg,cellar-335-jersey-city,Cellar 335,https://s3-media4.fl.yelpcdn.com/bphoto/XVOpAK...,False,https://www.yelp.com/biz/cellar-335-jersey-cit...,673,"[{'alias': 'newamerican', 'title': 'American (...",4.5,"{'latitude': 40.7249692, 'longitude': -74.0515...","[delivery, pickup]",$$,"{'address1': '335 Newark Ave', 'address2': '',...",12012221422.0,(201) 222-1422,697.779032
2,AZfcw8HHwstl-v6mTNIBIg,canopy-bar-and-bistro-jersey-city,Canopy Bar & Bistro,https://s3-media2.fl.yelpcdn.com/bphoto/W3IJIk...,False,https://www.yelp.com/biz/canopy-bar-and-bistro...,16,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",4.0,"{'latitude': 40.71985, 'longitude': -74.0401546}",[delivery],,"{'address1': '159 Morgan St', 'address2': 'Fl ...",12012414664.0,(201) 241-4664,1789.000622
3,r4SazXX-ISikdRYWbH-HAg,hudson-and-co-jersey-city,Hudson & Co,https://s3-media2.fl.yelpcdn.com/bphoto/327s7K...,False,https://www.yelp.com/biz/hudson-and-co-jersey-...,693,"[{'alias': 'gastropubs', 'title': 'Gastropubs'...",4.0,"{'latitude': 40.720988, 'longitude': -74.031573}","[delivery, pickup]",$$$,"{'address1': '3 2nd St', 'address2': '', 'addr...",12016857330.0,(201) 685-7330,2432.095875
4,pqgteq-86xHmjinGwwb1ZA,rooftop-at-exchange-place-jersey-city-2,Rooftop at Exchange Place,https://s3-media2.fl.yelpcdn.com/bphoto/Lb_l7i...,False,https://www.yelp.com/biz/rooftop-at-exchange-p...,408,"[{'alias': 'newamerican', 'title': 'American (...",3.0,"{'latitude': 40.715958, 'longitude': -74.033869}",[],$$$,"{'address1': '1 Exchange Pl', 'address2': '', ...",15512567850.0,(551) 256-7850,2497.27268


### Data Cleaning, Transformation, and Organization
### I first reorganized the data and created a new category column that contains only one tag for each restaurant. The dataset is then ready to use for analysis in Python. 

In [8]:
# Reduce each row's 'categories' value to a single tag:
#   * a plain string is kept as it is,
#   * a non-empty list of {'alias': ..., 'title': ...} dicts contributes the
#     alias of its first entry,
#   * anything else (empty list, missing value) becomes None instead of
#     raising, so one incomplete record does not abort the whole pass.
categories = []
for entry in df['categories']:
    if isinstance(entry, str):
        categories.append(entry)
    elif isinstance(entry, (list, tuple)) and entry:
        categories.append(entry[0]['alias'])
    else:
        categories.append(None)

In [9]:
# Attach the single-tag category list as a new 'categories2' column
df = df.assign(categories2 = categories)
# Display the updated dataframe
df

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance,categories2
0,geaAzPXHBqH48SopnWKqXA,the-junto-attic-bar-jersey-city,The Junto Attic Bar,https://s3-media3.fl.yelpcdn.com/bphoto/44ZMJC...,False,https://www.yelp.com/biz/the-junto-attic-bar-j...,33,"[{'alias': 'bars', 'title': 'Bars'}]",4.5,"{'latitude': 40.718909, 'longitude': -74.045386}",[],$$$,"{'address1': '68 Mercer St', 'address2': '', '...",,,1455.957344,bars
1,_hvgCRW8Vw1n3Usr9RPoUg,cellar-335-jersey-city,Cellar 335,https://s3-media4.fl.yelpcdn.com/bphoto/XVOpAK...,False,https://www.yelp.com/biz/cellar-335-jersey-cit...,673,"[{'alias': 'newamerican', 'title': 'American (...",4.5,"{'latitude': 40.7249692, 'longitude': -74.0515...","[delivery, pickup]",$$,"{'address1': '335 Newark Ave', 'address2': '',...",+12012221422,(201) 222-1422,697.779032,newamerican
2,AZfcw8HHwstl-v6mTNIBIg,canopy-bar-and-bistro-jersey-city,Canopy Bar & Bistro,https://s3-media2.fl.yelpcdn.com/bphoto/W3IJIk...,False,https://www.yelp.com/biz/canopy-bar-and-bistro...,16,"[{'alias': 'cocktailbars', 'title': 'Cocktail ...",4.0,"{'latitude': 40.71985, 'longitude': -74.0401546}",[delivery],,"{'address1': '159 Morgan St', 'address2': 'Fl ...",+12012414664,(201) 241-4664,1789.000622,cocktailbars
3,r4SazXX-ISikdRYWbH-HAg,hudson-and-co-jersey-city,Hudson & Co,https://s3-media2.fl.yelpcdn.com/bphoto/327s7K...,False,https://www.yelp.com/biz/hudson-and-co-jersey-...,693,"[{'alias': 'gastropubs', 'title': 'Gastropubs'...",4.0,"{'latitude': 40.720988, 'longitude': -74.031573}","[delivery, pickup]",$$$,"{'address1': '3 2nd St', 'address2': '', 'addr...",+12016857330,(201) 685-7330,2432.095875,gastropubs
4,pqgteq-86xHmjinGwwb1ZA,rooftop-at-exchange-place-jersey-city-2,Rooftop at Exchange Place,https://s3-media2.fl.yelpcdn.com/bphoto/Lb_l7i...,False,https://www.yelp.com/biz/rooftop-at-exchange-p...,408,"[{'alias': 'newamerican', 'title': 'American (...",3.0,"{'latitude': 40.715958, 'longitude': -74.033869}",[],$$$,"{'address1': '1 Exchange Pl', 'address2': '', ...",+15512567850,(551) 256-7850,2497.272680,newamerican
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,CBmrwh7jHn88M4v8Q9Qyyg,white-noise-coffee-co-brooklyn,White Noise Coffee Co,https://s3-media2.fl.yelpcdn.com/bphoto/DlrPHd...,False,https://www.yelp.com/biz/white-noise-coffee-co...,136,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.0,"{'latitude': 40.6896242, 'longitude': -73.9882...",[delivery],$$,"{'address1': '57 Smith St', 'address2': '', 'a...",+17188580258,(718) 858-0258,1824.556563,coffee
46,GP-jlZ6DP1wZCJ8cfalxwA,bao-tea-house-mulberry-new-york,BAO Tea House - Mulberry,https://s3-media3.fl.yelpcdn.com/bphoto/9lKVbk...,False,https://www.yelp.com/biz/bao-tea-house-mulberr...,9,"[{'alias': 'cafes', 'title': 'Cafes'}, {'alias...",4.5,"{'latitude': 40.717812, 'longitude': -73.997952}","[pickup, delivery]",,"{'address1': '122 Mulberry St', 'address2': No...",+16463516759,(646) 351-6759,1415.116934,cafes
47,lYuebxgzj4UYsKuUpAlr0w,café-kitsuné-brooklyn,Café Kitsuné,https://s3-media4.fl.yelpcdn.com/bphoto/ddEExG...,False,https://www.yelp.com/biz/caf%C3%A9-kitsun%C3%A...,17,"[{'alias': 'cafes', 'title': 'Cafes'}]",3.0,"{'latitude': 40.68644457276981, 'longitude': -...",[],,"{'address1': '112 Bond St', 'address2': '', 'a...",,,2244.621968,cafes
48,qGvo8VHrY85b4Ze-JaIjYg,ralphs-coffee-new-york-4,Ralph's Coffee,https://s3-media3.fl.yelpcdn.com/bphoto/jFtjAP...,False,https://www.yelp.com/biz/ralphs-coffee-new-yor...,128,"[{'alias': 'coffee', 'title': 'Coffee & Tea'}]",4.5,"{'latitude': 40.7716587057718, 'longitude': -7...",[delivery],$$,"{'address1': '888 Madison Ave', 'address2': ''...",+12124348000,(212) 434-8000,7745.004337,coffee


### Data wrangling and manipulation
### Then I built a new dataframe that lists the top five categories of food that people like. This is used to give users some recommendations. The result comes from sorting by review count and rating in descending order. We can see that the top five types are coffee, cocktailbars, italian, newamerican, and thai according to the Yelp reviews in NYC. 

In [10]:
# Total number of reviews across all businesses sharing a 'categories2' tag
data = df.groupby('categories2')['review_count'].sum()
# Turn the grouped series into a two-column frame: categories2, total_review
data1 = data.to_frame(name='total_review').reset_index()
data1

Unnamed: 0,categories2,total_review
0,arcades,52
1,argentine,507
2,asianfusion,398
3,australian,747
4,bakeries,71
...,...,...
69,uzbek,34
70,venues,13
71,vietnamese,2036
72,whiskeybars,840


In [11]:
# Average rating of the businesses sharing a 'categories2' tag
data = df.groupby('categories2')['rating'].mean()
# Turn the grouped series into a two-column frame: categories2, mean_rating
data2 = data.to_frame(name='mean_rating').reset_index()
data2

Unnamed: 0,categories2,mean_rating
0,arcades,4.000000
1,argentine,4.500000
2,asianfusion,4.000000
3,australian,4.000000
4,bakeries,4.500000
...,...,...
69,uzbek,4.500000
70,venues,4.250000
71,vietnamese,4.300000
72,whiskeybars,4.500000


In [12]:
# Join the review totals and the mean ratings on the shared "categories2" column
data3 = data1.merge(data2, on='categories2')
data3

Unnamed: 0,categories2,total_review,mean_rating
0,arcades,52,4.000000
1,argentine,507,4.500000
2,asianfusion,398,4.000000
3,australian,747,4.000000
4,bakeries,71,4.500000
...,...,...,...
69,uzbek,34,4.500000
70,venues,13,4.250000
71,vietnamese,2036,4.300000
72,whiskeybars,840,4.500000


In [13]:
# Rank categories by total reviews (mean rating breaks ties), both descending,
# and keep the five highest-ranked rows.
newdf = data3.sort_values(
    by=['total_review', 'mean_rating'],
    ascending=False,
).head(5)
newdf

Unnamed: 0,categories2,total_review,mean_rating
22,coffee,16262,4.396552
21,cocktailbars,15326,4.25
37,italian,10873,4.107143
49,newamerican,10619,4.108696
67,thai,9410,4.388889


### Data visualization
### Then I want to show restaurants' names and locations on a map when a user has an idea of the type of food they want. So, I built a map like this. 

In [14]:
# One filtered dataframe per selected category, matched on the 'categories2' tag
cocktailbars, coffee, italian, newamerican, thai = (
    df.loc[df['categories2'] == alias]
    for alias in ('cocktailbars', 'coffee', 'italian', 'newamerican', 'thai')
)

In [15]:
import folium
from folium.plugins import MarkerCluster
# Base map centred on New York City
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

# Marker cluster layer attached to the map
marker_cluster = MarkerCluster().add_to(m)

# One marker per cocktail bar, with the business name as the popup text
for coords, bar_name in zip(cocktailbars['coordinates'], cocktailbars['name']):
    marker = folium.Marker(
        location=[coords['latitude'], coords['longitude']],
        popup=bar_name,
    )
    marker.add_to(marker_cluster)

# Display the map
m

In [16]:
# Unique category aliases, written directly as a set literal
# (alphabetical, one entry per alias).
categ = {
    'arcades', 'argentine', 'asianfusion', 'australian', 'bakeries',
    'bangladeshi', 'bars', 'bbq', 'beerbar', 'beergardens',
    'bookstores', 'bowling', 'brazilian', 'breakfast_brunch', 'breweries',
    'bubbletea', 'buffets', 'cafes', 'caribbean', 'chicken_wings',
    'chinese', 'cocktailbars', 'coffee', 'cuban', 'desserts',
    'dimsum', 'distilleries', 'divebars', 'filipino', 'food_court',
    'french', 'gastropubs', 'gaybars', 'grocery', 'hotdogs',
    'indpak', 'irish', 'italian', 'japanese', 'korean',
    'latin', 'lounges', 'malaysian', 'mediterranean', 'mexican',
    'mideastern', 'mini_golf', 'modern_european', 'musicvenues', 'newamerican',
    'noodles', 'peruvian', 'pizza', 'portuguese', 'pubs',
    'ramen', 'seafood', 'shanghainese', 'southern', 'spanish',
    'speakeasies', 'steak', 'sushi', 'szechuan', 'tacos',
    'taiwanese', 'tea', 'thai', 'tradamerican', 'uzbek',
    'vegan', 'venues', 'vietnamese', 'whiskeybars', 'wine_bars',
}

### I made the categories unique rather than repetitive. This is useful when I build a function in the next step. 

In [17]:
# Display the set of unique category aliases
categ

{'arcades',
 'argentine',
 'asianfusion',
 'australian',
 'bakeries',
 'bangladeshi',
 'bars',
 'bbq',
 'beerbar',
 'beergardens',
 'bookstores',
 'bowling',
 'brazilian',
 'breakfast_brunch',
 'breweries',
 'bubbletea',
 'buffets',
 'cafes',
 'caribbean',
 'chicken_wings',
 'chinese',
 'cocktailbars',
 'coffee',
 'cuban',
 'desserts',
 'dimsum',
 'distilleries',
 'divebars',
 'filipino',
 'food_court',
 'french',
 'gastropubs',
 'gaybars',
 'grocery',
 'hotdogs',
 'indpak',
 'irish',
 'italian',
 'japanese',
 'korean',
 'latin',
 'lounges',
 'malaysian',
 'mediterranean',
 'mexican',
 'mideastern',
 'mini_golf',
 'modern_european',
 'musicvenues',
 'newamerican',
 'noodles',
 'peruvian',
 'pizza',
 'portuguese',
 'pubs',
 'ramen',
 'seafood',
 'shanghainese',
 'southern',
 'spanish',
 'speakeasies',
 'steak',
 'sushi',
 'szechuan',
 'tacos',
 'taiwanese',
 'tea',
 'thai',
 'tradamerican',
 'uzbek',
 'vegan',
 'venues',
 'vietnamese',
 'whiskeybars',
 'wine_bars'}

### I used a function to encapsulate the repetitive or difficult tasks. With this function, when users enter a term (e.g., a type of food), they automatically get a map that shows the matching restaurants' names and locations. If the term is not within the categories, no map is produced (the function returns None). 

In [18]:
def _primary_category(entry):
    """Return the single category tag for one Yelp 'categories' value.

    A string is returned unchanged, a non-empty list of category dicts
    yields the 'alias' of its first element, and anything else (empty list,
    missing value) yields None.
    """
    if isinstance(entry, str):
        return entry
    if isinstance(entry, (list, tuple)) and entry:
        return entry[0]['alias']
    return None


def get_map(term):
    """Build a folium map of the fetched businesses whose primary category is *term*.

    Queries the Yelp business-search endpoint for 'restaurant', 'cocktailbar'
    and 'bar' in New York, Long Island City and Jersey City (50 results per
    query), tags every business with the alias of its first category, and
    places one marker per matching business on a map centred on New York City.

    Args:
        term: Category alias to look for, e.g. 'spanish' or 'cocktailbars'.

    Returns:
        A folium.Map with one clustered marker per matching business, or
        None when no fetched business has *term* as its primary category.

    Raises:
        requests.HTTPError: if Yelp answers a query with an error status.
    """
    headers = {
        "Authorization": "Bearer " + config.api_key
    }
    # Set the base URL for the Yelp API
    base_url = 'https://api.yelp.com/v3/businesses/search'

    # One request per (search term, location) pair. Each pair is queried
    # exactly once, so no response is fetched twice.
    frames = []
    for search_term in ('restaurant', 'cocktailbar', 'bar'):
        for place in ('New York', 'long island city', 'jersey city'):
            params = {
                'term': search_term,
                'location': place,
                'limit': 50
            }
            response = requests.get(
                base_url, params=params, headers=headers, timeout=10
            )
            # Surface HTTP failures directly instead of a KeyError below.
            response.raise_for_status()
            frames.append(pd.DataFrame(response.json()['businesses']))

    df = pd.concat(frames, ignore_index=True)
    if df.empty:
        return None

    # The same business can be returned by several searches; keep it once so
    # it gets a single marker.
    df = df.drop_duplicates(subset='id')

    df = df.assign(categories2=df['categories'].map(_primary_category))
    looking_for = df.loc[df['categories2'] == term]

    # Nothing to show for this term: signal it with None rather than an
    # empty map.
    if looking_for.empty:
        return None

    # Create a map centered on New York City
    m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)

    # Create a marker cluster object
    marker_cluster = MarkerCluster().add_to(m)

    # Add a marker for each matching business, labelled with that business's
    # own name.
    for coords, name in zip(looking_for['coordinates'], looking_for['name']):
        # Skip records without usable coordinates; a marker needs both values.
        if not isinstance(coords, dict):
            continue
        latitude = coords.get('latitude')
        longitude = coords.get('longitude')
        if latitude is None or longitude is None:
            continue
        folium.Marker(
            location=[latitude, longitude],
            popup=name,
        ).add_to(marker_cluster)

    return m

In [19]:
# Example: map the businesses whose primary category is 'spanish'
get_map('spanish')

### Here, I want to write some simple tests that make sure the function works both for normal conditions and exception cases. 

In [20]:
import folium
from folium.plugins import MarkerCluster
def test_get_map(term):
    """Check that get_map(term) produces a folium map and hand that map back.

    Raises AssertionError when the result has any other type, for example
    the None that get_map returns when no business matches the term.
    """
    result = get_map(term)
    map_type = folium.Map
    assert type(result) == map_type, "Result should be an map object"
    return result

In [21]:
# Normal case: 'spanish' is expected to produce a map
test_get_map('spanish')

In [22]:
import pytest
def test():
    """Expect test_get_map to raise AssertionError for a made-up search term."""
    # get_map returns None when no business matches, which fails the type
    # assertion inside test_get_map.
    with pytest.raises(AssertionError):
        test_get_map('somethingraondom')

    