# Italian vs. Mexican Food
---

The below script provides an analytic approach for assessing the American preference of Italian vs. Mexican food. Using data from the US Census and the Yelp API, the script randomly selects 350 zip codes and aggregates the review of the top 10 Italian and Mexican restaurants in each area. Summary data is then reported using Python Pandas. 

In [37]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt
import requests
import time

# Yelp API Key
ykey_id = "1GwZyE0zIjSujpHtlMnodQ"
ykey_secret = "mcTmghB48JIH0xoNWLldvsX9uIiOLQfdi0gR8LWdFt02lboCAF9vxSSd1MI0KtZ0"
ykey_access_token = "gl6k6JmewUhzjMVBv0I2x4Bz_NRiEggSqjlGbTaejmbzvBJXgI36FPgWoqBnEL9QQ6wU5H4h41dxPkxVjHFlawtH69m1kcXQuHev5PuWBtcdBEAbdJR0HNl3d4tpWXYx"

## Zip Code Sampling

In [28]:
# Import the census data into a Pandas DataFrame
census_pd = pd.read_csv("Census_Data.csv")

# Preview the data
census_pd.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income
0,15081,"South Heights, PA 15081, USA",342,50.2,31500.0,22177
1,20615,"Broomes Island, MD 20615, USA",424,43.4,114375.0,43920
2,50201,"Nevada, IA 50201, USA",8139,40.4,56619.0,28908
3,84020,"Draper, UT 84020, USA",42751,30.4,89922.0,33164
4,39097,"Louise, MS 39097, USA",495,58.0,26838.0,17399


In [45]:
# Randomly select 500 zip code locations that have at least 1000 residents
selected_zips = census_pd.sample(n=25)
selected_zips = selected_zips[selected_zips["Population"].astype(int) > 1000]

# Visualize
selected_zips.head()

Unnamed: 0,Zipcode,Address,Population,Median Age,Household Income,Per Capita Income
259,12941,"Jay, NY 12941, USA",1374,44.6,72361.0,40100
604,17980,"Tower City, PA 17980, USA",2963,45.3,50240.0,24934
213,7082,"Towaco, NJ 07082, USA",5107,45.9,129432.0,59216
406,27283,"Julian, NC 27283, USA",3124,43.4,52015.0,27215
512,3104,"Manchester, NH 03104, USA",33145,37.6,60468.0,32130


In [46]:
# Show the total zip codes selected
selected_zips.count()

Zipcode              17
Address              17
Population           17
Median Age           17
Household Income     17
Per Capita Income    17
dtype: int64

In [47]:
# Show the average population
selected_zips["Population"].mean()

10590.058823529413

In [48]:
# Show the average population
selected_zips["Household Income"].mean()

52361.23529411765

In [49]:
# Show the average population
selected_zips["Median Age"].mean()

42.37647058823529

## Yelp Data Retrieval

In [109]:
# Create Two DataFrames to store the Italian and the Mexican Data 
italian_data = pd.DataFrame();
mexican_data = pd.DataFrame();

# Setup the DataFrames to have appropriate columns
italian_data["Zip Code"] = ""
italian_data["Review Count"] = ""
italian_data["Average Rating"] = ""
italian_data["Weighted Rating"] = ""

mexican_data["Zip Code"] = ""
mexican_data["Review Count"] = ""
mexican_data["Average Rating"] = ""
mexican_data["Weighted Rating"] = ""

# Include Yelp Token
headers = {"Authorization": "Bearer gl6k6JmewUhzjMVBv0I2x4Bz_NRiEggSqjlGbTaejmbzvBJXgI36FPgWoqBnEL9QQ6wU5H4h41dxPkxVjHFlawtH69m1kcXQuHev5PuWBtcdBEAbdJR0HNl3d4tpWXYx"}

# Loop through every zip code
for index, row in selected_zips.iterrows():
    
    # Create two endpoint URLs:
    target_url_italian = "https://api.yelp.com/v3/businesses/search?term=Italian&location=%s" % (row["Zipcode"])
    target_url_mexican = "https://api.yelp.com/v3/businesses/search?term=Mexican&location=%s" % (row["Zipcode"])
    
    # Get the Yelp Reviews
    yelp_reviews_italian = requests.get(target_url_italian, headers=headers).json()
    yelp_reviews_mexican = requests.get(target_url_italian, headers=headers).json()
    
    # Calculate the total reviews and weighted rankings
    italian_review_count = 0
    italian_average_rating = 0
    italian_weighted_review = 0
    
    mexican_review_count = 0
    mexican_average_rating = 0
    mexican_weighted_review = 0
    
    # Loop through all records to calculate the review count and weighted review value
    for business in yelp_reviews_italian["businesses"]:

        italian_review_count = italian_review_count + business["review_count"]
        italian_average_rating = business["rating"]
        italian_weighted_review = italian_review_count * business["rating"]
    
    for business in yelp_reviews_mexican["businesses"]:
        mexican_review_count = mexican_review_count + business["review_count"]
        mexican_average_rating = business["rating"]
        mexican_weighted_review = mexican_review_count * business["rating"] 

    # Append the data to the appropriate column of the data frames
    try:
        italian_data.set_value(index, "Zip Code", row["Zipcode"])
        italian_data.set_value(index, "Review Count", italian_review_count)
        italian_data.set_value(index, "Average Rating", italian_average_rating)
        italian_data.set_value(index, "Weighted Rating", italian_weighted_review)

        mexican_data.set_value(index, "Zip Code", row["Zipcode"])
        mexican_data.set_value(index, "Review Count", mexican_review_count)
        mexican_data.set_value(index, "Average Rating", mexican_average_rating)
        mexican_data.set_value(index, "Weighted Rating", mexican_weighted_review)

    except:
        print("Uh oh")
        


In [110]:
# Preview Italian Data
italian_data

Unnamed: 0,Zip Code,Review Count,Average Rating,Weighted Rating
259,12941,533,4.5,2398.5
604,17980,22,2.5,55.0
213,7082,43,3.0,129.0
406,27283,986,4.0,3944.0
512,3104,1320,4.0,5280.0
329,28395,1219,3.5,4266.5
317,12723,389,4.0,1556.0
261,33614,2445,3.5,8557.5
284,66052,1870,3.5,6545.0
648,14807,233,4.0,932.0


In [111]:
# Preview Mexican Data
mexican_data

Unnamed: 0,Zip Code,Review Count,Average Rating,Weighted Rating
259,12941,533,4.5,2398.5
604,17980,22,2.5,55.0
213,7082,43,3.0,129.0
406,27283,986,4.0,3944.0
512,3104,1320,4.0,5280.0
329,28395,1219,3.5,4266.5
317,12723,389,4.0,1556.0
261,33614,2445,3.5,8557.5
284,66052,1870,3.5,6545.0
648,14807,233,4.0,932.0


## Summarize Data

In [112]:
mexican_data["Review Count"].sum()

13396

In [113]:
italian_data["Review Count"].sum()

13396

In [114]:
mexican_data["Weighted Rating"].sum() / mexican_data["Review Count"].sum()

3.726224246043595

In [115]:
italian_data["Weighted Rating"].sum() / italian_data["Review Count"].sum()

3.726224246043595

In [116]:
# Combine Data Frames into a single Data Frame
combined_data = pd.concat([mexican_data, italian_data], keys=["Mexican", "Italian"])
combined_data

Unnamed: 0,Unnamed: 1,Zip Code,Review Count,Average Rating,Weighted Rating
Mexican,259,12941,533,4.5,2398.5
Mexican,604,17980,22,2.5,55.0
Mexican,213,7082,43,3.0,129.0
Mexican,406,27283,986,4.0,3944.0
Mexican,512,3104,1320,4.0,5280.0
Mexican,329,28395,1219,3.5,4266.5
Mexican,317,12723,389,4.0,1556.0
Mexican,261,33614,2445,3.5,8557.5
Mexican,284,66052,1870,3.5,6545.0
Mexican,648,14807,233,4.0,932.0
