### Issues
- Scraping code never returned proper data
- Neo4j kept crashing with large datasets

### Visualization Stuff

KFC Limit Cals:
WITH 400 as max_calories
MATCH (Restaurant {name: "KFC"})-[i1:ENTRE]-(m1:Item)
MATCH (Restaurant {name: "KFC"})-[i2:SIDE]-(m2:Item)
WHERE i1.calories + i2.calories < max_calories 
RETURN *


## Part One: Retrieving Healthiest Fast Food Meals

In [400]:
!pip install neo4j-driver
!pip install uszipcode
!pip install -U textblob
!pip install tabulate

Requirement already up-to-date: textblob in c:\users\latif\anaconda3\lib\site-packages
Requirement already up-to-date: nltk>=3.1 in c:\users\latif\anaconda3\lib\site-packages (from textblob)
Requirement already up-to-date: six in c:\users\latif\anaconda3\lib\site-packages (from nltk>=3.1->textblob)
Collecting tabulate
  Downloading tabulate-0.8.2.tar.gz (45kB)
Building wheels for collected packages: tabulate
  Running setup.py bdist_wheel for tabulate: started
  Running setup.py bdist_wheel for tabulate: finished with status 'done'
  Stored in directory: C:\Users\latif\AppData\Local\pip\Cache\wheels\7c\fc\c4\f89c90e8bb6a0052a4ad4a9bc30a61429fea5d3439c63e2efd
Successfully built tabulate
Installing collected packages: tabulate
Successfully installed tabulate-0.8.2


In [401]:
from neo4j.v1 import GraphDatabase, basic_auth
from uszipcode import ZipcodeSearchEngine
import requests
from urllib.parse import quote

import nltk
from textblob.classifiers import NaiveBayesClassifier
from textblob import TextBlob

from IPython.display import HTML, display
import tabulate

### Helper Functions

In [177]:
def clean_str(line):
    invalids = ['"'," ", '\n']
    for n in invalids:
        line = line.replace(n, '')
        
    return line

def get_menu_from_file(filepath):
    items_file = open(filepath, "r")
    items= (items_file.read()).split("\n")
    
    menu = []
    for item in items:
        props = item.split(",")
        menu.append({
            'name': props[0],
            'type': clean_str(props[1]).title(),
            'calories': clean_str(props[2])
        })
    return menu

def insert_menu_into_db(db, menu, restaurant):
    for item in menu:
        info = {'restaurant': restaurant, 'dish_name': item['name'], 'calories': int(item['calories'])}
        db.run("CREATE (a: Item {name: {name}, type:{type}})", item)
        
        if item["type"] == "Main":
            db.run("MATCH(n: Restaurant {name: {restaurant}}) MATCH(v: Item {name: {dish_name}}) CREATE (n)-[:ENTRE {calories: {calories}}]->(v)", info)

        else:
            db.run("MATCH(n: Restaurant {name: {restaurant}}) MATCH(v: Item {name: {dish_name}}) CREATE (n)-[:SIDE {calories: {calories}}]->(v)", info)
            
def get_items_by_calories(db, max_calories, restaurant):
    info = {'max_calories': max_calories, 'restaurant': restaurant}
    results = db.run("""
        WITH {max_calories} as max_calories
        MATCH (Restaurant {name: {restaurant}})-[i1:ENTRE]-(m1:Item)
        MATCH (Restaurant {name: {restaurant}})-[i2:SIDE]-(m2:Item)
        WHERE i1.calories + i2.calories < max_calories 
        RETURN i1, i2, m1, m2
    """, info)
    
    items = {}
    for item in results:
        i1 = {'calories': item['i1']['calories'], 'name': item['m1']['name'], 'type': item['m1']['type']}
        i2 = {'calories': item['i2']['calories'], 'name': item['m2']['name'], 'type': item['m2']['type']}

        if i1['name'] not in items.keys():
            items[i1['name']] = i1
            
        if i2['name'] not in items.keys():
            items[i2['name']] = i2
        
    return items


### Setup

In [334]:
driver = GraphDatabase.driver("bolt://localhost:7687", auth=basic_auth("neo4j", "password"))
session = driver.session()

In [320]:
kfc_menu = get_menu_from_file("data/kfc.dat")
kfc_menu

[{'calories': '660',
  'name': 'Extra Crispy Breast and Drumstick',
  'type': 'Main'},
 {'calories': '460',
  'name': 'Extra Crispy Thigh and Drumstick',
  'type': 'Main'},
 {'calories': '260', 'name': 'Grilled Thigh and Drumstick', 'type': 'Main'},
 {'calories': '310', 'name': 'Grilled Breast and Drumstick', 'type': 'Main'},
 {'calories': '370', 'name': 'Original Thigh and Drumstick', 'type': 'Main'},
 {'calories': '480', 'name': 'Original Breast and Drumstick', 'type': 'Main'},
 {'calories': '180', 'name': 'Cole Slaw', 'type': 'Side'},
 {'calories': '25', 'name': 'Green Beans', 'type': 'Side'},
 {'calories': '160', 'name': 'Macaroni and cheese', 'type': 'Side'},
 {'calories': '120', 'name': 'Mashed Potatoes and Gravy', 'type': 'Side'}]

### Insert Information Into Database

In [335]:
ALL_RESTAUNRANTS = ["KFC"]
session.run("CREATE (a:Restaurant {name: $name})", name="KFC")

<neo4j.v1.result.BoltStatementResult at 0x21f42466e80>

In [326]:
insert_menu_into_db(session, kfc_menu, "KFC")

In [327]:
session.close()

### Retreive Healthiest Meals

In [179]:
get_items_by_calories(session, 400, "KFC")

{'Green Beans': {'calories': 25, 'name': 'Green Beans', 'type': 'Side'},
 'Grilled Breast and Drumstick': {'calories': 310,
  'name': 'Grilled Breast and Drumstick',
  'type': 'Main'},
 'Grilled Thigh and Drumstick': {'calories': 260,
  'name': 'Grilled Thigh and Drumstick',
  'type': 'Main'},
 'Mashed Potatoes and Gravy': {'calories': 120,
  'name': 'Mashed Potatoes and Gravy',
  'type': 'Side'},
 'Original Thigh and Drumstick': {'calories': 370,
  'name': 'Original Thigh and Drumstick',
  'type': 'Main'}}

## Part Two: Retrieve Restaurants By Zip Code

In [185]:
search = ZipcodeSearchEngine()
zipcode = search.by_zipcode("10001")
print(zipcode)

{
    "City": "New York",
    "Density": 34035.48387096774,
    "HouseOfUnits": 12476,
    "LandArea": 0.62,
    "Latitude": 40.75368539999999,
    "Longitude": -73.9991637,
    "NEBoundLatitude": 40.8282129,
    "NEBoundLongitude": -73.9321059,
    "Population": 21102,
    "SWBoundLatitude": 40.743451,
    "SWBoungLongitude": -74.00794499999998,
    "State": "NY",
    "TotalWages": 1031960117.0,
    "WaterArea": 0.0,
    "Wealthy": 48903.42702113544,
    "Zipcode": "10001",
    "ZipcodeType": "Standard"
}


### Helper Functions

In [377]:
#Open source code by Yelp,Inc that was modified to fit purpose

# API constants, you shouldn't have to change these.
API_HOST = 'https://api.yelp.com'
SEARCH_PATH = '/v3/businesses/search'
BUSINESS_PATH = '/v3/businesses/'  # Business ID will come after slash.

# Defaults for our simple example.
DEFAULT_TERM = 'dinner'
DEFAULT_LOCATION = 'San Francisco, CA'
SEARCH_LIMIT = 5

def request(host, path, api_key, url_params=None):
    """Given your API_KEY, send a GET request to the API.
    Args:
        host (str): The domain host of the API.
        path (str): The path of the API after the domain.
        API_KEY (str): Your API Key.
        url_params (dict): An optional set of query parameters in the request.
    Returns:
        dict: The JSON response from the request.
    Raises:
        HTTPError: An error occurs from the HTTP request.
    """
    url_params = url_params or {}
    url = '{0}{1}'.format(host, quote(path.encode('utf8')))
    headers = {
        'Authorization': 'Bearer %s' % api_key,
    }


    response = requests.request('GET', url, headers=headers, params=url_params)
    return response.json()


def search(api_key, term, zip_code, radius):
    """Query the Search API by a search term and location.
    Args:
        term (str): The search term passed to the API.
        location (str): The search location passed to the API.
    Returns:
        dict: The JSON response from the request.
    """
    search = ZipcodeSearchEngine()
    zipcode_details = search.by_zipcode(zip_code)
    url_params = {
        'term': term.replace(' ', '+'),
        'latitude': zipcode_details["Latitude"],
        'longitude': zipcode_details["Longitude"],
        'radius': (1609*radius),
        'limit': SEARCH_LIMIT
    }

    return request(API_HOST, SEARCH_PATH, api_key, url_params=url_params)


def get_business_reviews(api_key, business_id):
    """Query the Business API for reviews by a business ID.
    Args:
        business_id (str): The ID of the business to query.
    Returns:
        dict: The JSON response from the request.
    """
    business_path = BUSINESS_PATH + business_id + '/reviews'

    return request(API_HOST, business_path, api_key)

def extract_restaurants_info(api_key, restaurant, zip_code, radius):
    search_results = search(api_key, restaurant, zip_code, radius)

    restaurants= {}
    for business in search_results['businesses']:
        if business['name'] == restaurant:
            business['reviews'] = get_business_reviews(API_KEY, business['id'])['reviews']
            if business['id'] not in restaurants.keys():
                restaurants[business['id']] = business
                
    return restaurants


## Part Three: Naive Bayes Classification of Restaurant Reviews

In [292]:
def clean_line(line):
    invalids = ['"', '\n']
    for n in invalids:
        line = line.replace(n, '')
        
    return line

def find_string_in_array(array, term):
    found = []
    for item in array:
        try:
            item.index(term)
            found.append(item)
        except:
            pass
    return found

In [300]:
def generate_classifier_from_reviews_file(reviews_filepath):
    reviews_file = open(reviews_filepath, "r")
    reviews_str = reviews_file.read().split(".\n")
    
    reviews = []
    for review in reviews_str:
        review_attributes = review.split("\n")
        review_text = clean_line(find_string_in_array(review_attributes, "Text =")[0].split(" = ")[1])
        rating = int(clean_str(find_string_in_array(review_attributes, "Overall =")[0].split(" = ")[1]))

        if rating > 3:
            reviews.append((review_text, 'pos'))
        elif rating < 3:
            reviews.append((review_text, 'neg'))
            
    return NaiveBayesClassifier(reviews)

In [387]:
CLASSIFIER = generate_classifier_from_reviews_file("data/reviews.data")

In [394]:
CLASSIFIER.classify("The pizza is good.")

'pos'

## Part Four: Putting it All Together

In [236]:
API_KEY= "_QKHga2L3_6ye5qG8OY-M9ZFbji_LFtHZPVdSsqQ40E4V-8VOQDau41rZBPciWJGMijVuP7PCvGoJiEWlCqiDTGUzeN3lRiJm83nqyyB5zOXIYYoeqwTPZoGh705WnYx"
kfcs = extract_restaurants_info(API_KEY, "KFC", "78681", 7)
kfcs.keys()


dict_keys(['kfc-round-rock', 'kfc-round-rock-2', 'kfc-austin-8'])

In [406]:
def print_table(table):
    display(HTML(tabulate.tabulate(table, tablefmt='html')))
    
def get_classification(reviews):
    text = ""
    for review in reviews:
        text += review['text']
    
    return CLASSIFIER.classify("hello world")

def extract_food_item(choices):
    table = [["Item", "Type", "Calories"]]
    for key, choice in choices.items():
        info = [choice['name'], choice['type'], choice['calories']]
        table.append(info)
        
    print_table(table)

def extract_location_info(locations):
    table = [["Yelp Rating", "Address", "Sentiment Analysis"]]
    for key, loc_info in locations.items():
        address = loc_info['location']['address1'] + ", " + loc_info['location']['city'] + ", " + loc_info['location']['state']
        classification = get_classification(loc_info['reviews'])
        info =[loc_info['rating'], address, classification]
        table.append(info)
        
    print_table(table)
        
extract_location_info(locs)

0,1,2
Yelp Rating,Address,Sentiment Analysis
2.0,"404 W Taylor Ave, Round Rock, TX",pos
2.0,"641 Louis Henna Blvd, Round Rock, TX",pos
2.5,"1700 W Parmer Lane, Austin, TX",pos
2.5,"13435 US Hwy 183 North, Austin, TX",pos
1.5,"14824 N I H 35, suite D, Austin, TX",pos


In [412]:
def search_for_healthy_food(db, api_key, calorie_limit, zip_code, radius):
    for restaurant in ALL_RESTAURANTS:
        items = get_items_by_calories(db, calorie_limit, restaurant)
        locations = extract_restaurants_info(api_key, restaurant, zip_code, radius)
        
        print(restaurant)
        print("===============================================================================================================")
        print("Healthy Menu:")
        print("----------------------------------------------------------------------------------------------------------------")
        print(items)
        print("----------------------------------------------------------------------------------------------------------------")
        print("Locations:")
        extract_location_info(locations)
        


In [411]:
session = driver.session()
locs = search_for_healthy_food(session, API_KEY, 500, "78681", 7)
session.close()

KFC
Choices:
{'Original Thigh and Drumstick': {'calories': 370, 'name': 'Original Thigh and Drumstick', 'type': 'Main'}, 'Mashed Potatoes and Gravy': {'calories': 120, 'name': 'Mashed Potatoes and Gravy', 'type': 'Side'}, 'Grilled Breast and Drumstick': {'calories': 310, 'name': 'Grilled Breast and Drumstick', 'type': 'Main'}, 'Grilled Thigh and Drumstick': {'calories': 260, 'name': 'Grilled Thigh and Drumstick', 'type': 'Main'}, 'Macaroni and cheese': {'calories': 160, 'name': 'Macaroni and cheese', 'type': 'Side'}, 'Green Beans': {'calories': 25, 'name': 'Green Beans', 'type': 'Side'}, 'Extra Crispy Thigh and Drumstick': {'calories': 460, 'name': 'Extra Crispy Thigh and Drumstick', 'type': 'Main'}, 'Cole Slaw': {'calories': 180, 'name': 'Cole Slaw', 'type': 'Side'}}


0,1,2
Yelp Rating,Address,Sentiment Analysis
2.0,"404 W Taylor Ave, Round Rock, TX",pos
2.0,"641 Louis Henna Blvd, Round Rock, TX",pos
2.5,"1700 W Parmer Lane, Austin, TX",pos
2.5,"13435 US Hwy 183 North, Austin, TX",pos
1.5,"14824 N I H 35, suite D, Austin, TX",pos
