In [1]:
# Import Libraries

import requests
import json
import re
import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
from pandas.io.json import json_normalize
from math import sin, cos, sqrt, atan2, radians
from geopy.geocoders import Nominatim
import gmaps
import ipywidgets as widgets
from IPython.display import display
import warnings

# Filter Warnings
warnings.filterwarnings('ignore')

# Enact Locator
geolocator = Nominatim(user_agent="Yelp")

# Input API Key

#Note Must Access Yelp API for Key
api_key= API_KEY

headers = {'Authorization': 'Bearer %s' % api_key}
url='https://api.yelp.com/v3/businesses/search'

In [2]:

# Converts Address to Coordinates
def to_coords(address):
    location = geolocator.geocode(address)
    lat = location.latitude
    lon = location.longitude
    return [lat,lon]

# Get businesses data from JSON 
def get_business(business,lat,lon,amount):
    params = {'term':business,'latitude':lat,'longitude':lon,'sort_by':'distance','location':'New York','limit':amount}
    req = requests.get(url, params=params, headers=headers)
    parsed = json.loads(req.text)
    businesses = parsed["businesses"]
    return businesses

# Convert JSON data to DataFrame
def to_df(data,business,lat,long):
    data = json_normalize(data)    
    data['distance'] = data.apply(lambda x: get_distance(lat,long, x['coordinates.latitude'],x['coordinates.longitude']),axis=1)
    data['type'] = business
    return data


In [3]:
# Get distance
def get_distance(lat,long,lat2,long2):
    R = 6373.0
    lat1 = radians(lat)
    lon1 = radians(long)
    lat2 = radians(lat2)
    lon2 = radians(long2)
    dlon = lon2 - lon1
    dlat = lat2 - lat1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    distance = R * c
    return(distance)


In [4]:
# Filter business categories

def in_range(data,dist):
    i = 0
    stores = {}
    dt = data[data['distance'] < dist]
    dt = dt.reset_index()
    for row in dt['categories']:
        num_cat = len(row)
        count = 0
        for n in range(num_cat):
            #print(n,i)
            title = dt['categories'][i][count]['title']
            if(title in stores.keys()):
                stores[title] += 1
            else:
                stores[title] = 1
            count = count + 1
        i = i+1
    sort_store = sorted(stores.items(),key=lambda x: x[1],reverse=True)
    sort_store = pd.DataFrame(sort_store)
    sort_store.columns = ["Category","Number"]
    return pd.DataFrame(sort_store)


In [5]:
# Extract single rating from rating and review count

def get_rating(data):
    rating = np.mean(data['rating'].astype('float'))
    rc = np.sum(data['review_count'].astype('int'))
    return [rating,rc]

# Extract all ratings for a business

def get_ratings(data,dist):
    cats = in_range(data,dist)
    cats['rtg'] = cats.apply(lambda x: get_rating(data[data['categories'].astype(str).str.contains(x['Category'])]),axis=1)
    return(cats)


In [6]:
# Get Park Data for NYC

parks = pd.read_csv("/Users/allanporter/Downloads/PARK.csv")
unique_parks = parks.drop_duplicates(['PARK_NAME'])

# Get Coordinates for the NYC Parks

def find_coords(string):
    res = re.search(r'.[0-9][0-9].[0-9]+ [0-9][0-9].[0-9]+',string)[0]
    res = res.split(" ")
    return res

unique_parks['lon'] = unique_parks.apply(lambda x: find_coords(x['the_geom'])[0],axis=1)
unique_parks['lat'] = unique_parks.apply(lambda x: find_coords(x['the_geom'])[1],axis=1)



In [17]:
# Lat and Lon Points for Parks
#unique_parks['lat'] = unique_parks['lat'].astype('float')
#unique_parks['lon'] = unique_parks['lon'].astype('float')
#unique_parks['Distance'] = unique_parks.apply(lambda x: get_distance(lat,long,x['lat'],x['lon']),axis=1)

In [9]:
# Filter Park Type

unique_parks['clf'] = unique_parks['LANDUSE']

def check_type(dt):
    if(dt['clf'] != 'nan'):
        return(dt['LANDUSE'])
    else:
        park = dt['PARK_NAME'].lower()
        if(' park' in park):
            return('Park')
        elif(park.endswith(' square')):
            return('Square')
        elif(park.endswith(' boardwalk')):
            return('Boardwalk')
        elif(park.endswith(' triangle')):
            return('Triangle')
        elif('playground' in park):
            return('Playground')
        elif(('fields' in park)or('ballfields' in park)or('rink' in park)or('recreation' in park)or("field" in park)):
            return('Fields')
        elif('plaza' in park):
            return('Plaza')
        elif('mall' in park):
            return('Mall')
        elif('parkway' in park):
            return('Parkway')
        elif(('p.s.' in park)or('i.s.' in park)or('m.s.' in park)or('ps' in park)):
            return('School')
        elif(park.endswith(' golf course')):
            return('Golf Course')
        else:
            return('unknown')
    
    
unique_parks["clf"] = unique_parks.apply(lambda x: check_type(x),axis=1)


In [10]:
yelp_attributes = ["cafes","restaurants","bars","nightlife","landmarks","street vendors", "vegan", "vegetarian"
                  , "gyms", "stores","parks","zoos","playgrounds","cinemas","bowling","hiking","fishing","museums"
                  ,"Kids Activities","swimming pools"]

In [11]:
def pipeline(addr,dist):
    lat,lon = to_coords(addr)
    count = 0
    final = pd.DataFrame()
    for attr in yelp_attributes:
        #print(attr)
        dt = get_business(attr,lat,lon,50)
        data = to_df(dt,attr,lat,lon)
        #data = data.reset_index()
        l = len(data[data['distance'] <= dist])
        if(l == 0):
            pass
        else:    
            result = get_ratings(data,dist)
            #print(len(result))
            if(count == 0):
                final = result
            else:
                final = final.append(result,ignore_index=True)
            count += 1
    final['Rating'] = [r for r,v in final['rtg']]
    final['Review_Count'] = [v for r,v in final['rtg']]
    final = final.sort_values(by="Review_Count",ascending=False)
    final = final.drop_duplicates(subset='Category', keep="first")
    final = final[final["Review_Count"]>0]
    final = final.reset_index()
    return(final[["Category","Number","Rating","Review_Count"]])


In [12]:
def get_parks(lat,lon,dist):
    unique_parks['Distance'] = unique_parks.apply(lambda x: get_distance(lat,lon,x['lat'],x['lon']),axis=1)
    up = unique_parks[unique_parks['Distance']<dist]
    return up


In [13]:
def get_score(f):
    return np.mean(f[f['Review_Count']>5]['Rating'])
#get_score(f)

In [16]:
def highest_score(data):
    data = data.sort_values(by="Rating",ascending=False)
    data = data[data['Review_Count'] > 5]
    data =  data[['Category','Rating']][0:5]
    data = data.reset_index()
    print("Your neighborhood has highly rated {attr1}, {attr2}, {attr3}, {attr4}, and {attr5}.".format(
    attr1= data['Category'][0], attr2 = data['Category'][1], attr3 = data['Category'][2], attr4 = data['Category'][3],
        attr5 = data['Category'][4])
    )
          
f = pipeline("43 Maple Avenue, Cedarhurst, NY",2)

In [18]:
input_text = widgets.Text()

def bind_input_to_output(sender):
    address = input_text.value
    f = pipeline(address,2)
    score = get_score(f)
    print("Your Neighburhood Score is: ")
    print(score)
    print(highest_score(f))
input_text.on_submit(bind_input_to_output)
print("Please Input an Address")
input_text


Please Input an Address


Text(value='')

Your Neighburhood Score is: 
3.7488058872318377
Your neighborhood has highly rated Bus Tours, Boot Camps, Interval Training Gyms, Reiki, and Taekwondo.
None
