In [2]:
%run imports_and_functions.ipynb

## Installing YelpAPI

In [14]:
!pip install yelpapi

# The connection to the Yelp API is handled by the open-source yelpapi package (GitHub, author gfairchild).
# Its license permits use provided that the copyright notice below is displayed.
"""
    Copyright (c) 2013, Los Alamos National Security, LLC
    All rights reserved.
    Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
    following conditions are met:
    * Redistributions of source code must retain the above copyright notice, this list of conditions and the following
      disclaimer.
    * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
      following disclaimer in the documentation and/or other materials provided with the distribution.
    * Neither the name of Los Alamos National Security, LLC nor the names of its contributors may be used to endorse or
      promote products derived from this software without specific prior written permission.
    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""

Collecting yelpapi
  Downloading https://files.pythonhosted.org/packages/bb/07/f01be72829a3ce2da71bfde33d4bfe9ce5d8173a5a0470420fcb4dbacdd9/yelpapi-2.3.0-py2.py3-none-any.whl
Installing collected packages: yelpapi
Successfully installed yelpapi-2.3.0


## Global Variables

In [4]:
import os

from yelpapi import YelpAPI

# Yelp API keys are read from the environment so that no credential is stored in the notebook.
# Any key that was previously committed in this cell should be treated as leaked and revoked.
API_KEY1 = os.environ.get("YELP_API_KEY1")
API_KEY2 = os.environ.get("YELP_API_KEY2")  # spare key; None when the variable is not set

if not API_KEY1:
    raise RuntimeError("Set the YELP_API_KEY1 environment variable to a valid Yelp API key "
                       "before running this notebook.")

# Shared client used by the search code in the cells below.
yelp_api = YelpAPI(API_KEY1)

In [None]:
# Expected useful business subcategories:
# Restaurants (restaurants, All)
# Food (food, All)
# Shopping (shopping, All)
# Arts & Entertainment (arts, All)
#     -> Art Galleries (galleries, All)
#     -> Country Clubs (countryclubs)
#     -> Art Museums (artmuseums)
#     -> Performing Arts (theater, All)
# Nightlife (nightlife, All)
# Beauty & Spas (beautysvc, All)
# Active Life 
#     -> Fitness & Instruction (fitness, All)
#     -> Golf

# valet, casual/formal attire

## Extracting Area Data from Yelp

### Storing data for one Yelp Search

In [48]:
class SearchData(object):
    """Results of one Yelp business search: one location and one main business category.

    Search parameters must always include a location; for this project it is always a
    street address.

    Parameters
    ----------
    name : label used in progress messages.
    location : location string sent to the Yelp search.
    main_category : Yelp category alias sent as ``categories``.
    limit : number of results requested per call.
    offset : starting offset into the Yelp result list.
    radius : distance (meters) at which paging stops. It is NOT sent to the API; because
        results are sorted by distance, paging ends once a page reaches beyond it. Rows
        from that last page that lie outside the radius are kept.
    df : optional DataFrame to extend; a fresh empty frame is created when omitted.

    The module-level names ``yelp_api`` (the API client) and ``features`` (the business
    keys to keep) must be defined before ``add50rows``/``addyelpdata`` are called.
    """

    # Paging stops once ``offset`` reaches this value (Yelp's hard limit on search results,
    # per the original author's note).
    MAX_RESULTS = 1000

    def __init__(self, name, location, main_category, limit = 50, offset = 0, radius=1000,
                 df=None):
        self.name = name
        self.location = location
        self.limit = limit
        self.offset = offset
        self.radius = radius  # radius (meters) to search within, yelp sometimes grabs outside the radius
        self.main_category = main_category
        # A default of ``pd.DataFrame()`` in the signature would be one frame shared by
        # every instance; build a new one per instance instead.
        self.df = pd.DataFrame() if df is None else df
        self.sort_by = 'distance'

    # Other optional search parameters (not used here):
    #     term    - user inputted search term
    #     price   - 1-4 for business' dollar signs
    #     sort_by - default is best_match; other options include distance and review_count,
    #               but these may limit max search results returned to only 40, per yelp
    #               documentation

    def printparams(self):
        """Print the search parameters and the current shape of ``self.df``."""
        print(self.location, '= location, ', self.limit, '= limit, ', self.offset, '= offset, ',
              self.radius, '= radius, ', self.main_category, '= main_category')
        print('Dataframe size:', self.df.shape)

    def add50rows(self):
        """Fetch one page of results at the current offset and add it to ``self.df``.

        Only the keys listed in the module-level ``features`` are kept for each business.
        Despite the name, the page size is ``self.limit``.

        Returns
        -------
        The largest ``distance`` value in the fetched page (0 for an empty page).
        """
        search_results = yelp_api.search_query(location=self.location, limit=self.limit, offset=self.offset,
                                               categories=self.main_category, sort_by=self.sort_by)
        businesses = search_results.get('businesses', [])
        prev_count = self.df.shape[0]

        # Build the whole page at once; appending row by row copies the frame on every row
        # and relies on DataFrame.append, which newer pandas versions no longer provide.
        records = [{key: value for key, value in business.items() if key in features}
                   for business in businesses]
        if records:
            page = pd.DataFrame(records)
            if self.df.empty:
                self.df = page
            else:
                self.df = pd.concat([self.df, page], ignore_index=True, sort=False)

        max_distance = max((business.get('distance', 0) for business in businesses), default=0)
        print(self.df.shape[0] - prev_count, 'rows added to', self.name)
        return max_distance

    def addyelpdata(self):
        """Page through the search results and tag every row with ``main_category``.

        Paging stops when any of the following happens:
        * ``offset`` reaches ``MAX_RESULTS``;
        * a page contains a business farther away than ``self.radius``;
        * a page returns fewer rows than ``self.limit`` (no more results);
        * the API call raises (usually the end of the results or the result limit).
        """
        while self.offset < self.MAX_RESULTS:
            rows_before = self.df.shape[0]
            try:
                max_distance = self.add50rows()
            except Exception as err:
                # Best effort: keep what was collected so far, but say why paging stopped.
                print('Stopped paging', self.name, 'at offset', self.offset, '-', repr(err))
                break
            self.offset += self.limit
            rows_added = self.df.shape[0] - rows_before
            if rows_added == 0 or rows_added < self.limit or max_distance > self.radius:
                break
        # assign() returns a new frame, so a frame supplied by the caller is never modified in place.
        self.df = self.df.assign(main_category=self.main_category)

### Function test cases

In [60]:
# Collect 'shopping' results around a Boston street address.
testlocation3 = '125 Summer Street, Boston, MA'
test3 = SearchData(name='test3', location=testlocation3, main_category='shopping')
test3.addyelpdata()

50 rows added to test3
50 rows added to test3
50 rows added to test3
50 rows added to test3
50 rows added to test3
50 rows added to test3
50 rows added to test3
50 rows added to test3


In [61]:
# Number of rows whose business alias already appeared earlier in the frame.
test3.df['alias'].duplicated().sum()

0

In [64]:
# Summarise the column dtypes and non-null counts of the rows collected for test3.
test3.df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 8 columns):
alias           400 non-null object
categories      400 non-null object
coordinates     400 non-null object
distance        400 non-null float64
price           271 non-null object
rating          400 non-null float64
review_count    400 non-null float64
url             400 non-null object
dtypes: float64(3), object(5)
memory usage: 25.1+ KB


In [62]:
# Rows for which Yelp returned no price tier.
test3.df.loc[test3.df['price'].isna()]

Unnamed: 0,alias,categories,coordinates,distance,price,rating,review_count,url
4,martins-news-shops-boston-6,"[{'alias': 'mags', 'title': 'Newspapers & Maga...","{'latitude': 42.3523280591267, 'longitude': -7...",119.884831,,1.0,3.0,https://www.yelp.com/biz/martins-news-shops-bo...
6,studio-verticale-boston,"[{'alias': 'homedecor', 'title': 'Home Decor'}...","{'latitude': 42.3524858, 'longitude': -71.0593...",161.854397,,5.0,2.0,https://www.yelp.com/biz/studio-verticale-bost...
10,marquis-leathers-boston,"[{'alias': 'leather', 'title': 'Leather Goods'}]","{'latitude': 42.3512435, 'longitude': -71.0571...",192.235456,,3.0,1.0,https://www.yelp.com/biz/marquis-leathers-bost...
13,sheehans-church-goods-company-boston,"[{'alias': 'religiousitems', 'title': 'Religio...","{'latitude': 42.3539123535156, 'longitude': -7...",204.499002,,5.0,1.0,https://www.yelp.com/biz/sheehans-church-goods...
15,kabloom-boston-2,"[{'alias': 'florists', 'title': 'Florists'}]","{'latitude': 42.3510953038931, 'longitude': -7...",223.020636,,5.0,1.0,https://www.yelp.com/biz/kabloom-boston-2?adju...
19,tangs-furniture-boston-2,"[{'alias': 'furniture', 'title': 'Furniture St...","{'latitude': 42.35093, 'longitude': -71.05817}",225.83732,,1.0,7.0,https://www.yelp.com/biz/tangs-furniture-bosto...
25,campus-camera-and-electronics-boston,"[{'alias': 'photographystores', 'title': 'Phot...","{'latitude': 42.35067, 'longitude': -71.05681}",259.758424,,1.0,1.0,https://www.yelp.com/biz/campus-camera-and-ele...
27,sulgrave-news-boston,"[{'alias': 'media', 'title': 'Books, Mags, Mus...","{'latitude': 42.35527, 'longitude': -71.05766}",261.694182,,5.0,1.0,https://www.yelp.com/biz/sulgrave-news-boston?...
29,pshycic-readings-by-stacy-boston,"[{'alias': 'cosmetics', 'title': 'Cosmetics & ...","{'latitude': 42.3504295, 'longitude': -71.0577...",275.778178,,1.0,1.0,https://www.yelp.com/biz/pshycic-readings-by-s...
31,oriental-fortune-giftland-boston,"[{'alias': 'giftshops', 'title': 'Gift Shops'}]","{'latitude': 42.3531312, 'longitude': -71.0608...",284.285236,,3.5,2.0,https://www.yelp.com/biz/oriental-fortune-gift...


In [65]:
# Mean review count among the businesses that do have a price tier.
test3.df.loc[test3.df['price'].notna(), 'review_count'].mean()

26.276752767527675

In [66]:
# Distribution of price tiers among the businesses that have one.
test3.df.loc[test3.df['price'].notna(), 'price'].value_counts()

$$      150
$$$      65
$        31
$$$$     25
Name: price, dtype: int64

In [49]:
# Build a 'shopping' search around a Lexington street address and show its parameters.
testlocation = "117 Vine Street Lexington MA"
test2 = SearchData(name='test2', location=testlocation, main_category='shopping')
test2.printparams()

117 Vine Street Lexington MA = location,  50 = limit,  0 = offset,  1000 = radius,  shopping = main_category
Dataframe size: (0, 0)


In [50]:
# Page through the Yelp results for test2 and store them in test2.df.
test2.addyelpdata()

50 rows added to test2


In [52]:
# Display the rows collected for test2.
test2.df

Unnamed: 0,alias,categories,coordinates,distance,price,rating,url
0,emilia-creations-lexington,"[{'alias': 'bridal', 'title': 'Bridal'}, {'ali...","{'latitude': 42.4463804, 'longitude': -71.2247...",753.646467,$$$,5.0,https://www.yelp.com/biz/emilia-creations-lexi...
1,big-picture-framing-lexington,"[{'alias': 'framing', 'title': 'Framing'}]","{'latitude': 42.4466054, 'longitude': -71.225207}",779.923995,,5.0,https://www.yelp.com/biz/big-picture-framing-l...
2,hitrons-lexington,"[{'alias': 'mattresses', 'title': 'Mattresses'...","{'latitude': 42.44632, 'longitude': -71.22583}",822.302169,$$$,5.0,https://www.yelp.com/biz/hitrons-lexington?adj...
3,ride-studio-cafe-lexington,"[{'alias': 'coffee', 'title': 'Coffee & Tea'},...","{'latitude': 42.4468886050926, 'longitude': -7...",859.454554,$$,4.0,https://www.yelp.com/biz/ride-studio-cafe-lexi...
4,two-aprons-cookery-lexington-2,"[{'alias': 'cookingclasses', 'title': 'Cooking...","{'latitude': 42.454226, 'longitude': -71.221343}",874.291357,,5.0,https://www.yelp.com/biz/two-aprons-cookery-le...
5,shemin-nursuries-lexington,"[{'alias': 'gardening', 'title': 'Nurseries & ...","{'latitude': 42.4400101, 'longitude': -71.2115...",895.937868,$$$$,2.5,https://www.yelp.com/biz/shemin-nursuries-lexi...
6,seasons-four-lexington,"[{'alias': 'furniture', 'title': 'Furniture St...","{'latitude': 42.440016, 'longitude': -71.211575}",895.937868,$$$,3.5,https://www.yelp.com/biz/seasons-four-lexingto...
7,cvs-pharmacy-lexington-24,"[{'alias': 'drugstores', 'title': 'Drugstores'}]","{'latitude': 42.4473596, 'longitude': -71.2268...",909.370867,$$,2.5,https://www.yelp.com/biz/cvs-pharmacy-lexingto...
8,artinian-jewelry-lexington-2,"[{'alias': 'jewelry', 'title': 'Jewelry'}]","{'latitude': 42.4470482, 'longitude': -71.22715}",926.108604,$$,4.5,https://www.yelp.com/biz/artinian-jewelry-lexi...
9,greater-boston-running-company-lexington-lexin...,"[{'alias': 'sportswear', 'title': 'Sports Wear...","{'latitude': 42.447065, 'longitude': -71.2268863}",931.741693,$$,4.0,https://www.yelp.com/biz/greater-boston-runnin...


In [38]:
# Same Lexington address as the test2 cell, stored under a separate name.
testlocation = "117 Vine Street Lexington MA"
test1 = SearchData(name='test1', location=testlocation, main_category='shopping')
test1.printparams()

117 Vine Street Lexington MA = location,  50 = limit,  0 = offset,  1000 = radius,  shopping = main_category
Dataframe size: (0, 0)


In [39]:
# Page through the Yelp results for test1 and store them in test1.df.
# NOTE(review): the recorded output (20 pages) differs from the single page recorded for
# test2 at the same address; presumably it was produced by an earlier version of
# SearchData (this cell's execution count is lower than the class cell's) - TODO confirm.
test1.addyelpdata()

50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1
50 rows added to test1


In [43]:
# Rows for which Yelp returned no price tier.
test1.df.loc[test1.df['price'].isna()]

Unnamed: 0,alias,categories,coordinates,distance,price,rating,url
39,fine-fit-fashion-and-tailoring-burlington,"[{'alias': 'sewingalterations', 'title': 'Sewi...","{'latitude': 42.4972453, 'longitude': -71.1948...",5790.317382,,4.5,https://www.yelp.com/biz/fine-fit-fashion-and-...
101,le-vision-woburn,"[{'alias': 'optometrists', 'title': 'Optometri...","{'latitude': 42.468953, 'longitude': -71.177214}",3961.367579,,4.5,https://www.yelp.com/biz/le-vision-woburn?adju...
106,henna-cafe-newton,"[{'alias': 'hennaartists', 'title': 'Henna Art...","{'latitude': 42.3401794238955, 'longitude': -7...",11967.093929,,5.0,https://www.yelp.com/biz/henna-cafe-newton?adj...
128,tully-ink-boston-2,"[{'alias': 'screen_printing_tshirt_printing', ...","{'latitude': 42.3544129, 'longitude': -71.1328...",12392.853831,,5.0,https://www.yelp.com/biz/tully-ink-boston-2?ad...
160,rare-moving-and-trucking-boston,"[{'alias': 'movers', 'title': 'Movers'}, {'ali...","{'latitude': 42.28383, 'longitude': -71.06801}",21875.205891,,4.5,https://www.yelp.com/biz/rare-moving-and-truck...
180,frameworks-burlington-3,"[{'alias': 'framing', 'title': 'Framing'}]","{'latitude': 42.48541, 'longitude': -71.18828}",4757.376221,,5.0,https://www.yelp.com/biz/frameworks-burlington...
231,metropolitan-furniture-burlington,"[{'alias': 'interiordesign', 'title': 'Interio...","{'latitude': 42.472021, 'longitude': -71.210831}",2754.870924,,4.5,https://www.yelp.com/biz/metropolitan-furnitur...
235,five-stars-movers-brighton-5,"[{'alias': 'movers', 'title': 'Movers'}, {'ali...","{'latitude': 42.351519, 'longitude': -71.174146}",11208.575068,,3.5,https://www.yelp.com/biz/five-stars-movers-bri...
240,locks-and-keys-woburn-6,"[{'alias': 'locksmiths', 'title': 'Keys & Lock...","{'latitude': 42.504966, 'longitude': -71.13391}",9267.828149,,3.5,https://www.yelp.com/biz/locks-and-keys-woburn...
243,adams-fireplace-shop-cambridge,"[{'alias': 'lighting', 'title': 'Lighting Fixt...","{'latitude': 42.3872926, 'longitude': -71.1415...",9051.435927,,5.0,https://www.yelp.com/biz/adams-fireplace-shop-...


In [47]:
# Number of rows whose business alias already appeared earlier in the frame.
test1.df['alias'].duplicated().sum()

0

In [20]:
# Keep only the keys listed in `features` from the first business of a raw search response.
# `search_results` must already exist; it is assigned by the raw search_query cell further down.
first_business = search_results['businesses'][0]
test = {key: value for key, value in first_business.items() if key in features}
test

{'alias': 'ride-studio-cafe-lexington',
 'url': 'https://www.yelp.com/biz/ride-studio-cafe-lexington?adjust_creative=Lt6caCYWdiuAzrXR2HVsuA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=Lt6caCYWdiuAzrXR2HVsuA',
 'categories': [{'alias': 'coffee', 'title': 'Coffee & Tea'},
  {'alias': 'bikes', 'title': 'Bikes'},
  {'alias': 'bike_repair_maintenance', 'title': 'Bike Repair/Maintenance'}],
 'rating': 4.0,
 'coordinates': {'latitude': 42.4468886050926, 'longitude': -71.2262222272653},
 'price': '$$',
 'distance': 859.4545537437085}

In [7]:
# Raw, unpaged search response, kept to inspect the structure Yelp returns.
search_results = yelp_api.search_query(
    location="117 Vine Street Lexington MA",
    categories=['shopping'],
)
search_results

{'businesses': [{'id': 'ns8ftm5tQsjmVgolXj2Dmg',
   'alias': 'ride-studio-cafe-lexington',
   'name': 'Ride Studio Cafe',
   'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/uzJL_W4h_AekVxbm_7iWyg/o.jpg',
   'is_closed': False,
   'url': 'https://www.yelp.com/biz/ride-studio-cafe-lexington?adjust_creative=Lt6caCYWdiuAzrXR2HVsuA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=Lt6caCYWdiuAzrXR2HVsuA',
   'review_count': 77,
   'categories': [{'alias': 'coffee', 'title': 'Coffee & Tea'},
    {'alias': 'bikes', 'title': 'Bikes'},
    {'alias': 'bike_repair_maintenance', 'title': 'Bike Repair/Maintenance'}],
   'rating': 4.0,
   'coordinates': {'latitude': 42.4468886050926,
    'longitude': -71.2262222272653},
   'transactions': [],
   'price': '$$',
   'location': {'address1': '1720 Massachusetts Ave',
    'address2': '',
    'address3': '',
    'city': 'Lexington',
    'zip_code': '02421',
    'country': 'US',
    'state': 'MA',
    'display_address': ['1720 Mas

### All Master Data for one Area

In [None]:
class AreaData(object) :
    
