# Data Analysis: Businesses
## Analysis of the businesses and checkin dataset
Two fields are problematic: 'attributes' and 'categories':
* 'attributes' contains unstructured data about a venue.
* 'categories' shows that not all businesses are restaurants, so the non-restaurants have to be filtered out.

In [1]:
from datetime import datetime
from pathlib import Path
from sklearn import preprocessing

import numpy as np

from src.data.data_reader import DataReader
import os
import pandas as pd
import re
import json

# Execute from the src-directory root: move to the nearest directory named 'src'
# at or above the current working directory. The search is bounded by the
# filesystem root, so a notebook started outside any 'src' tree fails with a
# clear error instead of looping forever on os.chdir('..').
for candidate_directory in (Path.cwd(), *Path.cwd().parents):
    if candidate_directory.name == 'src':
        os.chdir(candidate_directory)
        break
else:
    raise FileNotFoundError(
        f"No directory named 'src' found at or above {Path.cwd()}; cannot locate the src-directory root."
    )

In [2]:
# Load the businesses file of the data set into a dataframe, keeping only the listed fields
filtered_business_fields = [
    'business_id',
    'name',
    'city',
    'stars',
    'review_count',
    'attributes',  # Filtered in _parse_categories()
    'categories',  # Filtered in _parse_categories()
]
businesses_file = Path('..', 'data', DataReader.EXPECTED_FILES[0])
entries = DataReader._get_entries_from_file(businesses_file)
filtered_entries = DataReader._filter_entries(entries, filtered_business_fields)
businesses = pd.DataFrame.from_records(data=filtered_entries)
businesses

Unnamed: 0,business_id,name,city,stars,review_count,attributes,categories
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ",Santa Barbara,5.0,7,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop..."
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,Affton,3.0,15,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma..."
2,tUFrWirKiKi_TAnsVWINQQ,Target,Tucson,3.5,22,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G..."
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,Philadelphia,4.0,80,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,Green Lane,4.5,13,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food"
...,...,...,...,...,...,...,...
150341,IUQopTMmYQG-qRtBk-8QnA,Binh's Nails,Edmonton,3.0,13,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...","Nail Salons, Beauty & Spas"
150342,c8GjPIOTGVmIemT7j5_SyQ,Wild Birds Unlimited,Nashville,4.0,5,"{'BusinessAcceptsCreditCards': 'True', 'Restau...","Pets, Nurseries & Gardening, Pet Stores, Hobby..."
150343,_QAMST-NrQobXduilWEqSw,Claire's Boutique,Indianapolis,3.5,8,"{'RestaurantsPriceRange2': '1', 'BusinessAccep...","Shopping, Jewelry, Piercing, Toy Stores, Beaut..."
150344,mtGm22y5c2UHNXDFAjaPNw,Cyclery & Fitness Center,Edwardsville,4.0,24,"{'BusinessParking': '{'garage': False, 'street...","Fitness/Exercise Equipment, Eyewear & Optician..."


### Normalisation of available data

In [3]:
businesses = businesses.rename(columns={'stars': 'average_stars'})
column_names_to_normalise = ['average_stars', 'review_count']


def _min_max_normalised(column_name):
    """Return the min-max scaled values of businesses[column_name] as a float16 series."""
    column_values = businesses[column_name].to_numpy().reshape(-1, 1)  # The scaler expects a 2D array
    scaled_values = preprocessing.MinMaxScaler().fit_transform(column_values).flatten()
    return pd.Series(
        data=scaled_values,
        index=businesses.index,  # To relink with the original dataframe
        name=f'business_{column_name}_normalised',
        dtype=np.float16,
    )


normalised_series = [_min_max_normalised(column_name) for column_name in column_names_to_normalise]
# Replace the raw columns with their normalised counterparts
businesses = pd.concat(
    [businesses.drop(columns=column_names_to_normalise), *normalised_series],
    axis=1,
)
businesses

Unnamed: 0,business_id,name,city,attributes,categories,business_average_stars_normalised,business_review_count_normalised
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ",Santa Barbara,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",1.000,0.000264
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,Affton,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...",0.500,0.001322
2,tUFrWirKiKi_TAnsVWINQQ,Target,Tucson,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...",0.625,0.002247
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,Philadelphia,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...",0.750,0.009918
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,Green Lane,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food",0.875,0.001058
...,...,...,...,...,...,...,...
150341,IUQopTMmYQG-qRtBk-8QnA,Binh's Nails,Edmonton,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...","Nail Salons, Beauty & Spas",0.500,0.001058
150342,c8GjPIOTGVmIemT7j5_SyQ,Wild Birds Unlimited,Nashville,"{'BusinessAcceptsCreditCards': 'True', 'Restau...","Pets, Nurseries & Gardening, Pet Stores, Hobby...",0.750,0.000000
150343,_QAMST-NrQobXduilWEqSw,Claire's Boutique,Indianapolis,"{'RestaurantsPriceRange2': '1', 'BusinessAccep...","Shopping, Jewelry, Piercing, Toy Stores, Beaut...",0.625,0.000397
150344,mtGm22y5c2UHNXDFAjaPNw,Cyclery & Fitness Center,Edwardsville,"{'BusinessParking': '{'garage': False, 'street...","Fitness/Exercise Equipment, Eyewear & Optician...",0.750,0.002512


## Data Selection
Let's find all distinct values for categories and create a whitelist of every category directly related to restaurants.

In [4]:
# Every distinct value in the 'categories' column (rows without categories are skipped)
set().union(*(
    categories_string.split(", ")
    for categories_string in businesses['categories']
    if categories_string
))

{'Waxing',
 'Embassy',
 '3D Printing',
 'Golf Cart Dealers',
 'Generator Installation/Repair',
 'Honey',
 'Town Hall',
 'Metal Detector Services',
 'Cardiologists',
 'Water Parks',
 'Car Window Tinting',
 'Golf Equipment Shops',
 'Boat Dealers',
 'Brazilian Jiu-jitsu',
 'Trainers',
 'Dominican',
 'Television Service Providers',
 'Party Bus Rentals',
 'Poke',
 'Cafes',
 'Fashion',
 'Private Investigation',
 'Art Supplies',
 'Indoor Landscaping',
 'Bocce Ball',
 'Burmese',
 'Real Estate Photography',
 'Furniture Stores',
 'Sewing & Alterations',
 'Faith-based Crisis Pregnancy Centers',
 'Paragliding',
 'Sauna Installation & Repair',
 'Fertility',
 'Cigar Bars',
 'Shoe Repair',
 'Country Clubs',
 'Health Retreats',
 'Cheese Shops',
 'Honduran',
 'Gay Bars',
 'Lactation Services',
 'Bus Rental',
 'DUI Schools',
 'Plumbing',
 'Wheel & Rim Repair',
 'Supernatural Readings',
 'Tennis',
 'Fabric Stores',
 'Ski & Snowboard Shops',
 'Car Stereo Installation',
 'Signmaking',
 'Dance Wear',
 'Medi

In [5]:
# Whitelist attempt 1
# All (manually curated) restaurant-like tags
categories_whitelist_1 = {
    "Food Court",
    "Steakhouses",
    "Brasseries",
    "Gastropubs",
    "Tapas Bars",
    "Diners",
    "Buffets",
    "Food Trucks",
    "Restaurants",
    "Fast Food",
    "Food Stands",
    "Dinner Theater",
}


def _whitelisted_category_set(category_group, whitelist):
    """Return the set of categories in category_group, or None when none of them is whitelisted."""
    if not category_group:  # No category is provided by Yelp
        return None
    categories = set(category_group.split(", "))  # String of all categories -> set of individual categories
    return categories if categories.intersection(whitelist) else None


businesses['categories_whitelist_1'] = [
    _whitelisted_category_set(category_group, categories_whitelist_1)
    for category_group in businesses['categories']
]
# Keep only the businesses that have at least one whitelisted category
has_whitelist_1_category = businesses['categories_whitelist_1'].notna()
businesses_whitelist_1_categories = businesses.loc[has_whitelist_1_category]

In [6]:
# Whitelist attempt 2
# Only Food Trucks & Restaurants: data exploration shows that all restaurant-like
# businesses have at least one of these two categories.
categories_whitelist_2 = {"Food Trucks", "Restaurants"}

# Convert each string of categories to a set of individual categories (None when Yelp provides no category)
category_sets = (
    set(category_group.split(", ")) if category_group else None
    for category_group in businesses['categories']
)
# Keep the set only when it shares at least 1 category with the whitelist
businesses['categories_whitelist_2'] = [
    categories
    if categories and not categories.isdisjoint(categories_whitelist_2)
    else None
    for categories in category_sets
]
# Keep only the businesses that have at least one whitelisted category
has_whitelist_2_category = businesses['categories_whitelist_2'].notna()
businesses_whitelist_2_categories = businesses.loc[has_whitelist_2_category]

In [7]:
# Compare both selections explicitly instead of asserting the outcome in the message:
# the counts are reported as equal only when they are, and "same rows" only when
# both selections have an identical index.
whitelist_1_count = len(businesses_whitelist_1_categories)
whitelist_2_count = len(businesses_whitelist_2_categories)
same_rows_selected = businesses_whitelist_1_categories.index.equals(businesses_whitelist_2_categories.index)

if same_rows_selected:
    print(f"The length of the manually curated list of categories is equal to only checking for 'restaurant' and 'Food Trucks': {whitelist_1_count} == {whitelist_2_count}.\nThe same rows were selected by each query.")
else:
    print(f"The manually curated list of categories and only checking for 'restaurant' and 'Food Trucks' select different rows: {whitelist_1_count} vs. {whitelist_2_count} businesses.")
businesses

The length of the manually curated list of categories is equal to only checking for 'restaurant' and 'Food Trucks': 52533 == 52533.
The same rows were selected by each query.


Unnamed: 0,business_id,name,city,attributes,categories,business_average_stars_normalised,business_review_count_normalised,categories_whitelist_1,categories_whitelist_2
0,Pns2l4eNsfO8kk83dixA6A,"Abby Rappoport, LAC, CMQ",Santa Barbara,{'ByAppointmentOnly': 'True'},"Doctors, Traditional Chinese Medicine, Naturop...",1.000,0.000264,,
1,mpf3x-BjTdTEA3yCZrAYPw,The UPS Store,Affton,{'BusinessAcceptsCreditCards': 'True'},"Shipping Centers, Local Services, Notaries, Ma...",0.500,0.001322,,
2,tUFrWirKiKi_TAnsVWINQQ,Target,Tucson,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Department Stores, Shopping, Fashion, Home & G...",0.625,0.002247,,
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,Philadelphia,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...",0.750,0.009918,"{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{Bakeries, Food, Bubble Tea, Coffee & Tea, Res..."
4,mWMc6_wTdE0EUBKIGXDVfA,Perkiomen Valley Brewery,Green Lane,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","Brewpubs, Breweries, Food",0.875,0.001058,,
...,...,...,...,...,...,...,...,...,...
150341,IUQopTMmYQG-qRtBk-8QnA,Binh's Nails,Edmonton,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...","Nail Salons, Beauty & Spas",0.500,0.001058,,
150342,c8GjPIOTGVmIemT7j5_SyQ,Wild Birds Unlimited,Nashville,"{'BusinessAcceptsCreditCards': 'True', 'Restau...","Pets, Nurseries & Gardening, Pet Stores, Hobby...",0.750,0.000000,,
150343,_QAMST-NrQobXduilWEqSw,Claire's Boutique,Indianapolis,"{'RestaurantsPriceRange2': '1', 'BusinessAccep...","Shopping, Jewelry, Piercing, Toy Stores, Beaut...",0.625,0.000397,,
150344,mtGm22y5c2UHNXDFAjaPNw,Cyclery & Fitness Center,Edwardsville,"{'BusinessParking': '{'garage': False, 'street...","Fitness/Exercise Equipment, Eyewear & Optician...",0.750,0.002512,,


We conclude that `categories_whitelist_2` will be used for filtering, since it selects the same businesses as the longer manually curated whitelist.
The second problem is the 'attributes' field. It contains a JSON object with unstructured subfields. We will now discover which of these subfields are usable for a recommender system.
First, we discover all distinct subfields in the 'attributes' field.

In [8]:
# Count, for every possible attribute key, how many businesses list it
from collections import Counter
all_keys = Counter()
for business_attributes in businesses['attributes']:
    if business_attributes is None:  # Business without any attributes
        continue
    all_keys.update(business_attributes.keys())
print(f'There are {len(businesses)} businesses')
all_keys.most_common()  # Sort by most common values

There are 150346 businesses


[('BusinessAcceptsCreditCards', 119765),
 ('BusinessParking', 91085),
 ('RestaurantsPriceRange2', 85314),
 ('BikeParking', 72638),
 ('RestaurantsTakeOut', 59857),
 ('WiFi', 56914),
 ('RestaurantsDelivery', 56282),
 ('GoodForKids', 53375),
 ('OutdoorSeating', 48802),
 ('RestaurantsReservations', 45247),
 ('HasTV', 45084),
 ('Ambience', 44279),
 ('RestaurantsGoodForGroups', 44170),
 ('Alcohol', 43189),
 ('ByAppointmentOnly', 42339),
 ('Caters', 40127),
 ('RestaurantsAttire', 39255),
 ('NoiseLevel', 37993),
 ('GoodForMeal', 29087),
 ('WheelchairAccessible', 28953),
 ('RestaurantsTableService', 19982),
 ('DogsAllowed', 18284),
 ('BusinessAcceptsBitcoin', 17430),
 ('HappyHour', 15171),
 ('DriveThru', 7760),
 ('Music', 7521),
 ('AcceptsInsurance', 5713),
 ('BestNights', 5694),
 ('CoatCheck', 5584),
 ('GoodForDancing', 4628),
 ('Smoking', 4567),
 ('BYOB', 4451),
 ('Corkage', 3553),
 ('BYOBCorkage', 1444),
 ('HairSpecializesIn', 1065),
 ('AgesAllowed', 129),
 ('Open24Hours', 39),
 ('DietaryRes

We now manually select the fields that might be of interest for a recommender system:

In [9]:
# Attributes that are kept as-is (key -> raw value)
filtered_attributes_single = {
    'RestaurantsTakeOut',
    'RestaurantsDelivery',
    'RestaurantsPriceRange2',
    'GoodForKids',
    'RestaurantsGoodForGroups',
    'NoiseLevel'
}
# Attributes whose value is itself a dict; their sub-keys are flattened into the result
filtered_attributes_multi = {
    'Ambience',
    'GoodForMeal'
}


def _parse_nested_attribute(attribute_value):
    """Parse the string form of a nested attribute dict into a real dict.

    The provided JSON dict is not entirely up-to-spec, so it is repaired first:
    single quotes become double quotes, the text is lowercased, 'none' becomes
    'null' and the u-prefixes in front of keys are removed.
    """
    json_string = attribute_value.replace('\'', '\"').lower().replace('none', 'null')
    json_string = re.sub(', u"', ', "', json_string)
    json_string = json_string.replace('{u', '{')
    return json.loads(json_string)


def _filter_business_attributes(business_attributes):
    """Return only the whitelisted attributes of one business (empty dict if it has none)."""
    parsed_business_attributes = {}
    if business_attributes is None:
        return parsed_business_attributes
    for attribute_key, attribute_value in business_attributes.items():
        if attribute_key in filtered_attributes_multi and attribute_value.startswith('{'):  # Attribute is again a dict
            parsed_business_attributes.update(_parse_nested_attribute(attribute_value))
        elif attribute_key in filtered_attributes_single:
            parsed_business_attributes[attribute_key] = attribute_value
    return parsed_business_attributes


businesses_attributes_filtered = [
    _filter_business_attributes(business_attributes)
    for business_attributes in businesses['attributes']
]

businesses['attributes_filtered'] = businesses_attributes_filtered
businesses[['attributes', 'attributes_filtered']]

Unnamed: 0,attributes,attributes_filtered
0,{'ByAppointmentOnly': 'True'},{}
1,{'BusinessAcceptsCreditCards': 'True'},{}
2,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","{'RestaurantsPriceRange2': '2', 'RestaurantsTa..."
3,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","{'RestaurantsDelivery': 'False', 'RestaurantsP..."
4,"{'BusinessAcceptsCreditCards': 'True', 'Wheelc...","{'RestaurantsTakeOut': 'True', 'GoodForKids': ..."
...,...,...
150341,"{'ByAppointmentOnly': 'False', 'RestaurantsPri...",{'RestaurantsPriceRange2': '3'}
150342,"{'BusinessAcceptsCreditCards': 'True', 'Restau...",{'RestaurantsPriceRange2': '2'}
150343,"{'RestaurantsPriceRange2': '1', 'BusinessAccep...",{'RestaurantsPriceRange2': '1'}
150344,"{'BusinessParking': '{'garage': False, 'street...","{'RestaurantsPriceRange2': '4', 'RestaurantsTa..."


Now we have selected all data to use for businesses. This data now needs to be reformatted to allow for easy input into a neural network.

## Data Transformation
Let's first see what we have at this moment...

In [10]:
# Businesses without a 'categories_whitelist_2' value are not considered
# 'restaurant-like' and thus fall out-of-scope: keep only the others.
is_restaurant_like = businesses['categories_whitelist_2'].notna()
businesses = businesses.loc[is_restaurant_like].copy()
businesses

Unnamed: 0,business_id,name,city,attributes,categories,business_average_stars_normalised,business_review_count_normalised,categories_whitelist_1,categories_whitelist_2,attributes_filtered
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,Philadelphia,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...",0.750,0.009918,"{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{'RestaurantsDelivery': 'False', 'RestaurantsP..."
5,CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,Ashland City,"{'BusinessParking': 'None', 'BusinessAcceptsCr...","Burgers, Fast Food, Sandwiches, Food, Ice Crea...",0.250,0.000132,"{Burgers, Sandwiches, Ice Cream & Frozen Yogur...","{Burgers, Sandwiches, Ice Cream & Frozen Yogur...","{'RestaurantsTakeOut': 'True', 'GoodForKids': ..."
8,k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,Affton,"{'Caters': 'True', 'Alcohol': 'u'full_bar'', '...","Pubs, Restaurants, Italian, Bars, American (Tr...",0.500,0.001851,"{Pubs, American (Traditional), Bars, Italian, ...","{Pubs, American (Traditional), Bars, Italian, ...","{'RestaurantsDelivery': 'False', 'RestaurantsT..."
9,bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,Nashville,"{'RestaurantsAttire': ''casual'', 'Restaurants...","Ice Cream & Frozen Yogurt, Fast Food, Burgers,...",0.125,0.000661,"{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...","{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...","{'RestaurantsGoodForGroups': 'False', 'GoodFor..."
11,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,Tampa Bay,"{'Alcohol': ''none'', 'OutdoorSeating': 'None'...","Vietnamese, Food, Restaurants, Food Trucks",0.750,0.000661,"{Food Trucks, Food, Vietnamese, Restaurants}","{Food Trucks, Food, Vietnamese, Restaurants}","{'touristy': False, 'hipster': False, 'romanti..."
...,...,...,...,...,...,...,...,...,...,...
150325,l9eLGG9ZKpLJzboZq-9LRQ,Wawa,Clifton Heights,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Restaurants, Sandwiches, Convenience Stores, C...",0.500,0.000793,"{Sandwiches, Food, Convenience Stores, Coffee ...","{Sandwiches, Food, Convenience Stores, Coffee ...","{'RestaurantsPriceRange2': '1', 'RestaurantsTa..."
150327,cM6V90ExQD6KMSU3rRB5ZA,Dutch Bros Coffee,Boise,"{'WiFi': ''free'', 'RestaurantsGoodForGroups':...","Cafes, Juice Bars & Smoothies, Coffee & Tea, R...",0.750,0.003702,"{Juice Bars & Smoothies, Food, Cafes, Coffee &...","{Juice Bars & Smoothies, Food, Cafes, Coffee &...","{'RestaurantsGoodForGroups': 'True', 'Restaura..."
150336,WnT9NIzQgLlILjPT0kEcsQ,Adelita Taqueria & Restaurant,Philadelphia,"{'WheelchairAccessible': 'False', 'Restaurants...","Restaurants, Mexican",0.875,0.003967,"{Mexican, Restaurants}","{Mexican, Restaurants}","{'GoodForKids': 'True', 'RestaurantsTakeOut': ..."
150339,2O2K6SXPWv56amqxCECd4w,The Plum Pit,Aston,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...",0.875,0.001190,"{Event Planning & Services, Food, Food Trucks,...","{Event Planning & Services, Food, Food Trucks,...","{'RestaurantsDelivery': 'False', 'RestaurantsP..."


In [11]:
# Count in how many of the remaining businesses each category appears
categories_appearances = Counter()
for business_categories in businesses['categories_whitelist_2']:
    categories_appearances.update(business_categories)
categories_appearances.most_common()

[('Restaurants', 52268),
 ('Food', 15737),
 ('Nightlife', 8730),
 ('Sandwiches', 8366),
 ('Bars', 8342),
 ('American (Traditional)', 8139),
 ('Pizza', 7093),
 ('Fast Food', 6472),
 ('Breakfast & Brunch', 6239),
 ('American (New)', 6097),
 ('Burgers', 5636),
 ('Mexican', 4600),
 ('Italian', 4573),
 ('Coffee & Tea', 4075),
 ('Seafood', 3539),
 ('Chinese', 3169),
 ('Event Planning & Services', 3110),
 ('Salad', 3064),
 ('Chicken Wings', 2966),
 ('Cafes', 2756),
 ('Delis', 2393),
 ('Caterers', 2099),
 ('Specialty Food', 2030),
 ('Bakeries', 1906),
 ('Desserts', 1874),
 ('Japanese', 1830),
 ('Sports Bars', 1797),
 ('Sushi Bars', 1717),
 ('Barbeque', 1694),
 ('Asian Fusion', 1547),
 ('Steakhouses', 1506),
 ('Diners', 1494),
 ('Cocktail Bars', 1405),
 ('Pubs', 1397),
 ('Food Trucks', 1273),
 ('Mediterranean', 1263),
 ('Beer', 1158),
 ('Wine & Spirits', 1158),
 ('Vegetarian', 1158),
 ('Ice Cream & Frozen Yogurt', 1113),
 ('Arts & Entertainment', 1094),
 ('Soup', 1061),
 ('Juice Bars & Smoothie

In [12]:
# We will only keep the categories with a high occurrence
MIN_CATEGORY_APPEARANCES = 500  # A category must appear at least this often to be kept
common_categories = {
    category
    for category, appearances in categories_appearances.items()
    if appearances >= MIN_CATEGORY_APPEARANCES
}
common_categories

{'American (New)',
 'American (Traditional)',
 'Arts & Entertainment',
 'Asian Fusion',
 'Bagels',
 'Bakeries',
 'Barbeque',
 'Bars',
 'Beer',
 'Beer Bar',
 'Breakfast & Brunch',
 'Buffets',
 'Burgers',
 'Cafes',
 'Cajun/Creole',
 'Caribbean',
 'Caterers',
 'Cheesesteaks',
 'Chicken Shop',
 'Chicken Wings',
 'Chinese',
 'Cocktail Bars',
 'Coffee & Tea',
 'Comfort Food',
 'Convenience Stores',
 'Delis',
 'Desserts',
 'Diners',
 'Ethnic Food',
 'Event Planning & Services',
 'Fast Food',
 'Food',
 'Food Delivery Services',
 'Food Trucks',
 'French',
 'Gastropubs',
 'Gluten-Free',
 'Greek',
 'Grocery',
 'Hot Dogs',
 'Ice Cream & Frozen Yogurt',
 'Indian',
 'Italian',
 'Japanese',
 'Juice Bars & Smoothies',
 'Latin American',
 'Lounges',
 'Mediterranean',
 'Mexican',
 'Middle Eastern',
 'Music Venues',
 'Nightlife',
 'Pizza',
 'Pubs',
 'Restaurants',
 'Salad',
 'Sandwiches',
 'Seafood',
 'Shopping',
 'Soul Food',
 'Soup',
 'Southern',
 'Specialty Food',
 'Sports Bars',
 'Steakhouses',
 'Sus

In [13]:
# Per business, keep only the categories that are also in 'common_categories'
businesses['categories_whitelist_2_most_common'] = businesses['categories_whitelist_2'].map(
    lambda business_categories: common_categories.intersection(business_categories)
)
# Recount the appearances on the reduced category sets
categories_appearances = Counter()
for business_categories in businesses['categories_whitelist_2_most_common']:
    categories_appearances.update(business_categories)
print("The following categories will be one-hot encoded:")
categories_appearances.most_common()

The following categories will be one-hot encoded:


[('Restaurants', 52268),
 ('Food', 15737),
 ('Nightlife', 8730),
 ('Sandwiches', 8366),
 ('Bars', 8342),
 ('American (Traditional)', 8139),
 ('Pizza', 7093),
 ('Fast Food', 6472),
 ('Breakfast & Brunch', 6239),
 ('American (New)', 6097),
 ('Burgers', 5636),
 ('Mexican', 4600),
 ('Italian', 4573),
 ('Coffee & Tea', 4075),
 ('Seafood', 3539),
 ('Chinese', 3169),
 ('Event Planning & Services', 3110),
 ('Salad', 3064),
 ('Chicken Wings', 2966),
 ('Cafes', 2756),
 ('Delis', 2393),
 ('Caterers', 2099),
 ('Specialty Food', 2030),
 ('Bakeries', 1906),
 ('Desserts', 1874),
 ('Japanese', 1830),
 ('Sports Bars', 1797),
 ('Sushi Bars', 1717),
 ('Barbeque', 1694),
 ('Asian Fusion', 1547),
 ('Steakhouses', 1506),
 ('Diners', 1494),
 ('Cocktail Bars', 1405),
 ('Pubs', 1397),
 ('Food Trucks', 1273),
 ('Mediterranean', 1263),
 ('Beer', 1158),
 ('Wine & Spirits', 1158),
 ('Vegetarian', 1158),
 ('Ice Cream & Frozen Yogurt', 1113),
 ('Arts & Entertainment', 1094),
 ('Soup', 1061),
 ('Juice Bars & Smoothie

In [14]:
def _one_hot_category_column(category):
    """Return a uint8 series that is 1 for every business having `category` and 0 otherwise."""
    column_name = f"category_{category.replace(' ', '_').lower()}"
    has_category = businesses['categories_whitelist_2_most_common'].map(
        lambda business_categories: 1 if category in business_categories else 0
    )
    return has_category.rename(column_name).astype(np.uint8)


# One column per remaining category, in the order of the 'categories_appearances' keys
onehot_categories = [
    _one_hot_category_column(category)
    for category in categories_appearances.keys()
]
businesses = pd.concat([businesses, *onehot_categories], axis=1)
businesses

Unnamed: 0,business_id,name,city,attributes,categories,business_average_stars_normalised,business_review_count_normalised,categories_whitelist_1,categories_whitelist_2,attributes_filtered,...,category_gluten-free,category_latin_american,category_comfort_food,category_vegetarian,category_indian,category_buffets,category_middle_eastern,category_tacos,category_cheesesteaks,category_grocery
3,MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,Philadelphia,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...",0.750,0.009918,"{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{'RestaurantsDelivery': 'False', 'RestaurantsP...",...,0,0,0,0,0,0,0,0,0,0
5,CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,Ashland City,"{'BusinessParking': 'None', 'BusinessAcceptsCr...","Burgers, Fast Food, Sandwiches, Food, Ice Crea...",0.250,0.000132,"{Burgers, Sandwiches, Ice Cream & Frozen Yogur...","{Burgers, Sandwiches, Ice Cream & Frozen Yogur...","{'RestaurantsTakeOut': 'True', 'GoodForKids': ...",...,0,0,0,0,0,0,0,0,0,0
8,k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,Affton,"{'Caters': 'True', 'Alcohol': 'u'full_bar'', '...","Pubs, Restaurants, Italian, Bars, American (Tr...",0.500,0.001851,"{Pubs, American (Traditional), Bars, Italian, ...","{Pubs, American (Traditional), Bars, Italian, ...","{'RestaurantsDelivery': 'False', 'RestaurantsT...",...,0,0,0,0,0,0,0,0,0,0
9,bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,Nashville,"{'RestaurantsAttire': ''casual'', 'Restaurants...","Ice Cream & Frozen Yogurt, Fast Food, Burgers,...",0.125,0.000661,"{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...","{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...","{'RestaurantsGoodForGroups': 'False', 'GoodFor...",...,0,0,0,0,0,0,0,0,0,0
11,eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,Tampa Bay,"{'Alcohol': ''none'', 'OutdoorSeating': 'None'...","Vietnamese, Food, Restaurants, Food Trucks",0.750,0.000661,"{Food Trucks, Food, Vietnamese, Restaurants}","{Food Trucks, Food, Vietnamese, Restaurants}","{'touristy': False, 'hipster': False, 'romanti...",...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150325,l9eLGG9ZKpLJzboZq-9LRQ,Wawa,Clifton Heights,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Restaurants, Sandwiches, Convenience Stores, C...",0.500,0.000793,"{Sandwiches, Food, Convenience Stores, Coffee ...","{Sandwiches, Food, Convenience Stores, Coffee ...","{'RestaurantsPriceRange2': '1', 'RestaurantsTa...",...,0,0,0,0,0,0,0,0,0,0
150327,cM6V90ExQD6KMSU3rRB5ZA,Dutch Bros Coffee,Boise,"{'WiFi': ''free'', 'RestaurantsGoodForGroups':...","Cafes, Juice Bars & Smoothies, Coffee & Tea, R...",0.750,0.003702,"{Juice Bars & Smoothies, Food, Cafes, Coffee &...","{Juice Bars & Smoothies, Food, Cafes, Coffee &...","{'RestaurantsGoodForGroups': 'True', 'Restaura...",...,0,0,0,0,0,0,0,0,0,0
150336,WnT9NIzQgLlILjPT0kEcsQ,Adelita Taqueria & Restaurant,Philadelphia,"{'WheelchairAccessible': 'False', 'Restaurants...","Restaurants, Mexican",0.875,0.003967,"{Mexican, Restaurants}","{Mexican, Restaurants}","{'GoodForKids': 'True', 'RestaurantsTakeOut': ...",...,0,0,0,0,0,0,0,0,0,0
150339,2O2K6SXPWv56amqxCECd4w,The Plum Pit,Aston,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...",0.875,0.001190,"{Event Planning & Services, Food, Food Trucks,...","{Event Planning & Services, Food, Food Trucks,...","{'RestaurantsDelivery': 'False', 'RestaurantsP...",...,0,0,1,0,0,0,0,0,0,0


In [15]:
# Count in how many of the remaining businesses each filtered attribute appears
attributes_appearances = Counter()
for business_attributes in businesses['attributes_filtered']:
    # Pass the keys view explicitly: updating a Counter with the dict itself would add its values as counts
    attributes_appearances.update(business_attributes.keys())
print('The following attributes will be one-hot encoded:')
attributes_appearances.most_common()

The following attributes will be one-hot encoded:


[('RestaurantsTakeOut', 48884),
 ('RestaurantsDelivery', 47816),
 ('RestaurantsPriceRange2', 44697),
 ('RestaurantsGoodForGroups', 41445),
 ('romantic', 41036),
 ('intimate', 41036),
 ('touristy', 41036),
 ('classy', 41036),
 ('trendy', 41036),
 ('casual', 41036),
 ('GoodForKids', 40992),
 ('hipster', 40982),
 ('upscale', 40932),
 ('divey', 39883),
 ('NoiseLevel', 34853),
 ('dessert', 28651),
 ('latenight', 28651),
 ('lunch', 28651),
 ('dinner', 28651),
 ('brunch', 28651),
 ('breakfast', 28651)]

In [16]:
# Textual literals that stand for a Python value in the raw attribute strings
_ATTRIBUTE_LITERALS = {'None': None, 'True': True, 'False': False}


def _clean_attribute_value(value):
    """Normalise one raw attribute value.

    Non-string values (booleans / None) are returned untouched. The strings
    'None', 'True' and 'False' become the matching Python value. Any other
    string loses a leading u' prefix and all single quotes.
    """
    if not isinstance(value, str):
        return value
    if value in _ATTRIBUTE_LITERALS:
        return _ATTRIBUTE_LITERALS[value]
    return re.sub("^u'", "", value).replace("'", "")


def _attribute_column(attribute):
    """Return the cleaned values of `attribute` for every business (None where it is absent)."""
    # Cleaning is done elementwise instead of with Series.replace(..., None), because the
    # meaning of an explicit None replacement value differs between pandas versions.
    return (
        businesses['attributes_filtered']
        .map(lambda business_attributes: _clean_attribute_value(business_attributes.get(attribute)))
        .rename(f'attribute_{attribute.lower()}')
    )


onehot_attributes = [
    _attribute_column(attribute)
    for attribute, _ in attributes_appearances.most_common()  # Using the sorted list of attributes, since order matters
]

print('The value "None" does occur semi-often. The actual value will be estimated with the middle value\n')
for i, col in enumerate(onehot_attributes):
    print(f'{i}.\t{col.name}: {Counter(col).most_common()}')

The value "None" does occur semi-often. The actual value will be estimated with the middle value

0.	attribute_restaurantstakeout: [(True, 45527), (None, 4736), (False, 2270)]
1.	attribute_restaurantsdelivery: [(True, 27603), (False, 17608), (None, 7322)]
2.	attribute_restaurantspricerange2: [('2', 23180), ('1', 19638), (None, 7852), ('3', 1667), ('4', 196)]
3.	attribute_restaurantsgoodforgroups: [(True, 35583), (None, 11108), (False, 5842)]
4.	attribute_romantic: [(False, 37276), (None, 14588), (True, 669)]
5.	attribute_intimate: [(False, 36305), (None, 15444), (True, 784)]
6.	attribute_touristy: [(False, 37386), (None, 14864), (True, 283)]
7.	attribute_classy: [(False, 31962), (None, 14866), (True, 5705)]
8.	attribute_trendy: [(False, 33724), (None, 16462), (True, 2347)]
9.	attribute_casual: [(True, 20066), (False, 19024), (None, 13443)]
10.	attribute_goodforkids: [(True, 34794), (None, 11563), (False, 6176)]
11.	attribute_hipster: [(False, 36033), (None, 15553), (True, 947)]
12.	att

In [17]:
# Ordinal attributes are spread over [0, 1]; unknown values fall back to the second level.
_PRICE_RANGE_SCORES = {'1': 0, '2': 0.33, '3': 0.67, '4': 1}
_NOISE_LEVEL_SCORES = {'quiet': 0, 'average': 0.33, 'loud': 0.67, 'very_loud': 1}
_DEFAULT_ORDINAL_SCORE = 0.33


def _encode_attribute(series):
    """Encode one attribute series as float16 values in [0, 1].

    The encoding is chosen by the series name rather than by its position in
    'onehot_attributes', so a different ordering of the attributes cannot
    silently apply the wrong encoding.
    """
    if series.name == 'attribute_restaurantspricerange2':
        # '2' seems to be the most common value, thus default
        encoded = series.map(lambda x: _PRICE_RANGE_SCORES.get(x, _DEFAULT_ORDINAL_SCORE))
    elif series.name == 'attribute_noiselevel':
        # 'average' is the default value
        encoded = series.map(lambda x: _NOISE_LEVEL_SCORES.get(x, _DEFAULT_ORDINAL_SCORE))
    else:
        # Boolean attribute: anything that is neither True nor False gets the middle value
        encoded = series.map(lambda x: 1 if x is True else (0 if x is False else 0.5))
    return encoded.astype(np.float16)


# NOTE: this overwrites the entries of 'onehot_attributes' in place, so the cell
# must not be run a second time without re-running the cell that builds the list.
for index, attribute_series in enumerate(onehot_attributes):
    onehot_attributes[index] = _encode_attribute(attribute_series)



In [18]:
# Append the encoded attribute columns to the dataframe and index it by business id
businesses = (
    pd.concat([businesses, *onehot_attributes], axis=1)
    .set_index('business_id')
)
businesses

Unnamed: 0_level_0,name,city,attributes,categories,business_average_stars_normalised,business_review_count_normalised,categories_whitelist_1,categories_whitelist_2,attributes_filtered,categories_whitelist_2_most_common,...,attribute_hipster,attribute_upscale,attribute_divey,attribute_noiselevel,attribute_dessert,attribute_latenight,attribute_lunch,attribute_dinner,attribute_brunch,attribute_breakfast
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,Philadelphia,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...",0.750,0.009918,"{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{'RestaurantsDelivery': 'False', 'RestaurantsP...","{Food, Bakeries, Coffee & Tea, Restaurants}",...,0.5,0.5,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5
CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,Ashland City,"{'BusinessParking': 'None', 'BusinessAcceptsCr...","Burgers, Fast Food, Sandwiches, Food, Ice Crea...",0.250,0.000132,"{Burgers, Sandwiches, Ice Cream & Frozen Yogur...","{Burgers, Sandwiches, Ice Cream & Frozen Yogur...","{'RestaurantsTakeOut': 'True', 'GoodForKids': ...","{Burgers, Sandwiches, Ice Cream & Frozen Yogur...",...,0.5,0.5,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5
k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,Affton,"{'Caters': 'True', 'Alcohol': 'u'full_bar'', '...","Pubs, Restaurants, Italian, Bars, American (Tr...",0.500,0.001851,"{Pubs, American (Traditional), Bars, Italian, ...","{Pubs, American (Traditional), Bars, Italian, ...","{'RestaurantsDelivery': 'False', 'RestaurantsT...","{Pubs, American (Traditional), Bars, Italian, ...",...,0.0,0.0,0.0,0.330078,0.5,0.5,0.5,0.5,0.5,0.5
bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,Nashville,"{'RestaurantsAttire': ''casual'', 'Restaurants...","Ice Cream & Frozen Yogurt, Fast Food, Burgers,...",0.125,0.000661,"{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...","{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...","{'RestaurantsGoodForGroups': 'False', 'GoodFor...","{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...",...,0.5,0.5,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5
eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,Tampa Bay,"{'Alcohol': ''none'', 'OutdoorSeating': 'None'...","Vietnamese, Food, Restaurants, Food Trucks",0.750,0.000661,"{Food Trucks, Food, Vietnamese, Restaurants}","{Food Trucks, Food, Vietnamese, Restaurants}","{'touristy': False, 'hipster': False, 'romanti...","{Restaurants, Food, Vietnamese, Food Trucks}",...,0.0,0.0,0.0,0.330078,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
l9eLGG9ZKpLJzboZq-9LRQ,Wawa,Clifton Heights,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Restaurants, Sandwiches, Convenience Stores, C...",0.500,0.000793,"{Sandwiches, Food, Convenience Stores, Coffee ...","{Sandwiches, Food, Convenience Stores, Coffee ...","{'RestaurantsPriceRange2': '1', 'RestaurantsTa...","{Sandwiches, Food, Convenience Stores, Coffee ...",...,0.5,0.5,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5
cM6V90ExQD6KMSU3rRB5ZA,Dutch Bros Coffee,Boise,"{'WiFi': ''free'', 'RestaurantsGoodForGroups':...","Cafes, Juice Bars & Smoothies, Coffee & Tea, R...",0.750,0.003702,"{Juice Bars & Smoothies, Food, Cafes, Coffee &...","{Juice Bars & Smoothies, Food, Cafes, Coffee &...","{'RestaurantsGoodForGroups': 'True', 'Restaura...","{Juice Bars & Smoothies, Food, Cafes, Coffee &...",...,0.0,0.0,0.0,0.669922,0.0,0.0,0.0,0.0,0.0,0.0
WnT9NIzQgLlILjPT0kEcsQ,Adelita Taqueria & Restaurant,Philadelphia,"{'WheelchairAccessible': 'False', 'Restaurants...","Restaurants, Mexican",0.875,0.003967,"{Mexican, Restaurants}","{Mexican, Restaurants}","{'GoodForKids': 'True', 'RestaurantsTakeOut': ...","{Mexican, Restaurants}",...,0.0,0.5,0.5,0.330078,1.0,0.5,1.0,1.0,0.5,0.5
2O2K6SXPWv56amqxCECd4w,The Plum Pit,Aston,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...",0.875,0.001190,"{Event Planning & Services, Food, Food Trucks,...","{Event Planning & Services, Food, Food Trucks,...","{'RestaurantsDelivery': 'False', 'RestaurantsP...","{Event Planning & Services, Food, Food Trucks,...",...,0.5,0.5,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5


# Check-ins also contain data for businesses

In [25]:
def _parse_checkin_dates(joined_dates):
    """Turn one ', '-separated string of timestamps into a list of datetime objects."""
    return [datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S') for stamp in joined_dates.split(', ')]


# Read the raw check-in entries and keep only the fields DataReader marks as relevant.
checkin_file = Path('..', 'data', DataReader.EXPECTED_FILES[1])
entries = DataReader._get_entries_from_file(checkin_file)
filtered_entries = DataReader._filter_entries(entries, DataReader.RELEVANT_CHECKIN_FIELDS)
checkins: pd.DataFrame = pd.DataFrame.from_records(filtered_entries)

# Each 'date' cell arrives as a single string; replace it with a list of datetimes.
checkins['date'] = checkins['date'].map(_parse_checkin_dates)

In [26]:
# Earliest check-in of each business, and the latest check-in found in the entire dataset.
first_checkins = checkins['date'].map(lambda dates: min(dates))
last_checkin = checkins['date'].map(lambda dates: max(dates)).max()

# Weeks between a business's first check-in and the dataset-wide last check-in.
# Only whole days are counted, so a span shorter than one day becomes 0 weeks.
amount_of_weeks = (last_checkin - first_checkins).map(lambda span: span.days / 7)
amount_of_checkins = checkins['date'].transform(len)

# Dividing by a 0-week span yields inf/NaN; those businesses get an average of 0.
checkins_per_week_raw = amount_of_checkins / amount_of_weeks
average_checkins_per_week = checkins_per_week_raw.replace([np.inf, -np.inf, np.nan], 0)

# The scaler is fed a single column (n, 1); flatten the result back to 1-D afterwards.
scaled_column = preprocessing.MinMaxScaler().fit_transform(
    average_checkins_per_week.to_numpy().reshape(-1, 1)
)
average_checkins_per_week_normalised = pd.Series(
    data=scaled_column.flatten(),
    name="average_checkins_per_week_normalised",
)

# Append the normalised score as a new column, then key the frame by business id.
checkins = pd.concat([checkins, average_checkins_per_week_normalised], axis=1)
# checkins = checkins.drop(columns=['date']) TODO: re-enable
checkins = checkins.set_index('business_id')

checkins

In [21]:
# Attach the per-business check-in columns to the businesses frame.
# Check-in columns left over from an earlier run of this cell are dropped first, so
# re-running the cell does not fail with "columns overlap but no suffix specified".
businesses = (
    businesses
    .drop(columns=checkins.columns, errors='ignore')
    .join(checkins, on='business_id')
)
# Businesses with no matching check-in row get NaN from the left join; score them as 0.
businesses['average_checkins_per_week_normalised'] = businesses['average_checkins_per_week_normalised'].fillna(0)

Unnamed: 0_level_0,name,city,attributes,categories,business_average_stars_normalised,business_review_count_normalised,categories_whitelist_1,categories_whitelist_2,attributes_filtered,categories_whitelist_2_most_common,...,attribute_divey,attribute_noiselevel,attribute_dessert,attribute_latenight,attribute_lunch,attribute_dinner,attribute_brunch,attribute_breakfast,date,average_checkins_per_week_normalised
business_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
MTSW4McQd7CbVtyjqoe9mw,St Honore Pastries,Philadelphia,"{'RestaurantsDelivery': 'False', 'OutdoorSeati...","Restaurants, Food, Bubble Tea, Coffee & Tea, B...",0.750,0.009918,"{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{Bakeries, Food, Bubble Tea, Coffee & Tea, Res...","{'RestaurantsDelivery': 'False', 'RestaurantsP...","{Food, Bakeries, Coffee & Tea, Restaurants}",...,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5,"[2010-08-18 17:05:36, 2010-11-25 17:45:31, 201...",0.005020
CF33F8-E6oudUQ46HnavjQ,Sonic Drive-In,Ashland City,"{'BusinessParking': 'None', 'BusinessAcceptsCr...","Burgers, Fast Food, Sandwiches, Food, Ice Crea...",0.250,0.000132,"{Burgers, Sandwiches, Ice Cream & Frozen Yogur...","{Burgers, Sandwiches, Ice Cream & Frozen Yogur...","{'RestaurantsTakeOut': 'True', 'GoodForKids': ...","{Burgers, Sandwiches, Ice Cream & Frozen Yogur...",...,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5,"[2012-12-16 05:27:33, 2013-02-15 04:00:46, 201...",0.000414
k0hlBqXX-Bt0vf1op7Jr1w,Tsevi's Pub And Grill,Affton,"{'Caters': 'True', 'Alcohol': 'u'full_bar'', '...","Pubs, Restaurants, Italian, Bars, American (Tr...",0.500,0.001851,"{Pubs, American (Traditional), Bars, Italian, ...","{Pubs, American (Traditional), Bars, Italian, ...","{'RestaurantsDelivery': 'False', 'RestaurantsT...","{Pubs, American (Traditional), Bars, Italian, ...",...,0.0,0.330078,0.5,0.5,0.5,0.5,0.5,0.5,"[2012-04-16 22:28:12, 2012-04-20 22:38:55, 201...",0.000701
bBDDEgkFA1Otx9Lfe7BZUQ,Sonic Drive-In,Nashville,"{'RestaurantsAttire': ''casual'', 'Restaurants...","Ice Cream & Frozen Yogurt, Fast Food, Burgers,...",0.125,0.000661,"{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...","{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...","{'RestaurantsGoodForGroups': 'False', 'GoodFor...","{Burgers, Ice Cream & Frozen Yogurt, Food, Fas...",...,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5,"[2011-01-17 15:31:21, 2011-02-23 21:28:08, 201...",0.000326
eEOYSgkmpB90uNA7lDOMRA,Vietnamese Food Truck,Tampa Bay,"{'Alcohol': ''none'', 'OutdoorSeating': 'None'...","Vietnamese, Food, Restaurants, Food Trucks",0.750,0.000661,"{Food Trucks, Food, Vietnamese, Restaurants}","{Food Trucks, Food, Vietnamese, Restaurants}","{'touristy': False, 'hipster': False, 'romanti...","{Restaurants, Food, Vietnamese, Food Trucks}",...,0.0,0.330078,0.0,0.0,0.0,0.0,0.0,0.0,"[2018-07-19 20:23:33, 2019-07-12 15:37:41, 201...",0.000195
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
l9eLGG9ZKpLJzboZq-9LRQ,Wawa,Clifton Heights,"{'BikeParking': 'True', 'BusinessAcceptsCredit...","Restaurants, Sandwiches, Convenience Stores, C...",0.500,0.000793,"{Sandwiches, Food, Convenience Stores, Coffee ...","{Sandwiches, Food, Convenience Stores, Coffee ...","{'RestaurantsPriceRange2': '1', 'RestaurantsTa...","{Sandwiches, Food, Convenience Stores, Coffee ...",...,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5,"[2010-10-17 11:36:58, 2011-01-19 01:28:20, 201...",0.001611
cM6V90ExQD6KMSU3rRB5ZA,Dutch Bros Coffee,Boise,"{'WiFi': ''free'', 'RestaurantsGoodForGroups':...","Cafes, Juice Bars & Smoothies, Coffee & Tea, R...",0.750,0.003702,"{Juice Bars & Smoothies, Food, Cafes, Coffee &...","{Juice Bars & Smoothies, Food, Cafes, Coffee &...","{'RestaurantsGoodForGroups': 'True', 'Restaura...","{Juice Bars & Smoothies, Food, Cafes, Coffee &...",...,0.0,0.669922,0.0,0.0,0.0,0.0,0.0,0.0,"[2013-10-06 23:56:33, 2013-10-18 00:35:35, 201...",0.004110
WnT9NIzQgLlILjPT0kEcsQ,Adelita Taqueria & Restaurant,Philadelphia,"{'WheelchairAccessible': 'False', 'Restaurants...","Restaurants, Mexican",0.875,0.003967,"{Mexican, Restaurants}","{Mexican, Restaurants}","{'GoodForKids': 'True', 'RestaurantsTakeOut': ...","{Mexican, Restaurants}",...,0.5,0.330078,1.0,0.5,1.0,1.0,0.5,0.5,"[2017-10-08 12:17:20, 2017-10-28 06:18:13, 201...",0.001119
2O2K6SXPWv56amqxCECd4w,The Plum Pit,Aston,"{'RestaurantsDelivery': 'False', 'BusinessAcce...","Restaurants, Comfort Food, Food, Food Trucks, ...",0.875,0.001190,"{Event Planning & Services, Food, Food Trucks,...","{Event Planning & Services, Food, Food Trucks,...","{'RestaurantsDelivery': 'False', 'RestaurantsP...","{Event Planning & Services, Food, Food Trucks,...",...,0.5,0.330078,0.5,0.5,0.5,0.5,0.5,0.5,"[2015-06-18 23:32:40, 2018-06-20 23:24:35, 201...",0.000130


# The results of the analysis above have been implemented in `DataReader().read_data()`

In [22]:
# read_data() returns three values; only the first one is kept and displayed here.
businesses, _, _ = DataReader().read_data()
businesses

KeyboardInterrupt: 