# Yelp Dataset
___
**Context:** The dataset here comprises ...

## Importing modules and loading data <a name="import"></a>

1. First, we import the Python modules.

In [1]:
# Essential Data Analysis Ecosystem Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# From Python Standard Library
import os, sys
import json  # encoding and decoding json data

import ast # Safely evaluate an expression node or a string containing a Python literal or container display.

In [2]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

# Silence all warnings so they do not clutter the rendered notebook output.
# NOTE(review): this is a blanket filter -- it also hides deprecation warnings
# raised by the libraries used below; consider narrowing it while developing.
import warnings
warnings.simplefilter("ignore")

2. Next, we import the dataset.

In [3]:
# Directory that holds the Yelp dataset files (resolved against the working directory).
dataset_path = '../input/yelp_dataset/'

# os.listdir already returns a list, so it can be displayed as-is.
os.listdir(dataset_path)

['business.json',
 'Yelp_Dataset_Challenge_Round_13.pdf',
 'Dataset_Challenge_Dataset_Agreement.pdf',
 'user.json',
 'checkin.json',
 'tip.json',
 'review.json',
 'photo.json']

In [4]:
#  flattening JSON objects of arbitrary structure
def flatten_json(y, deepflat=False):
    """Recursively flatten a decoded JSON value into a single-level dict.

    Nested dict keys and list positions are joined with ``_`` to build the
    flat key, e.g. ``{'a': {'b': [7]}}`` becomes ``{'a_b_0': '7'}``.  Every
    leaf is stored as its ``str()`` representation, so ``None`` becomes
    ``'None'`` and ``True`` becomes ``'True'``.  Empty dicts and empty lists
    contribute no keys.

    Parameters
    ----------
    y : object
        Decoded JSON value (typically the dict returned by ``json.loads``).
    deepflat : bool, default False
        When true, a string leaf that starts with ``{`` and ends with ``}``
        is parsed with ``ast.literal_eval`` and, if it yields a dict, that
        dict is flattened too.  Strings that cannot be parsed, or that parse
        to something other than a dict, are kept as ordinary string leaves.

    Returns
    -------
    dict
        Mapping of flattened key to stringified leaf value.  A list of such
        dicts can be passed straight to ``pd.DataFrame``.
    """
    out = {}

    def parse_dict_literal(text):
        """Return the dict encoded by ``text``, or None if it is not one."""
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Free text that merely happens to be wrapped in braces.
            return None
        # A brace-wrapped literal can also be a set; only dicts are expanded.
        return parsed if isinstance(parsed, dict) else None

    def flatten(x, name=''):
        if isinstance(x, dict):
            for key, value in x.items():
                # str(key): dicts parsed from literals may have non-str keys.
                flatten(value, name + str(key) + '_')
            return
        if isinstance(x, list):
            for index, item in enumerate(x):
                flatten(item, name + str(index) + '_')
            return
        if deepflat and isinstance(x, str) and x.startswith('{') and x.endswith('}'):
            embedded = parse_dict_literal(x)
            if embedded is not None:
                flatten(embedded, name)
                return
        # Leaf value: drop the trailing '_' separator from the accumulated key.
        out[str(name[:-1])] = str(x)

    flatten(y)
    return out

In [5]:
def json_file_to_dataframe(json_file, deepflat=False, encoding='utf-8'):
    """Load a file with one JSON document per line into a flattened DataFrame.

    Each non-blank line is decoded with ``json.loads`` and flattened with
    ``flatten_json``; the resulting dicts become the rows of the frame.

    Parameters
    ----------
    json_file : str or path-like
        Path of the file to read.
    deepflat : bool, default False
        Forwarded to ``flatten_json``.
    encoding : str, default 'utf-8'
        Text encoding used to read the file.  Stated explicitly so the result
        does not depend on the platform's default encoding.

    Returns
    -------
    pandas.DataFrame
        One row per JSON line, one column per flattened key.
    """
    records = []
    with open(json_file, 'r', encoding=encoding) as reader:
        for line in reader:
            # Skip blank lines (e.g. a trailing newline), which json.loads rejects.
            if not line.strip():
                continue
            records.append(flatten_json(json.loads(line), deepflat))

    return pd.DataFrame(records)

In [6]:
# Read each dataset file into its own DataFrame and report its shape.
# Business, user and check-in files are loaded with deepflat=True; the rest without.
businesses = json_file_to_dataframe(os.path.join(dataset_path, 'business.json'), deepflat=True)
print(businesses.shape, 'businesses')

users = json_file_to_dataframe(os.path.join(dataset_path, 'user.json'), deepflat=True)
print(users.shape, 'users')

checkins = json_file_to_dataframe(os.path.join(dataset_path, 'checkin.json'), deepflat=True)
print(checkins.shape, 'checkins')

tips = json_file_to_dataframe(os.path.join(dataset_path, 'tip.json'), deepflat=False)
print(tips.shape, 'tips')

reviews = json_file_to_dataframe(os.path.join(dataset_path, 'review.json'), deepflat=False)
print(reviews.shape, 'reviews')

photos = json_file_to_dataframe(os.path.join(dataset_path, 'photo.json'), deepflat=False)
print(photos.shape, 'photos')

(192609, 109) businesses
(1637138, 22) users
(161950, 2) checkins
(1223094, 5) tips
(6685900, 9) reviews
(200000, 4) photos


In [29]:
# Number of distinct values in each column of the flattened business table.
# Left as the cell's last expression so the notebook displays it as the cell output.
businesses.nunique()

address                                  151977
attributes                                    1
attributes_AcceptsInsurance                   3
attributes_AgesAllowed                        5
attributes_Alcohol                            7
attributes_Ambience                           1
attributes_Ambience_casual                    2
attributes_Ambience_classy                    2
attributes_Ambience_divey                     2
attributes_Ambience_hipster                   2
attributes_Ambience_intimate                  2
attributes_Ambience_romantic                  2
attributes_Ambience_touristy                  2
attributes_Ambience_trendy                    2
attributes_Ambience_upscale                   2
attributes_BYOB                               2
attributes_BYOBCorkage                        7
attributes_BestNights                         1
attributes_BestNights_friday                  2
attributes_BestNights_monday                  2
attributes_BestNights_saturday          

In [39]:
# Frequency of each value in the reservations attribute column.
businesses['attributes_RestaurantsReservations'].value_counts()

False    32091
True     20147
None        49
Name: attributes_RestaurantsReservations, dtype: int64

In [40]:
# flatten_json stores every leaf as a string, so compare against the text 'True'.
mask = businesses['attributes_RestaurantsReservations'].eq('True')

In [47]:
# Show one example business that takes reservations.
# A fixed random_state makes the same row appear on every Restart & Run All.
businesses.loc[mask].sample(n=1, random_state=42)

Unnamed: 0,address,attributes,attributes_AcceptsInsurance,attributes_AgesAllowed,attributes_Alcohol,attributes_Ambience,attributes_Ambience_casual,attributes_Ambience_classy,attributes_Ambience_divey,attributes_Ambience_hipster,...,hours_Tuesday,hours_Wednesday,is_open,latitude,longitude,name,postal_code,review_count,stars,state
130218,735 Middlefield Road,,,,u'beer_and_wine',,True,False,False,False,...,11:0-22:0,11:0-22:0,1,43.8197999,-79.2615326,Royal Chinese Seafood Restaurant,M1V 5H5,27,3.5,ON


In [49]:
# How often each business name occurs among the rows selected by `mask`.
businesses.loc[mask, 'name'].value_counts()

Boston Pizza                          61
Swiss Chalet Rotisserie & Grill       51
Outback Steakhouse                    40
Olive Garden Italian Restaurant       37
Red Lobster                           36
Wild Wing                             28
St. Louis Bar & Grill                 26
Pizza Hut                             25
Jack Astor's Bar & Grill              24
LongHorn Steakhouse                   23
Moxie's Grill & Bar                   22
Streets of New York                   22
Montana's BBQ & Bar                   20
Ruby Tuesday                          19
P.F. Chang's                          19
Rôtisserie St-Hubert                  18
Bâton Rouge Steakhouse & Bar          17
Kelseys Original Roadhouse            16
Milestones Restaurants                16
Chuck E. Cheese's                     16
Chili's                               15
The Captain's Boil                    15
East Side Mario's                     14
Applebee's Grill + Bar                14
Buca di Beppo It

In [50]:
# Number of distinct values in each column of the users table.
users.nunique(axis=0)

average_stars             400
compliment_cool          1561
compliment_cute           337
compliment_funny         1561
compliment_hot           1348
compliment_list           196
compliment_more           354
compliment_note           952
compliment_photos         863
compliment_plain         1550
compliment_profile        356
compliment_writer         825
cool                     3950
elite                     756
fans                      591
friends                933769
funny                    3490
name                   124917
review_count             1791
useful                   4737
user_id               1637138
yelping_since         1631010
dtype: int64