In [1]:
import pandas as pd
import os

In [2]:
def clean_restaurant(data):
    '''
    Flatten one business record into the handful of fields kept for analysis.

    Parameters
    ----------
    data : dict
        A single business entry. The keys read, when present, are 'name',
        'rating', 'price' (a string whose '$' characters are counted),
        'categories' (an iterable of dicts each having an 'alias' key) and
        'coordinates' (a dict with 'latitude' and 'longitude' keys).

    Returns
    -------
    dict
        Always contains exactly the keys 'name', 'rating', 'price',
        'categories', 'latitude' and 'longitude', in that order. Any field
        whose source key is absent from ``data`` holds the string 'null'.
    '''

    # Sentinel written for absent fields.
    missing = 'null'

    cleaned_data = {}

    for key in ('name', 'rating', 'price', 'categories', 'coordinates'):

        present = key in data

        if key == 'coordinates':
            # Always emit latitude/longitude, even when 'coordinates' is
            # absent, so every record has the same set of keys (and the
            # resulting DataFrame the same columns).
            coords = data[key] if present else {'latitude': missing, 'longitude': missing}
            cleaned_data['latitude'] = coords['latitude']
            cleaned_data['longitude'] = coords['longitude']

        elif not present:
            cleaned_data[key] = missing

        elif key == 'categories':
            # Collapse the list of category dicts into one comma-separated string.
            cleaned_data[key] = ', '.join(cat['alias'] for cat in data[key])

        elif key == 'price':
            # Convert the '$'-string price tier into an integer count.
            cleaned_data[key] = data[key].count('$')

        else:
            cleaned_data[key] = data[key]

    return cleaned_data

In [3]:
# Root of the raw data; this cell walks it as <path>/<league>/<team file>.
path = 'data/General/'

# os.listdir returns entries in arbitrary order, so sort for a reproducible
# row order; skip hidden entries (names starting with '.') so they are not
# mistaken for leagues.
files = sorted(name for name in os.listdir(path) if not name.startswith('.'))

# Collect one flat dict per restaurant and build the frame once at the end,
# instead of concatenating a one-row DataFrame per restaurant.
records = []

for league in files:

    league_dir = os.path.join(path, league)
    team_files = sorted(name for name in os.listdir(league_dir) if not name.startswith('.'))

    for team_file in team_files:

        big_df = pd.read_json(os.path.join(league_dir, team_file))

        if big_df.empty:
            # No businesses for this team: nothing to add.
            continue

        # The first row's team/stadium fields are used for every restaurant
        # in the file, so read them once.
        team_info = {
            'league': league,
            'team': big_df['team'].iloc[0],
            'stadium': big_df['stadium'].iloc[0],
            'team_latitude': big_df['stadium latitude'].iloc[0],
            'team_longitude': big_df['stadium longitude'].iloc[0],
            'state': big_df['state'].iloc[0],
            'city': big_df['city'].iloc[0],
        }

        for rest in big_df['businesses'].apply(clean_restaurant):
            # Restaurant fields first, then team fields, to keep column order.
            records.append({**rest, **team_info})

# A frame built from a list of records already has a fresh 0..n-1 index,
# so no reset_index call is needed.
df = pd.DataFrame(records)
df.head()

Unnamed: 0,name,rating,price,categories,latitude,longitude,league,team,stadium,team_latitude,team_longitude,state,city
0,Oriole,5.0,4,newamerican,41.886097,-87.644948,NBA,Chicago Bulls,United Center,41.880556,-87.674167,Illinois,Chicago
1,Spinning J Bakery and Soda Fountain,4.5,2,"coffee, breakfast_brunch, sandwiches",41.89945,-87.69716,NBA,Chicago Bulls,United Center,41.880556,-87.674167,Illinois,Chicago
2,S.K.Y.,4.5,3,"newamerican, cocktailbars, wine_bars",41.857808,-87.658043,NBA,Chicago Bulls,United Center,41.880556,-87.674167,Illinois,Chicago
3,Kai Zan,4.5,3,"asianfusion, japanese, sushi",41.895505,-87.691541,NBA,Chicago Bulls,United Center,41.880556,-87.674167,Illinois,Chicago
4,Sabroso! Mexican Grill,4.5,2,"mexican, breakfast_brunch, newmexican",41.89319,-87.66753,NBA,Chicago Bulls,United Center,41.880556,-87.674167,Illinois,Chicago


In [4]:
# Show the (rows, columns) dimensions of the combined restaurant frame.
df.shape

(81154, 13)

In [5]:
# Write the combined frame to CSV; index=False omits the row index column.
df.to_csv('data/restaurants.csv', index=False)