In [19]:
from datetime import datetime
import pandas as pd
import numpy as np
import os
import json
import fnmatch

In [20]:
# Paths to the two data folders under "Output"
general, category = (
    os.path.join("Output", subfolder) for subfolder in ("General", "Category")
)
# Leagues covered by the analysis
leagues = [
    'MLB',
    'MLS',
    'NBA',
    'NFL',
    'NHL',
]

In [21]:
# Example analysis for NFL, from "General" data.
# os.listdir returns entries in arbitrary order and may include non-data
# entries, so keep only the JSON files and sort them: the de-duplication
# steps further down depend on a stable, alphabetical order.
nfl_list=sorted(fnmatch.filter(os.listdir(os.path.join(general, "NFL")), "*.json"))

In [22]:
# Display the list of NFL file names
nfl_list

['Arizona_Cardinals_restaurants.json',
 'Atlanta_Falcons_restaurants.json',
 'Baltimore_Ravens_restaurants.json',
 'Buffalo_Bills_restaurants.json',
 'Carolina_Panthers_restaurants.json',
 'Chicago_Bears_restaurants.json',
 'Cincinnati_Bengals_restaurants.json',
 'Cleveland_Browns_restaurants.json',
 'Dallas_Cowboys_restaurants.json',
 'Denver_Broncos_restaurants.json',
 'Detroit_Lions_restaurants.json',
 'Green_Bay_Packers_restaurants.json',
 'Houston_Texans_restaurants.json',
 'Indianapolis_Colts_restaurants.json',
 'Jacksonville_Jaguars_restaurants.json',
 'Kansas_City_Chiefs_restaurants.json',
 'Las_Vegas_Raiders_restaurants.json',
 'Los_Angeles_Chargers_restaurants.json',
 'Los_Angeles_Rams_restaurants.json',
 'Miami_Dolphins_restaurants.json',
 'Minnesota_Vikings_restaurants.json',
 'New_England_Patriots_restaurants.json',
 'New_Orleans_Saints_restaurants.json',
 'New_York_Giants_restaurants.json',
 'New_York_Jets_restaurants.json',
 'Philadelphia_Eagles_restaurants.json',
 'Pitt

In [23]:
# drop teams that play in the same stadium/duplicated cities
def remove_duplicated_city(inputList, dupList):
    """Return a copy of ``inputList`` with a single file kept per duplicated city.

    A file name belongs to a city when it matches the pattern
    ``<prefix>*.json`` for one of the prefixes in ``dupList``. For every
    prefix, the first matching name (in ``inputList`` order) is kept and every
    later match is dropped. Names that match no prefix are always kept.

    Parameters
    ----------
    inputList : list of str
        File names to filter. The list itself is not modified.
    dupList : iterable of str
        City prefixes (e.g. ``'New_York'``) that should appear only once.

    Returns
    -------
    list of str
        New list with the surplus files of each duplicated city removed.
    """
    prefixes = list(dupList)
    seen_prefixes = set()
    kept = []
    for name in inputList:
        matched = [p for p in prefixes if fnmatch.fnmatch(name, f'{p}*.json')]
        # Decide per city rather than by position in a combined match list,
        # so the result does not depend on how the cities are interleaved or
        # on how many teams share a prefix.
        if any(p in seen_prefixes for p in matched):
            continue
        seen_prefixes.update(matched)
        kept.append(name)
    return kept

In [24]:
# Optional step: keep a single team for the cities whose teams share a stadium.
nfl_new_list = remove_duplicated_city(
    inputList=nfl_list,
    dupList=['New_York', "Los_Angeles"],
)

## Let's have a look at the data in one file

In [25]:
#function to drop nan from a list
def drop_nan(_list):
    """Return a new list with every NaN entry of ``_list`` removed.

    NaN is detected by value, not by identity with ``np.nan``, so NaN objects
    created elsewhere (``float('nan')``, ``np.float64('nan')``, values parsed
    by ``json.load``) are dropped as well. Non-float entries (ints, strings,
    ``None``, ...) are kept unchanged.

    Parameters
    ----------
    _list : iterable
        Values to filter.

    Returns
    -------
    list
        The non-NaN entries, in their original order.
    """
    def _is_nan(value):
        # Guard with isinstance: np.isnan raises TypeError on strings/None.
        return isinstance(value, (float, np.floating)) and bool(np.isnan(value))

    return [item for item in _list if not _is_nan(item)]

In [26]:
# map the price list to numeric values
def pricemap(alist):
    """Replace the price symbols "$".."$$$$" in ``alist`` with 1..4, in place.

    Entries that are not one of the four symbols are left untouched.
    The list is modified directly; nothing is returned.
    """
    symbol_to_level = {
        "$": 1,
        "$$": 2,
        "$$$": 3,
        "$$$$": 4,
    }
    for position, entry in enumerate(alist):
        if entry not in symbol_to_level:
            continue
        alist[position] = symbol_to_level[entry]

In [27]:
def price_mapper(dictio):
    """Convert the 'price' of the listed businesses to a number, in place.

    For the first ``dictio['total']`` entries of ``dictio['businesses']``
    (or all of them, if the list is shorter), the 'price' value is set to:

    * 1-4 when it is one of the symbols "$", "$$", "$$$", "$$$$";
    * left as is when it already is one of the numbers 1-4, so that calling
      the function again on the same dictionary does not erase the prices;
    * ``np.nan`` when the key is missing or holds anything else.

    Parameters
    ----------
    dictio : dict
        Must provide the keys 'total' (int) and 'businesses' (list of dicts).

    Returns
    -------
    None
        ``dictio`` is modified directly.
    """
    price_map= {"$": 1, "$$": 2, "$$$": 3, "$$$$": 4}
    numeric_levels = tuple(price_map.values())
    businesses = dictio['businesses']
    # Never index past the end of the list, even if 'total' overstates it.
    for i in range(min(dictio['total'], len(businesses))):
        price = businesses[i].get('price', np.nan)
        if isinstance(price, str):
            businesses[i]['price'] = price_map.get(price, np.nan)
        elif isinstance(price, bool) or price not in numeric_levels:
            # Missing key, NaN, or any value that is not a valid price level.
            businesses[i]['price'] = np.nan
        # else: already mapped to 1-4 by a previous call; keep it.

In [28]:
def openfile(filep):
    """Read the file at ``filep`` as UTF-8 text and return its parsed JSON content."""
    with open(filep, encoding="utf-8") as handle:
        return json.load(handle)

In [29]:
def restaurant_statistics(fileList, league, category=False):
    """Build a per-team table of restaurant statistics for one league.

    Each file ``Output/<General|Category>/<league>/<file>`` is loaded with
    ``openfile`` and its prices are converted with ``price_mapper``. The
    price, 'rating' and 'review_count' of the first ``total`` businesses are
    then averaged, ignoring missing (NaN) values.

    Parameters
    ----------
    fileList : iterable of str
        File names inside the league folder.
    league : str
        Name of the league sub-folder.
    category : bool, default False
        Read from "Output/Category" when True, from "Output/General" otherwise.

    Returns
    -------
    pandas.DataFrame
        One row per file, indexed by the file's 'team' value, with columns
        "Stadium", "Stadium coordinates" ([lat, lon] rounded to 3 decimals),
        "Number of businesses" (the file's 'absolute total'),
        "Average Price (1-4)", "Average Ratings (1-5)" and
        "Average Number of Reviews". An average is NaN when no value exists.
    """
    def _mean(values):
        # np.mean([]) emits a RuntimeWarning; an empty sample just has no mean.
        return np.mean(values) if values else np.nan

    folder = os.path.join("Output", "Category" if category else "General")
    league_dict = {}
    for file in fileList:
        fileData = openfile(os.path.join(folder, league, file))
        price_mapper(fileData)
        stadium_coord = [round(fileData['stadium latitude'], 3),
                         round(fileData['stadium longitude'], 3)]

        business_price = []
        business_rating = []
        business_reviews = []
        # Only the first 'total' businesses are considered; slicing keeps this
        # safe if the list holds fewer entries than 'total' announces.
        for business in fileData['businesses'][:max(fileData['total'], 0)]:
            business_price.append(business.get('price', np.nan))
            # Look each field up on its own: a missing 'rating' must not
            # discard a valid 'review_count' (and vice versa), and unrelated
            # errors are no longer silenced by a bare except.
            business_rating.append(business.get('rating', np.nan))
            business_reviews.append(business.get('review_count', np.nan))

        league_dict[fileData['team']] = (
            fileData['stadium'],
            stadium_coord,
            fileData['absolute total'],
            _mean(drop_nan(business_price)),
            _mean(drop_nan(business_rating)),
            _mean(drop_nan(business_reviews)),
        )
    # Dataframe with the statistics
    statistics_df = pd.DataFrame.from_dict(league_dict, orient = "index",
                           columns = ("Stadium", "Stadium coordinates", "Number of businesses", "Average Price (1-4)", "Average Ratings (1-5)", "Average Number of Reviews"))
    return statistics_df
        
    

In [77]:
# Dictionary with the statistics dataframe for each league
stats={}
for league in leagues:
    # os.listdir returns entries in arbitrary order and may contain non-data
    # entries (e.g. checkpoint folders). Keep only the JSON files and sort
    # them so the row order of each dataframe is reproducible.
    league_list=sorted(fnmatch.filter(os.listdir(os.path.join(general, league)), "*.json"))
    stats[league]=restaurant_statistics(league_list, league)

In [78]:
# Keep one row per stadium (the first in row order). Reassign instead of
# using inplace=True so the step is an explicit, re-runnable transformation.
stats["NFL"] = stats["NFL"].drop_duplicates(subset=['Stadium'], keep='first')

In [79]:
# Display the NFL statistics table
stats["NFL"]

Unnamed: 0,Stadium,Stadium coordinates,Number of businesses,Average Price (1-4),Average Ratings (1-5),Average Number of Reviews
Arizona Cardinals,State Farm Stadium,"[33.528, -112.263]",101,1.466667,3.089109,141.544554
Atlanta Falcons,Mercedes-Benz Stadium,"[33.756, -84.4]",665,1.56681,3.386466,172.425564
Baltimore Ravens,M&T Bank Stadium,"[39.278, -76.623]",784,1.718095,3.611607,124.137755
Buffalo Bills,Bills Stadium,"[42.774, -78.787]",64,1.553191,3.28125,43.609375
Carolina Panthers,Bank of America Stadium,"[35.226, -80.853]",448,1.68,3.685268,158.205357
Chicago Bears,Soldier Field,"[41.862, -87.617]",1200,1.72904,3.6825,269.505
Cincinnati Bengals,Paul Brown Stadium,"[39.095, -84.516]",425,1.605096,3.808235,122.908235
Cleveland Browns,FirstEnergy Stadium,"[41.506, -81.699]",376,1.714815,3.728723,116.529255
Dallas Cowboys,AT&T Stadium,"[32.748, -97.093]",223,1.406977,3.275785,105.493274
Denver Broncos,Empower Field at Mile High,"[39.741, -105.02]",715,1.706542,3.881818,239.097902
