In [1]:
# Dependencies and Setup
from datetime import datetime
import pandas as pd
import numpy as np
import os
import json
from pprint import pprint
import fnmatch

In [2]:
# Work out the name of the current weekday
today = datetime.today()
weekday = today.isoweekday()  # ISO numbering: 1 = Monday ... 7 = Sunday

days_week_dict = dict(enumerate(
    ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"),
    start=1,
))
day_of_the_week = days_week_dict.get(weekday)
print(day_of_the_week)

Monday


In [3]:
# Load the stadium table with explicit dtypes ("Zip Code" is read as text, not
# as a number) and discard every row that has a missing value.
stadium_dtypes = {
    **dict.fromkeys(["NFL Team", "City", "State", "Stadium Name", "Zip Code"], "string"),
    "Latitude": np.float64,
    "Longitude": np.float64,
}
stadium_data_df = pd.read_csv('Resources/stadium_coordinates.csv', dtype=stadium_dtypes).dropna()
stadium_data_df.head()

Unnamed: 0,NFL Team,City,State,Stadium Name,Latitude,Longitude,Zip Code
0,Arizona Cardinals,Glendale,Arizona,State Farm Stadium,33.528,-112.263,85305
1,Atlanta Falcons,Atlanta,Georgia,Mercedes-Benz Stadium,33.755556,-84.4,30313
2,Baltimore Ravens,Baltimore,Maryland,M&T Bank Stadium,39.278056,-76.622778,21230
3,Buffalo Bills,Orchard Park,New York,Bills Stadium,42.774,-78.787,14127
4,Carolina Panthers,Charlotte,North Carolina,Bank of America Stadium,35.225833,-80.852778,28202


In [4]:
# Function to create a list of json file names for one team
def folder_dict(equipo, a_list):
    """Return the entries of ``a_list`` that match the glob ``*{equipo}*.json``.

    Matching follows ``fnmatch.fnmatch`` rules and the order of ``a_list``
    is preserved. An empty list is returned when nothing matches.
    """
    pattern = f'*{equipo}*.json'
    return [name for name in a_list if fnmatch.fnmatch(name, pattern)]

In [5]:
# Collect the team names and list the json files stored in today's folder
nfl_teams = stadium_data_df["NFL Team"]
parent_dir, folder_dir = 'json_files', f'{day_of_the_week}_Categories'
path = os.path.join(parent_dir, folder_dir)
files_list = os.listdir(path)

In [6]:
# Check integrity of json files.
# A file passes when it can be opened, parses as JSON and exposes a top-level
# 'total' entry. Every other file is reported and queued in remove_list.
remove_list = []
for file in files_list:
    open_path = os.path.join(path, file)
    try:
        # Opening and parsing happen inside the try block so that a corrupt
        # file is flagged instead of stopping the whole cell.
        with open(open_path, 'r', encoding='utf-8') as f:
            contents = json.load(f)
        total = contents['total']
    except (OSError, ValueError, KeyError, TypeError):
        # OSError: the entry cannot be opened (for example, it is a directory).
        # ValueError: malformed JSON or undecodable bytes (json.JSONDecodeError
        #             and UnicodeDecodeError are both ValueError subclasses).
        # KeyError / TypeError: the parsed payload has no 'total' entry.
        print(f"INTERNAL_ERROR in {file}")
        print("Something went wrong internally, please try downloading the json file again later.\n")
        remove_list.append(file)
    else:
        print(f'\nNo errors found in the json file:\n{file}\n')


No errors found in the json file:
yelp_category_response_Arizona Cardinals_0.json


No errors found in the json file:
yelp_category_response_Atlanta Falcons_0.json


No errors found in the json file:
yelp_category_response_Atlanta Falcons_20.json


No errors found in the json file:
yelp_category_response_Atlanta Falcons_40.json


No errors found in the json file:
yelp_category_response_Baltimore Ravens_0.json


No errors found in the json file:
yelp_category_response_Baltimore Ravens_20.json


No errors found in the json file:
yelp_category_response_Baltimore Ravens_40.json


No errors found in the json file:
yelp_category_response_Buffalo Bills_0.json


No errors found in the json file:
yelp_category_response_Carolina Panthers_0.json


No errors found in the json file:
yelp_category_response_Carolina Panthers_20.json


No errors found in the json file:
yelp_category_response_Carolina Panthers_40.json


No errors found in the json file:
yelp_category_response_Carolina Panthers_60.json

In [7]:
# Report which files, if any, failed the integrity check
if remove_list:
    print(f'Files with errors:\n{remove_list}')
else:
    print('No erros in json files.\nNo files to be removed for the analysis.')

No erros in json files.
No files to be removed for the analysis.


In [8]:
# Remove the files with errors from the list, if there are any.
# The list is filtered in place instead of calling list.remove() inside a
# single try block: a name that is already absent (for example when this cell
# is run a second time) no longer stops the remaining names from being removed.
files_to_skip = set(remove_list)
files_list[:] = [name for name in files_list if name not in files_to_skip]

In [9]:
# Map every NFL team name to the list of json files whose names contain it
file_dict = {football: folder_dict(football, files_list) for football in nfl_teams}
    
#file_dict

In [10]:
# Function to report the keys that hold an empty list
def find_empty_key(dictionary):
    """Print every key whose value equals ``[]`` and return the last one found.

    The dictionary itself is not modified. When several keys are empty, only
    the last one (in iteration order) is returned.

    Raises:
        UnboundLocalError: if no key maps to an empty list.
    """
    empty_keys = [key for key, value in dictionary.items() if value == []]
    for kpop in empty_keys:
        print(f"Key to pop: {kpop}")
    if not empty_keys:
        # Callers detect "nothing to pop" by catching this exception type.
        raise UnboundLocalError("no key with an empty list was found")
    return empty_keys[-1]


In [11]:
# Remove every team that has no json files from the dictionary.
# The empty keys are collected first because a dict must not change size
# while it is being iterated; all of them are removed, not just the last one.
empty_keys = [team for team, team_files in file_dict.items() if not team_files]
for empty_key in empty_keys:
    print(f"Key to pop: {empty_key}")
    file_dict.pop(empty_key)
if not empty_keys:
    print("No empty keys in the dictionary")

Key to pop: Miami Dolphins


In [12]:
# Function to flatten one position of a nested list into a single list
def single_list(nested_list, index):
    """Concatenate ``entry[index]`` for every entry of ``nested_list``.

    The elements keep their original order: all elements of the first entry,
    then all elements of the second entry, and so on.
    """
    flattened = []
    for entry in nested_list:
        flattened.extend(entry[index])
    return flattened

In [13]:
# Function to read the json files and create a list with the search fields per NFL team
def read_json_data(files_list, base_dir=None):
    """Collect price, rating and review count for every business in the files.

    Args:
        files_list: names of the json files to read, in the order to process.
        base_dir: directory that contains the files. When omitted, the
            notebook-level ``path`` variable is used.

    Returns:
        A tuple ``(prices, ratings, review_counts, total)``. The three lists
        hold one entry per business across all files, in file order, and always
        have the same length. ``total`` is the ``'total'`` value of the first
        file. For an empty ``files_list`` the result is ``([], [], [], 0)``.

    Raises:
        KeyError: if a file has no ``'total'`` or ``'businesses'`` entry.
    """
    directory = path if base_dir is None else base_dir
    business_price = []
    business_rating = []
    business_reviews = []
    number_business = []

    for file in files_list:
        open_path = os.path.join(directory, file)
        with open(open_path, 'r', encoding='utf-8') as f:
            contents = json.load(f)
        number_business.append(contents['total'])

        for business in contents['businesses']:
            try:
                # Read all three fields before storing any of them, so that a
                # missing field can never leave the lists with different lengths.
                record = (business['price'], business['rating'], business['review_count'])
            except (KeyError, TypeError):
                # A business lacking any of the fields is recorded as NaN in
                # all three lists.
                record = (np.nan, np.nan, np.nan)
            business_price.append(record[0])
            business_rating.append(record[1])
            business_reviews.append(record[2])

    total = number_business[0] if number_business else 0
    return business_price, business_rating, business_reviews, total

In [14]:
# Read every team's json files. Results are keyed by "<first word>_<last word>"
# of the team name and also published as a notebook-level variable of that name.
data_team_dict = {}
for key, team_files in file_dict.items():
    words = key.split()
    team_name = '_'.join((words[0], words[-1]))
    data_team_dict[team_name] = globals()[team_name] = read_json_data(team_files)
    print(team_name)


Arizona_Cardinals
Atlanta_Falcons
Baltimore_Ravens
Buffalo_Bills
Carolina_Panthers
Chicago_Bears
Cincinnati_Bengals
Cleveland_Browns
Dallas_Cowboys
Denver_Broncos
Detroit_Lions
Green_Packers
Houston_Texans
Indianapolis_Colts
Jacksonville_Jaguars
Kansas_Chiefs
Las_Raiders
Los_Chargers
Los_Rams
Minnesota_Vikings
New_Patriots
New_Saints
New_Giants
New_Jets
Philadelphia_Eagles
Pittsburgh_Steelers
San_49ers
Seattle_Seahawks
Tampa_Buccaners
Tennessee_Titans
Washington_Team


In [15]:
# Function to drop NaN values from a list
def drop_nan(alst):
    """Return a new list with every NaN entry of ``alst`` removed.

    NaN is detected by value rather than by identity with ``np.nan``, so
    ``float('nan')`` and NumPy floating scalars holding NaN are dropped too.
    Entries that are not floats (for example price strings) are always kept.
    """
    return [
        item for item in alst
        if not (isinstance(item, (float, np.floating)) and np.isnan(item))
    ]

In [16]:
class Exception_1(Exception):
    """Signals that the json data for one or more teams is missing."""

In [17]:
# Create a clean dictionary without np.nan
clean_list = []
clean_team_dict = {}
missing_teams = []
try:
    # An empty dict is falsy. Comparing dict.items() with False is never true,
    # so the guard has to test the dictionary itself.
    if not data_team_dict:
        raise Exception_1('data_team_dict is empty: no team data was loaded')

    for key in data_team_dict:
        # Positions 0-2 hold the price, rating and review lists; position 3
        # is carried over unchanged.
        p = drop_nan(data_team_dict[key][0])
        ra = drop_nan(data_team_dict[key][1])
        re = drop_nan(data_team_dict[key][2])
        clean_team_dict[key] = (p, ra, re, data_team_dict[key][3])
except Exception_1 as e:
    print(f'team(s) {missing_teams} json file(s) missing')
    print(e)

In [18]:
# Function to replace items of a list using a dictionary
def replace(lst, dictionary):
    """Swap, in place, every item of ``lst`` that is a key of ``dictionary``.

    Each such item is replaced by its mapped value; items without a mapping
    are left untouched. Nothing is returned.
    """
    lst[:] = [dictionary[item] if item in dictionary else item for item in lst]
    

In [19]:
# Translate the price symbols of every team into their numeric level (1-4), in place
price_dict = {"$" * level: level for level in range(1, 5)}

for team_data in clean_team_dict.values():
    replace(team_data[0], price_dict)

#print(clean_team_dict)

In [20]:
# Per-team averages, stored as (rating mean, reviews mean, price mean, last tuple entry as-is)
statistics_dict = {}

for key, (prices, ratings, reviews, total) in clean_team_dict.items():
    price_mean, rating_mean, reviews_mean = (
        np.mean(values) for values in (prices, ratings, reviews)
    )
    statistics_dict[key] = (rating_mean, reviews_mean, price_mean, total)
#statistics_dict

In [21]:
categories_df = pd.DataFrame.from_dict(statistics_dict, orient="index",
                           columns=("Average Ratings (1-5)", "Average Number of Reviews",
                                    "Average Price (1-4)", "Number of businesses"))
means_df = categories_df.reset_index(drop=True)

# Recover the full team name of every row from its "<first word>_<last word>" key.
# The names go into a Series that shares means_df's 0..n-1 index, because pandas
# aligns an assigned Series on index labels rather than on position. Reusing the
# labels of nfl_teams with one label dropped would leave a gap and pair every
# later team with the statistics of the following team.
team_by_key = {}
for team in nfl_teams:
    words = team.split()
    team_by_key[f'{words[0]}_{words[-1]}'] = team
clean_nfl_teams = pd.Series(
    [team_by_key.get(team_key) for team_key in categories_df.index],
    name="NFL Team", dtype="string",
)
means_df["NFL Team"] = clean_nfl_teams
means_df = means_df.dropna()
means_df.head()

Unnamed: 0,Average Ratings (1-5),Average Number of Reviews,Average Price (1-4),Number of businesses,NFL Team
0,3.136364,257.818182,2.090909,11,Arizona Cardinals
1,3.5,393.702128,2.12766,57,Atlanta Falcons
2,3.840426,233.744681,2.170213,55,Baltimore Ravens
3,2.5,30.0,2.0,3,Buffalo Bills
4,3.773438,258.6875,2.125,69,Carolina Panthers


In [22]:
# Left-join the per-team averages onto the stadium table, then drop incomplete rows
means_categories_df = pd.merge(stadium_data_df, means_df, on="NFL Team", how="left").dropna()
means_categories_df.head()

Unnamed: 0,NFL Team,City,State,Stadium Name,Latitude,Longitude,Zip Code,Average Ratings (1-5),Average Number of Reviews,Average Price (1-4),Number of businesses
0,Arizona Cardinals,Glendale,Arizona,State Farm Stadium,33.528,-112.263,85305,3.136364,257.818182,2.090909,11.0
1,Atlanta Falcons,Atlanta,Georgia,Mercedes-Benz Stadium,33.755556,-84.4,30313,3.5,393.702128,2.12766,57.0
2,Baltimore Ravens,Baltimore,Maryland,M&T Bank Stadium,39.278056,-76.622778,21230,3.840426,233.744681,2.170213,55.0
3,Buffalo Bills,Orchard Park,New York,Bills Stadium,42.774,-78.787,14127,2.5,30.0,2.0,3.0
4,Carolina Panthers,Charlotte,North Carolina,Bank of America Stadium,35.225833,-80.852778,28202,3.773438,258.6875,2.125,69.0


In [19]:
# Write the merged table to a weekday-prefixed csv file inside the Resources folder
output_folder = 'Resources'
file_name = f'{day_of_the_week}_categories_ouput.csv'
output_path = os.path.join(output_folder, file_name)
means_categories_df.to_csv(output_path, header=True, index=False, encoding="utf-8")

In [20]:
#Categories: sport bars, cocktail bars & pubs