In [16]:
# Dependencies and Setup
from datetime import datetime
import pandas as pd
import numpy as np
import os
import json
from pprint import pprint
import fnmatch
from scipy import stats

In [3]:
# Resolve the name of the current weekday (isoweekday: Monday = 1 ... Sunday = 7)
today = datetime.today()
weekday = today.isoweekday()

days_week_dict = dict(enumerate(
    ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"),
    start=1,
))
day_of_the_week = days_week_dict.get(weekday)

print(day_of_the_week)


Monday


In [4]:
# Load the stadium table with an explicit dtype per column and keep only
# the rows that have no missing value in any column.
stadium_data_df = pd.read_csv(
    'Resources/stadium_coordinates.csv',
    dtype={
        "NFL Team": "string",
        "City": "string",
        "State": "string",
        "Stadium Name": "string",
        "Latitude": np.float64,
        "Longitude": np.float64,
        "Zip Code": "string",
    },
).dropna()
stadium_data_df.head()

Unnamed: 0,NFL Team,City,State,Stadium Name,Latitude,Longitude,Zip Code
0,Arizona Cardinals,Glendale,Arizona,State Farm Stadium,33.528,-112.263,85305
1,Atlanta Falcons,Atlanta,Georgia,Mercedes-Benz Stadium,33.755556,-84.4,30313
2,Baltimore Ravens,Baltimore,Maryland,M&T Bank Stadium,39.278056,-76.622778,21230
3,Buffalo Bills,Orchard Park,New York,Bills Stadium,42.774,-78.787,14127
4,Carolina Panthers,Charlotte,North Carolina,Bank of America Stadium,35.225833,-80.852778,28202


In [5]:
# Team names as a Series; it keeps the row index of stadium_data_df
nfl_teams = stadium_data_df.loc[:, "NFL Team"]

In [6]:
# Select the JSON file names that belong to one team
def folder_dict(equipo, a_list):
    """Return the entries of ``a_list`` matching the pattern ``*<equipo>*.json``.

    Parameters
    ----------
    equipo : str
        Text that must appear somewhere in the file name (the team name).
    a_list : iterable of str
        Candidate file names.

    Returns
    -------
    list
        The matching names, in the order they appear in ``a_list``.
    """
    pattern = f'*{equipo}*.json'
    return [name for name in a_list if fnmatch.fnmatch(name, pattern)]

In [7]:
# Create a dictionary with the JSON file names, using NFL team names as keys
nfl_teams = stadium_data_df["NFL Team"]
parent_dir = 'json_files'
folder_dir = f'{day_of_the_week}1000'
path = os.path.join(parent_dir, folder_dir)
# os.listdir returns entries in arbitrary order. Sort them so every run sees
# each team's files in the same order: read_json_data reports the 'total'
# field of the first file in the list it is given.
files_list = sorted(os.listdir(path))
file_dict = {}
for football in nfl_teams:
    file_dict[football] = folder_dict(football, files_list)

In [8]:
# Flatten one position of every entry of a nested list into a single list
def single_list(nested_list, index):
    """Concatenate ``entry[index]`` for every ``entry`` of ``nested_list``.

    Parameters
    ----------
    nested_list : list
        Sequence whose entries are themselves indexable.
    index : int
        Position inside each entry that holds the sequence to flatten.

    Returns
    -------
    list
        All items of ``nested_list[0][index]``, then ``nested_list[1][index]``,
        and so on, in their original order.
    """
    return [item for entry in nested_list for item in entry[index]]

In [9]:
# Function to read the JSON files and collect the search fields per NFL team
def read_json_data(files_list, base_dir=None):
    """Collect price, rating and review count of every business in the files.

    Parameters
    ----------
    files_list : list of str
        Names of the JSON files to read, processed in the given order. Each
        file must contain a ``'total'`` entry and a ``'businesses'`` list.
    base_dir : str, optional
        Directory that contains the files. When omitted, the module-level
        ``path`` variable is used.

    Returns
    -------
    tuple
        ``(prices, ratings, review_counts, total)``. The three lists hold one
        entry per business, across all files in order. A business that lacks
        any of ``'price'``, ``'rating'`` or ``'review_count'`` contributes
        ``np.nan`` to all three lists, so the lists always have equal length.
        ``total`` is the ``'total'`` value of the first file, or ``0`` when
        ``files_list`` is empty.

    Raises
    ------
    KeyError
        If a file has no ``'total'`` or no ``'businesses'`` entry.
    """
    business_price = []
    business_rating = []
    business_reviews = []

    # Nothing to read: return an empty result instead of failing on an
    # unassigned local, which is what a loop that never runs would cause.
    if len(files_list) == 0:
        return business_price, business_rating, business_reviews, 0

    if base_dir is None:
        base_dir = path

    number_business = []
    for file in files_list:
        open_path = os.path.join(base_dir, file)
        with open(open_path, 'r', encoding='utf-8') as f:
            contents = json.load(f)
        number_business.append(contents['total'])

        for business in contents['businesses']:
            try:
                # Read all three fields before storing any of them, so an
                # incomplete record cannot leave the lists with unequal lengths.
                price = business['price']
                rating = business['rating']
                reviews = business['review_count']
            except (KeyError, TypeError):
                # Incomplete or malformed record: mark every field as missing.
                # The np.nan object itself is stored so that identity-based
                # filters (``x is np.nan``) still recognise it.
                price = rating = reviews = np.nan
            business_price.append(price)
            business_rating.append(rating)
            business_reviews.append(reviews)

    return business_price, business_rating, business_reviews, number_business[0]

In [10]:
# Read every team's JSON files and store the results under a short team key
data_team_dict = {}
for key, team_files in file_dict.items():
    last_name = key.split()
    # Short identifier: first and last word of the team name joined by "_"
    team_name = "_".join((last_name[0], last_name[-1]))
    # The result is also published as a variable named after the team
    vars()[team_name] = read_json_data(team_files)
    data_team_dict[team_name] = vars()[team_name]

In [11]:
# Function to drop NaN values from a list
def drop_nan(alst):
    """Return a new list containing the items of ``alst`` that are not NaN.

    Any floating-point NaN is removed, whether it is the ``np.nan`` object,
    ``float('nan')`` or a NumPy floating scalar. Non-float items such as
    strings and integers are always kept. The input list is not modified.

    Parameters
    ----------
    alst : iterable
        Items to filter.

    Returns
    -------
    list
        The non-NaN items in their original order.
    """
    return [
        item for item in alst
        # Test by value, not identity: a NaN produced elsewhere is a
        # different object from np.nan, so ``item is np.nan`` would miss it.
        if not (isinstance(item, (float, np.floating)) and np.isnan(item))
    ]

In [12]:
# Build a copy of the per-team data in which the NaN placeholders are removed
# from the price, rating and review lists; the business total is carried over.
clean_list = []
clean_team_dict = {}
for key, (prices, ratings, reviews, total) in data_team_dict.items():
    clean_team_dict[key] = (
        drop_nan(prices),
        drop_nan(ratings),
        drop_nan(reviews),
        total,
    )

In [13]:
# Translate the items of a list in place through a lookup dictionary
def replace(lst, dictionary):
    """Overwrite each item of ``lst`` that is a key of ``dictionary``.

    The list is modified in place; items that are not keys of ``dictionary``
    are left untouched. Nothing is returned.

    Parameters
    ----------
    lst : list
        List to update in place.
    dictionary : dict
        Mapping from old item to replacement value.
    """
    for position in range(len(lst)):
        current = lst[position]
        if current in dictionary:
            lst[position] = dictionary[current]
    

In [14]:
# Convert the "$" price symbols of every team to numeric levels 1-4, in place
price_dict = {symbol: len(symbol) for symbol in ("$", "$$", "$$$", "$$$$")}

for team_data in clean_team_dict.values():
    # Position 0 of each team's tuple is the list of prices
    replace(team_data[0], price_dict)

In [40]:
# Per-team summary: mean rating, mean review count, mean price, the most
# common price level together with its count, and the reported business total.
statistics_dict = {}

for key in clean_team_dict:
    price_mean = np.mean(clean_team_dict[key][0])
    rating_mean = np.mean(clean_team_dict[key][1])
    reviews_mean = np.mean(clean_team_dict[key][2])
    if len(clean_team_dict[key][0]) > 0:
        price_mode = stats.mode(clean_team_dict[key][0], axis=None)
        # Depending on the SciPy release, mode/count come back either as
        # length-1 arrays or as plain scalars. np.ravel accepts both, so the
        # value is extracted the same way on every version.
        price_mode_pair = (np.ravel(price_mode[0])[0], np.ravel(price_mode[1])[0])
    else:
        # No priced business for this team: there is no mode to report
        price_mode_pair = (np.nan, 0)
    statistics_dict[key] = (rating_mean, reviews_mean, price_mean,
                            price_mode_pair, clean_team_dict[key][3])

In [41]:
# One row per team, in the order the teams were inserted into statistics_dict
statistics_df = pd.DataFrame.from_dict(statistics_dict, orient = "index",
                           columns = ("Average Ratings (1-5)","Average Number of Reviews", 
                                      "Average Price (1-4)", "Price Mode", "Number of businesses"))
means_df = statistics_df.reset_index(drop = True)

# Attach the full team names by position rather than by index label.
# nfl_teams keeps the row labels of stadium_data_df, which may have gaps after
# dropna(), while means_df is numbered 0..n-1; assigning the Series directly
# would align on those labels and could shift or blank out names. Using the
# underlying array keeps the dtype and raises if the lengths differ.
means_df["NFL Team"] = nfl_teams.array
means_df.head()

Unnamed: 0,Average Ratings (1-5),Average Number of Reviews,Average Price (1-4),Price Mode,Number of businesses,NFL Team
0,3.0,162.381579,1.473684,"(1, 41)",94,Arizona Cardinals
1,3.404537,248.597353,1.623819,"(2, 261)",704,Atlanta Falcons
2,3.537657,160.67364,1.705021,"(2, 256)",666,Baltimore Ravens
3,3.166667,50.060606,1.69697,"(2, 23)",51,Buffalo Bills
4,3.635965,190.80117,1.669591,"(2, 176)",441,Carolina Panthers


In [42]:
# Left-join the per-team statistics onto the stadium table by team name,
# keeping every stadium row.
means_1000_df = pd.merge(
    left=stadium_data_df,
    right=means_df,
    how='left',
    on="NFL Team",
)
means_1000_df.head()

Unnamed: 0,NFL Team,City,State,Stadium Name,Latitude,Longitude,Zip Code,Average Ratings (1-5),Average Number of Reviews,Average Price (1-4),Price Mode,Number of businesses
0,Arizona Cardinals,Glendale,Arizona,State Farm Stadium,33.528,-112.263,85305,3.0,162.381579,1.473684,"(1, 41)",94
1,Atlanta Falcons,Atlanta,Georgia,Mercedes-Benz Stadium,33.755556,-84.4,30313,3.404537,248.597353,1.623819,"(2, 261)",704
2,Baltimore Ravens,Baltimore,Maryland,M&T Bank Stadium,39.278056,-76.622778,21230,3.537657,160.67364,1.705021,"(2, 256)",666
3,Buffalo Bills,Orchard Park,New York,Bills Stadium,42.774,-78.787,14127,3.166667,50.060606,1.69697,"(2, 23)",51
4,Carolina Panthers,Charlotte,North Carolina,Bank of America Stadium,35.225833,-80.852778,28202,3.635965,190.80117,1.669591,"(2, 176)",441


In [43]:
# Write the merged table as CSV into the Resources folder; the file name
# starts with the current weekday (spelling of the suffix kept as-is).
output_folder = 'Resources'
file_name = f'{day_of_the_week}1000_ouput.csv'
output_path = os.path.join(output_folder, file_name)
means_1000_df.to_csv(
    output_path,
    encoding = "utf-8",
    header=True,
    index=False,
)