In [1]:
# Dependencies and Setup
from datetime import datetime
import pandas as pd
import numpy as np
import os
import requests
import json
from pprint import pprint
from dotenv import load_dotenv

In [2]:
# Yelp API configuration
load_dotenv()  # python-dotenv: load variables from a .env file (if present) into the environment
API_KEY = os.getenv("Yelp_API")  # put your key here if you don't have a .env file in your main project folder.
# Every request carries the key as a bearer token in the Authorization header.
headers = {"Authorization": f"Bearer {API_KEY}"}
base_url = "https://api.yelp.com/v3/businesses/search"

In [3]:
# Folder containing the per-league stadium/arena CSV files read by League()
dataFolder = "stadiums_data"

In [4]:
def createFolder(outFolder, cat=False):
    """Create (if needed) and return the output folder for a download run.

    The folder is located at ``json_files/Category_request/<outFolder>`` when
    ``cat`` is true and at ``json_files/General_request/<outFolder>`` otherwise.
    Missing parent directories are created as well.

    Args:
        outFolder: name of the run-specific sub-folder.
        cat: selects the "Category_request" parent instead of "General_request".

    Returns:
        The relative path of the folder.

    Raises:
        OSError: if the folder cannot be created (e.g. the path exists but is
            a regular file, or permissions are insufficient).
    """
    root_dir = 'json_files'
    request_dir = "Category_request" if cat else "General_request"
    path = os.path.join(root_dir, request_dir, outFolder)

    if os.path.isdir(path):
        print('Folder already exists.')
    else:
        # makedirs also builds json_files/<request_dir> on a fresh checkout;
        # real failures propagate instead of being reported as "already exists".
        os.makedirs(path, exist_ok=True)

    return path

In [5]:
def League(path=dataFolder, cat=False):
    """Ask for a league, load its stadium CSV and prepare an output folder.

    The user is prompted for a league code; ``<code>.csv`` is read from
    ``path`` and a timestamped folder is created through ``createFolder``.

    Args:
        path: folder containing the league CSV files.
        cat: forwarded to ``createFolder`` to pick the parent folder.

    Returns:
        A ``(DataFrame, output_path)`` tuple.
    """
    now = datetime.today()
    liga = input("Input value:\nNFL, NBA, MLB, MLS, NHL: ")

    # Explicit dtypes for the columns the rest of the notebook relies on.
    column_types = {
        "Team": "string",
        "City": "string",
        "State": "string",
        "Stadium Name": "string",
        "Latitude": np.float64,
        "Longitude": np.float64,
    }
    csv_path = os.path.join(path, f'{liga}.csv')
    df = pd.read_csv(csv_path, encoding='utf-8', dtype=column_types)

    run_folder = f'cat_{liga}_{now.day}_{now.month}_{now.year}_{now.hour}h{now.minute}m'
    return df, createFolder(run_folder, cat)

In [6]:
def teamLoc(df):
    """Return a list of (latitude, longitude, team) tuples, one per row of ``df``."""
    return [
        (lat, lon, team)
        for lat, lon, team in zip(df["Latitude"], df["Longitude"], df["Team"])
    ]

In [7]:
class Exception_1(Exception):
    """Signals that the "total" field was not found in a search response."""

In [8]:
# Function to download the JSON files
def downloadJSON(lista, term, radius, path, cat=False):
    """Download paginated search results around each location as JSON files.

    For every ``(latitude, longitude, team)`` tuple a first search is issued to
    read the reported ``"total"``; the results are then saved page by page
    (20 per page, capped at 1000 results) as
    ``yelp_response_{team}_{offset}.json`` inside ``path``. The first response
    is reused as the offset-0 page, so each page is requested exactly once.

    A response without a ``"total"`` field is reported and the team is skipped;
    a total of 0 simply produces no files.

    Args:
        lista: iterable of ``(latitude, longitude, team)`` tuples.
        term: search term sent to the API.
        radius: search radius sent to the API.
        path: existing directory in which the JSON files are written.
        cat: when true, restrict the search to the bar-related categories.

    Returns:
        None. One progress line is printed per team.
    """
    page_size = 20
    max_results = 1000  # upper bound applied to the reported total

    base_params = {"term": term, "limit": page_size, "radius": radius, "sort_by": "rating"}
    if cat:
        base_params["categories"] = ("sportsbars", "pubs", "wine_bars", "cocktailbars")

    for count_f, (latitude, longitude, team) in enumerate(lista):
        # Fresh parameters per team so the previous team's offset cannot leak
        # into this team's first request.
        search_params = dict(base_params, latitude=latitude, longitude=longitude)
        # Always defined, so the progress line below never reports a stale value.
        total = 0

        try:
            response = requests.get(url=base_url, params=search_params, headers=headers).json()
            if not isinstance(response, dict) or 'total' not in response:
                raise Exception_1(f"{team}: {response}")
            total = min(response['total'], max_results)

            for search_offset in range(0, total, page_size):
                if search_offset:
                    search_params["offset"] = search_offset
                    response = requests.get(url=base_url, params=search_params, headers=headers).json()

                file_name = f'yelp_response_{team}_{search_offset}.json'
                output_path = os.path.join(path, file_name)
                with open(output_path, 'w', encoding='utf-8') as f:
                    json.dump(response, f, ensure_ascii=False, indent=4)

        except Exception_1 as e:
            print("Error: 'total' not found.\nPlease try again later.")
            print(e)

        print(f'{count_f}, {team} total: {total}')

In [9]:
# Prompt for a league, load its stadium CSV and create the output folder
# under the "Category_request" parent (cat=True).
stadium_df, outpath=League(dataFolder, True)

Input value:
NFL, NBA, MLB, MLS, NHL: NHL


In [10]:
# Display the stadium table loaded for the chosen league.
stadium_df

Unnamed: 0,Team,City,State,Stadium Name,Latitude,Longitude
0,Tampa Bay Lightning,Tampa,Florida,Amalie Arena,27.942824,-82.451785
1,Dallas Stars,Dallas,Texas,American Airlines Center,32.790556,-96.810278
2,Colorado Avalanche,Denver,Colorado,Ball Arena,39.748837,-105.007613
3,Florida Panthers,Sunrise,Florida,BB&T Center,26.158333,-80.325556
4,Montreal Canadiens,Montreal,Quebec,Bell Centre,45.496111,-73.569444
5,Winnipeg Jets,Winnipeg,Manitoba,Bell MTS Place,49.892778,-97.143611
6,Nashville Predators,Nashville,Tennessee,Bridgestone Arena,36.159167,-86.778611
7,Ottawa Senators,Ottawa,Ontario,Canadian Tire Centre,45.297049,-75.926838
8,Washington Capitals,Washington,D.C.,Capital One Arena,38.89869,-77.020843
9,St. Louis Blues,St. Louis,Missouri,Enterprise Center,38.626667,-90.2025


In [11]:
# Build the (latitude, longitude, team) tuples that downloadJSON iterates over.
stadium=teamLoc(stadium_df)

In [12]:
# Display the list of (latitude, longitude, team) tuples.
stadium

[(27.942823999999998, -82.451785, 'Tampa Bay Lightning'),
 (32.790556, -96.810278, 'Dallas Stars'),
 (39.748837, -105.00761299999999, 'Colorado Avalanche'),
 (26.158333, -80.325556, 'Florida Panthers'),
 (45.496111, -73.56944399999999, 'Montreal Canadiens'),
 (49.892778, -97.14361099999999, 'Winnipeg Jets'),
 (36.159167, -86.778611, 'Nashville Predators'),
 (45.2970495, -75.92683790000001, 'Ottawa Senators'),
 (38.89869, -77.020843, 'Washington Capitals'),
 (38.626667, -90.2025, 'St. Louis Blues'),
 (33.532269, -112.26128, 'Arizona Coyotes'),
 (33.807778000000006, -117.876667, 'Anaheim Ducks'),
 (42.875, -78.876389, 'Buffalo Sabres'),
 (42.341111, -83.055, 'Detroit Red Wings'),
 (40.750556, -73.993611, 'New York Rangers'),
 (40.722778000000005, -73.590556, 'New York Islanders'),
 (39.969283000000004, -83.006111, 'Columbus Blue Jackets'),
 (35.803333, -78.721944, 'Carolina Hurricanes'),
 (40.439444, -79.98916700000001, 'Pittsburgh Penguins'),
 (40.733610999999996, -74.171111, 'New Jerse

In [13]:
# Download the category-filtered "restaurants" search results (radius=3000)
# for every stadium into the output folder created above.
downloadJSON(stadium, "restaurants", 3000, outpath, True)

0, Tampa Bay Lightning total: 41
1, Dallas Stars total: 101
2, Colorado Avalanche total: 98
3, Florida Panthers total: 3
4, Montreal Canadiens total: 90
5, Winnipeg Jets total: 19
6, Nashville Predators total: 87
7, Ottawa Senators total: 7
8, Washington Capitals total: 174
9, St. Louis Blues total: 45
10, Arizona Coyotes total: 12
11, Anaheim Ducks total: 22
12, Buffalo Sabres total: 28
13, Detroit Red Wings total: 66
14, New York Rangers total: 626
15, New York Islanders total: 16
16, Columbus Blue Jackets total: 65
17, Carolina Hurricanes total: 6
18, Pittsburgh Penguins total: 63
19, New Jersey Devils total: 16
20, Vancouver Canucks total: 94
21, Edmonton Oilers total: 37
22, Toronto Maple Leafs total: 129
23, Calgary Flames total: 65
24, San Jose Sharks total: 28
25, Los Angeles Kings total: 74
26, Boston Bruins total: 175
27, Vegas Golden Knights total: 91
28, Chicago Blackhawks total: 80
29, Philadelphia Flyers total: 17
30, Minnesota Wild total: 28


## Check json files integrity
Sometimes, because of an internal error on Yelp's side, the Yelp API returns an error payload instead of search results, and that payload is what ends up in the saved JSON file.

The check below prints, for each JSON file, whether it contains an error.

If any files contain errors, the corresponding requests above need to be re-run.

In [14]:
def JSONcheck(jsonpath):
    """Check every ``.json`` file in ``jsonpath`` for a valid search response.

    A file is considered valid when it parses as a JSON object containing a
    ``"total"`` key. Files that are empty, are not valid JSON, or lack
    ``"total"`` are reported as errors. Sub-directories and files without a
    ``.json`` extension are ignored.

    Args:
        jsonpath: directory holding the downloaded JSON files.

    Returns:
        The names of the files with errors, in alphabetical order.
    """
    error_list = []
    # Sorted so the report and the returned list are deterministic.
    for file in sorted(os.listdir(jsonpath)):
        open_path = os.path.join(jsonpath, file)
        if not (file.endswith('.json') and os.path.isfile(open_path)):
            continue

        try:
            with open(open_path, 'r', encoding='utf-8') as f:
                contents = json.load(f)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors:
            # an unreadable file is an error file, not a reason to abort the scan.
            contents = None

        if isinstance(contents, dict) and 'total' in contents:
            print(f'\nNo errors found in the json file:\n{file}\n')
        else:
            print(f"INTERNAL_ERROR in {file}")
            print("Something went wrong internally, please try downloading the json file again later.\n")
            error_list.append(file)

    if not error_list:
        print(f'No erros in json files.\nNo files to be removed for the analysis.')
    else:
        print(f'Files with errors:\n{error_list}')
    return error_list

In [15]:
# Scan the downloaded files and collect the names of those with errors.
error_list = JSONcheck(outpath)


No errors found in the json file:
yelp_response_Anaheim Ducks_0.json


No errors found in the json file:
yelp_response_Anaheim Ducks_20.json


No errors found in the json file:
yelp_response_Arizona Coyotes_0.json


No errors found in the json file:
yelp_response_Boston Bruins_0.json


No errors found in the json file:
yelp_response_Boston Bruins_100.json


No errors found in the json file:
yelp_response_Boston Bruins_120.json


No errors found in the json file:
yelp_response_Boston Bruins_140.json


No errors found in the json file:
yelp_response_Boston Bruins_160.json


No errors found in the json file:
yelp_response_Boston Bruins_20.json


No errors found in the json file:
yelp_response_Boston Bruins_40.json


No errors found in the json file:
yelp_response_Boston Bruins_60.json


No errors found in the json file:
yelp_response_Boston Bruins_80.json


No errors found in the json file:
yelp_response_Buffalo Sabres_0.json


No errors found in the json file:
yelp_response_Buffalo Sabr

In [16]:
# Display the files flagged by JSONcheck (empty list means nothing to retry).
error_list

[]

In [None]:
def tryagain(lista, cat=False):
    """Extract the team names from a list of downloaded JSON file names.

    File names are expected in the form written by ``downloadJSON``:
    ``yelp_response_{team}_{offset}.json``. The team is everything between the
    ``yelp_response_`` prefix and the final ``_{offset}`` part, so team names
    containing underscores are preserved.

    Args:
        lista: iterable of file names (not paths).
        cat: accepted for backward compatibility; the file-name layout does not
            depend on it, so it no longer affects the result.

    Returns:
        The distinct team names, in order of first appearance.
    """
    prefix = "yelp_response_"
    suffix = ".json"
    team_errors = []
    for file in lista:
        stem = file[:-len(suffix)] if file.endswith(suffix) else file
        if stem.startswith(prefix):
            stem = stem[len(prefix):]
        # Drop the trailing "_{offset}"; a name without it is kept whole.
        team = stem.rsplit("_", 1)[0]
        # A team with several bad pages should only be retried once.
        if team not in team_errors:
            team_errors.append(team)
    return team_errors

In [None]:
# Derive the team names from the flagged file names, then display them.
team_errors=tryagain(error_list)
team_errors

## Retry the download for the teams with errors

In [None]:
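# NOTE: this commented-out cell duplicates the body of newTry() defined below; kept for reference only.
# hoy = datetime.today()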
# newfolder=f'again_for_errors.{hoy.day}_{hoy.month}_{hoy.year}_{hoy.hour}h{hoy.minute}m'
# newpath=createFolder(newfolder)
# er_df=stadium_df[stadium_df["Team"].isin(team_errors)]
# new_list=teamLoc(er_df)	
# downloadJSON(new_list, "restaurants", 3000, newpath, cat=False)

In [None]:
def newTry(team_errors, cat=False):
    """Re-download the search results for the given teams into a new folder.

    A timestamped ``again_for_errors.*`` folder is created via ``createFolder``
    and the rows of the global ``stadium_df`` whose "Team" is in
    ``team_errors`` are downloaded again with ``downloadJSON``.

    Args:
        team_errors: team names to retry.
        cat: forwarded to ``createFolder`` and ``downloadJSON``.

    Returns:
        The path of the newly created folder.
    """
    now = datetime.today()
    retry_folder = f'again_for_errors.{now.day}_{now.month}_{now.year}_{now.hour}h{now.minute}m'
    newpath = createFolder(retry_folder, cat)

    needs_retry = stadium_df["Team"].isin(team_errors)
    retry_locations = teamLoc(stadium_df[needs_retry])
    downloadJSON(retry_locations, "restaurants", 3000, newpath, cat)
    return newpath

In [None]:
# Retry the failed teams and check the freshly downloaded files.
# NOTE(review): the original download above used cat=True, while this retry
# uses cat=False (no category filter, "General_request" folder) - confirm intended.
newerror_list= JSONcheck(newTry(team_errors, cat=False))

In [None]:
# Final check: re-scan the original download folder.
# NOTE(review): newTry writes its files to a separate folder, so this re-check
# does not cover the retried downloads - verify whether they should be merged first.
error_list2 = JSONcheck(outpath)