In [None]:
# Dependencies and Setup
from datetime import datetime
import pandas as pd
import numpy as np
import os
import requests
import json
from pprint import pprint
from dotenv import load_dotenv

In [None]:
# Yelp API setup: credentials, request headers and search endpoint
load_dotenv()  # picks up a .env file from the project folder, if there is one
API_KEY = os.getenv("Yelp_API")  # the key is read from the "Yelp_API" variable (set it in .env or in the environment)
headers = {'Authorization': f'Bearer {API_KEY}'}
base_url = "https://api.yelp.com/v3/businesses/search"

In [None]:
# Folder that holds the per-league stadium/arena CSV files
dataFolder = "stadiums_data"

In [None]:
def createFolder(outFolder, cat=False):
    """Create (if needed) and return the output folder for one download run.

    The folder is placed under ``json_files/Category_request`` when ``cat`` is
    true and under ``json_files/General_request`` otherwise. Missing parent
    folders are created too.

    Parameters
    ----------
    outFolder : str
        Name of the folder to create.
    cat : bool, default False
        Selects the ``Category_request`` parent instead of ``General_request``.

    Returns
    -------
    str
        Relative path of the folder.

    Raises
    ------
    OSError
        If the folder cannot be created for any reason other than it already
        existing (e.g. missing permissions, or a file is in the way).
    """
    root_dir = 'json_files'
    parent_dir = os.path.join(root_dir, "Category_request" if cat else "General_request")
    path = os.path.join(parent_dir, outFolder)
    try:
        # makedirs rather than mkdir: the parent folders may not exist yet on
        # a first run, and mkdir would fail without creating anything.
        os.makedirs(path)
    except FileExistsError:
        # Only an existing *directory* is fine; a file with that name is not.
        if not os.path.isdir(path):
            raise
        print('Folder already exists.')

    return path

In [None]:
def League(path=dataFolder, cat=False):
    """Ask for a league, load its stadium CSV and create the run's output folder.

    The league name typed by the user selects ``<path>/<league>.csv``. The
    output folder name is built from the league and the current date and time
    (it starts with ``cat_`` for both request types) and is created through
    ``createFolder``.

    Parameters
    ----------
    path : str
        Folder that contains the league CSV files.
    cat : bool, default False
        Forwarded to ``createFolder`` to choose the parent folder.

    Returns
    -------
    tuple
        ``(DataFrame with the stadium data, path of the output folder)``.
    """
    now = datetime.today()
    league = input("Input value:\nNFL, NBA, MLB, MLS, NHL: ")
    csv_path = os.path.join(path, f'{league}.csv')
    column_types = {
        "Team": "string",
        "City": "string",
        "State": "string",
        "Stadium Name": "string",
        "Latitude": np.float64,
        "Longitude": np.float64,
    }
    stadiums = pd.read_csv(csv_path, encoding='utf-8', dtype=column_types)
    run_folder = f'cat_{league}_{now.day}_{now.month}_{now.year}_{now.hour}h{now.minute}m'
    return stadiums, createFolder(run_folder, cat)

In [None]:
def teamLoc(df):
    """Return a list of ``(latitude, longitude, team)`` tuples, one per row.

    The values are read from the "Latitude", "Longitude" and "Team" columns
    of ``df``.
    """
    columns = (df[name] for name in ("Latitude", "Longitude", "Team"))
    return list(zip(*columns))

In [None]:
class Exception_1(Exception):
    """Raised when "total" is not found in the response to a request."""

In [None]:
# Function to download the JSON files
def downloadJSON(lista, term, radius, path, cat=False):
    """Download the Yelp search results around each location as JSON files.

    For every ``(latitude, longitude, team)`` entry a first request reads the
    number of matches. The results are then fetched in pages of 20 (at most
    1000 results per team) and each page is saved in ``path`` as
    ``yelp_response_{team}_{offset}.json``. A summary line is printed per team.

    Parameters
    ----------
    lista : iterable of (latitude, longitude, team)
        Locations to search around, e.g. the output of ``teamLoc``.
    term : str
        Search term sent to the API.
    radius : int
        Search radius sent to the API.
    path : str
        Existing folder in which the JSON files are written.
    cat : bool, default False
        When true, the request is restricted to the categories
        sportsbars, pubs, wine_bars and cocktailbars.

    Returns
    -------
    None
    """
    page_size = 20
    max_results = 1000

    # API parameters shared by every request
    base_params = {"term": term, "limit": page_size, "radius": radius, "sort_by": "rating"}
    if cat:
        base_params["categories"] = ("sportsbars", "pubs", "wine_bars", "cocktailbars")

    for count_f, (latitude, longitude, team) in enumerate(lista):
        # Fresh parameter set per team, so the last "offset" of the previous
        # team is not sent along with this team's first request.
        search_params = dict(base_params, latitude=latitude, longitude=longitude)
        # Bound before the try block so the summary line below never reports
        # an undefined value, or the previous team's total, when this team fails.
        total = 0
        response = requests.get(url=base_url, params=search_params, headers=headers).json()
        try:
            # An error payload carries no "total" key at all; handle it like an
            # empty result instead of letting a KeyError abort the whole run.
            found = response.get('total') if isinstance(response, dict) else None
            if not found:
                raise Exception_1(f"No results for {team}; response was: {response}")
            total = min(found, max_results)

            for search_offset in range(0, total, page_size):
                search_params["offset"] = search_offset
                page = requests.get(url=base_url, params=search_params, headers=headers).json()
                file_name = f'yelp_response_{team}_{search_offset}.json'
                output_path = os.path.join(path, file_name)

                # The with-block closes the file; no explicit close() is needed.
                with open(output_path, 'w', encoding='utf-8') as f:
                    json.dump(page, f, ensure_ascii=False, indent=4)

        except Exception_1 as e:
            print("Error: 'total' not found.\nPlease try again later.")
            print(e)

        print(f'{count_f}, {team} total: {total}')

In [None]:
# Load the chosen league (prompts for its name) and create the category-request output folder
stadium_df, outpath = League(path=dataFolder, cat=True)

In [None]:
# Display the loaded stadium table
stadium_df

In [None]:
# Build the list of (latitude, longitude, team) tuples to query
stadium=teamLoc(stadium_df)

In [None]:
# Display the locations that will be queried
stadium

In [None]:
# Category-restricted search for restaurants around every stadium
downloadJSON(lista=stadium, term="restaurants", radius=3000, path=outpath, cat=True)

## Check json files integrity
Sometimes, due to an internal error on Yelp's side, the Yelp API returns an error code instead of search results, and the saved JSON file then contains only that error.

The check below prints, for each JSON file, whether it contains errors.

If any files have errors, the request above needs to be re-run for them.

In [None]:
def JSONcheck(jsonpath):
    """Check every ``.json`` file in a folder for a usable Yelp response.

    A file counts as faulty when it cannot be parsed as JSON (for example
    because it is empty) or when the parsed content has no ``"total"`` entry.
    The result for each file is printed, followed by a summary.

    Parameters
    ----------
    jsonpath : str
        Folder that contains the downloaded JSON files.

    Returns
    -------
    list of str
        Names of the faulty files, in alphabetical order (empty if none).
    """
    error_list = []
    # sorted() gives a reproducible report; os.listdir order is arbitrary.
    for file in sorted(os.listdir(jsonpath)):
        open_path = os.path.join(jsonpath, file)
        # Only regular .json files are responses; skip sub-folders and stray files.
        if not (os.path.isfile(open_path) and file.lower().endswith('.json')):
            continue
        try:
            with open(open_path, 'r', encoding='utf-8') as f:
                contents = json.load(f)
            _ = contents['total']
        except (ValueError, KeyError, TypeError):
            # ValueError: empty/invalid JSON or undecodable bytes;
            # KeyError: no "total" entry; TypeError: top-level value is not a mapping.
            print(f"INTERNAL_ERROR in {file}")
            print("Something went wrong internally, please try downloading the json file again later.\n")
            error_list.append(file)
        else:
            print(f'\nNo errors found in the json file:\n{file}\n')

    if not error_list:
        print(f'No erros in json files.\nNo files to be removed for the analysis.')
    else:
        print(f'Files with errors:\n{error_list}')
    return error_list

In [None]:
# Check the downloaded files and keep the names of the faulty ones
error_list = JSONcheck(outpath)

In [None]:
# Display the names of the faulty files
error_list

In [None]:
def tryagain(lista, cat=False):
    """Pull one underscore-separated field out of every file name.

    Each name in ``lista`` is split on "_" and the piece at index 2 is kept
    (index 3 when ``cat`` is true). For names of the form
    ``yelp_response_{team}_{offset}.json`` index 2 is the team.

    Returns the extracted pieces as a list, in the order of ``lista``.
    """
    field = 3 if cat else 2
    return [name.split("_")[field] for name in lista]

In [None]:
# The file names written by downloadJSON are the same for both request types,
# so the default field position (cat=False) is the one that yields the team.
team_errors = tryagain(error_list, cat=False)
team_errors

## Retry the download for the teams with errors

In [None]:
# hoy = datetime.today()
# newfolder=f'again_for_errors.{hoy.day}_{hoy.month}_{hoy.year}_{hoy.hour}h{hoy.minute}m'
# newpath=createFolder(newfolder)
# er_df=stadium_df[stadium_df["Team"].isin(team_errors)]
# new_list=teamLoc(er_df)	
# downloadJSON(new_list, "restaurants", 3000, newpath, cat=False)

In [None]:
def newTry(team_errors, cat=False, df=None, term="restaurants", radius=3000):
    """Re-download the data for the teams whose files had errors.

    A new folder named ``again_for_errors.<date and time>`` is created through
    ``createFolder`` and the download is repeated for the rows of ``df`` whose
    "Team" value is in ``team_errors``.

    Parameters
    ----------
    team_errors : list of str
        Team names to download again.
    cat : bool, default False
        Request type, forwarded to ``createFolder`` and ``downloadJSON``.
    df : DataFrame, optional
        Stadium table to select the teams from. Defaults to the notebook-level
        ``stadium_df``.
    term : str, default "restaurants"
        Search term forwarded to ``downloadJSON``.
    radius : int, default 3000
        Search radius forwarded to ``downloadJSON``.

    Returns
    -------
    str
        Path of the folder that holds the re-downloaded files.
    """
    if df is None:
        # Fall back to the frame the notebook loaded earlier.
        df = stadium_df
    hoy = datetime.today()
    newfolder = f'again_for_errors.{hoy.day}_{hoy.month}_{hoy.year}_{hoy.hour}h{hoy.minute}m'
    newpath = createFolder(newfolder, cat)
    er_df = df[df["Team"].isin(team_errors)]
    new_list = teamLoc(er_df)
    downloadJSON(new_list, term, radius, newpath, cat)
    return newpath

In [None]:
# Retry with the same request type (cat=True) as the initial download, so the
# re-downloaded files use the same category filter and parent folder.
newerror_list = JSONcheck(newTry(team_errors, cat=True))

In [None]:
# Final check
# NOTE(review): this re-checks the original download folder; the retried files
# are written to the separate folder returned by newTry(), so any faulty files
# in outpath are presumably still there. Confirm this is the intended check.
error_list2 = JSONcheck(outpath)