In [1]:
# Standard library imports
import os
import re

# Third-party library imports
import geocoder
import requests
import importlib
import pandas as pd
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Local imports
import customScripts
importlib.reload(customScripts)
from customScripts import state_mapping, all_states, bing_api

# Load environment variables from .env file
load_dotenv()

# Access the Bing Maps API key from the environment variable
# .env file format:
# BING_MAPS_API_KEY="<api key>"
bing_api_key = os.getenv('BING_MAPS_API_KEY')

# Every geocoding call in this notebook passes bing_api_key, so report a
# missing/empty value here instead of letting each lookup fail later with a
# much less obvious error.
if not bing_api_key:
    print("Warning: BING_MAPS_API_KEY is not set; Bing Maps geocoding lookups will fail.")

# Golf Digest

In [2]:
# Download the Golf Digest national public-course ranking article and pull the
# title, location and panelist score out of every "o-InfoBox" element.
golf_digest_public_url = "https://www.golfdigest.com/story/americas-100-greatest-public-golf-courses-ranking"

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops us from silently parsing an HTTP error page into
# an empty result.
response = requests.get(golf_digest_public_url, timeout=30)
response.raise_for_status()
html_content = response.text
soup = BeautifulSoup(html_content, "html.parser")

all_data = soup.find_all(class_ = "o-InfoBox")

# One entry per info box; these three lists stay index-aligned with all_data.
all_courses = [x.find_all(class_ = "o-InfoBox__a-Title")[0].get_text(strip=True) for x in all_data]

course_locations = [x.find_all(class_ = "o-InfoBox__a-Subtitle")[0].get_text(strip=True) for x in all_data]

panelist_ratings = []
for course in all_data:
    try:
        rating_box = course.find_all(class_ = "o-InfoBox__a-Rating")[0]
        raw_score = rating_box.find('div', class_= 'o-Rating')['data-score']
        # The score is doubled before being stored; presumably this rescales
        # the site's value to a 10-point scale -- TODO confirm.
        panelist_ratings.append(2 * float(raw_score))
    except (IndexError, TypeError, KeyError, ValueError):
        # IndexError: no rating box; TypeError: no 'o-Rating' div (find()
        # returned None); KeyError: no 'data-score' attribute; ValueError:
        # attribute is not numeric. All of them mean "no usable rating".
        panelist_ratings.append("-")

In [3]:
# Build the Golf Digest national public-course table from the scraped titles,
# locations and ratings, geocoding each course through Bing Maps.
golf_digest_columns = ["CourseName","Played", "Date", "Notes", "CurrentRanking", "PastRanking","PanelistRating", "City", "State", "Country", "Architect", "Latitude", "Longitude"]

# Rows are collected in a plain list and turned into a DataFrame once at the
# end, instead of growing the DataFrame one .loc assignment at a time.
golf_digest_records = []

for course, course_location, panelist_rating in zip(all_courses, course_locations, panelist_ratings):
    # The parsing below assumes titles shaped like "<rank>. (<previous rank>) <name>".
    current_ranking = course.split(".")[0]
    last_ranking = course.split(".")[1].split(")")[0].split("(")[1]
    # Split only on the first ") " so a course name that contains its own
    # parenthesised part followed by more text is not truncated.
    course_name = course.split(") ", 1)[-1]

    city = course_location.split(", ")[0]
    state = course_location.split(", ")[-1]

    # geocoder's .json is None when Bing returns no match, so fall back to
    # "Err" placeholders instead of crashing the whole loop on one course.
    latitude, longitude, country = "Err", "Err", "Err"
    location = geocoder.bing(f"{course_name} {city}, {state}", key=bing_api_key).json
    if location:
        latitude = location.get('lat', "Err")
        longitude = location.get('lng', "Err")
        country = location.get('country', "Err")
    else:
        print(f"No Bing Maps result for course {course_name} in {city}, {state}.")

    # Played / Date / Notes are left blank and Architect is "-" for this source.
    golf_digest_records.append([course_name, "", "", "", current_ranking, last_ranking, panelist_rating, city, state, country, "-", latitude, longitude])

golf_digest_public = pd.DataFrame(golf_digest_records, columns=golf_digest_columns)

In [4]:
# Scrape Golf Digest's per-state "best golf courses" ranking page for every
# entry in all_states and build one DataFrame per state in states_data.
state_columns = ["CourseName","Played", "Date", "Notes", "CurrentRanking", "PastRanking", "PanelistRating", "City", "State", "Country", "Architect", "Latitude", "Longitude"]

# Every state starts with an empty table, so a state whose page cannot be
# downloaded still has a (blank) entry in states_data.
states_data = {state: pd.DataFrame(columns=state_columns) for state in all_states}

for state in list(states_data):
    # NOTE(review): the URL slug is simply the lower-cased state name; confirm
    # that multi-word names in all_states are already in the form the site expects.
    url = f"https://www.golfdigest.com/places-to-play/collections/{state.lower()}-best-golf-courses-rankings"
    try:
        # The timeout keeps one stalled request from hanging the whole loop.
        response = requests.get(url, timeout=30)
    except requests.RequestException as request_error:
        print(f"Unable to download rankings for {state}. {request_error}")
        continue
    if not response.ok:
        # Skip error pages rather than parsing them; the state keeps its empty table.
        print(f"Unable to download rankings for {state} (HTTP status {response.status_code}).")
        continue

    html_content = response.text
    soup = BeautifulSoup(html_content, "html.parser")

    all_data = soup.find_all(class_ = "o-InfoBox")

    all_courses = [x.find_all(class_ = "o-InfoBox__a-Title")[0].get_text(strip=True) for x in all_data]

    # Info boxes without a subtitle get the "Ukn., Ukn." placeholder so the
    # list stays index-aligned with all_courses.
    course_locations = []
    for x in all_data:
        try:
            course_locations.append(x.find_all(class_ = "o-InfoBox__a-Subtitle")[0].get_text(strip=True))
        except IndexError:
            course_locations.append("Ukn., Ukn.")

    panelist_ratings = []
    for course in all_data:
        try:
            rating_box = course.find_all(class_ = "o-InfoBox__a-Rating")[0]
            raw_score = rating_box.find('div', class_= 'o-Rating')['data-score']
            # The score is doubled before being stored; presumably this
            # rescales the site's value to a 10-point scale -- TODO confirm.
            panelist_ratings.append(2 * float(raw_score))
        except (IndexError, TypeError, KeyError, ValueError):
            # Missing rating box, missing 'o-Rating' div, missing 'data-score'
            # attribute or non-numeric value: record "no rating".
            panelist_ratings.append("-")

    # Rows for this state are collected here and converted to a DataFrame once.
    state_records = []

    for course, course_location, panelist_rating in zip(all_courses, course_locations, panelist_ratings):

        # Get Current Ranking for each course (leading digits of the title,
        # e.g. "25" in "25. (21) Cherokee Town & Country Club (North)").
        current_ranking_regex = re.match(r'^\d+', course)
        if current_ranking_regex:
            current_ranking = current_ranking_regex.group()
        else:
            current_ranking = "Err"
            print(f"Current ranking unable to be found for course {course} in {state}.")

        # Get Past Ranking for each course. The alternatives are grouped so
        # that digits, NR and NEW must all sit directly between "(" and ")";
        # without the group a bare "NR" anywhere in the title would match.
        past_ranking_regex = re.search(r'(?<=\()(?:\d+|NR|NEW)(?=\))', course)
        if past_ranking_regex:
            past_ranking = past_ranking_regex.group()
        else:
            past_ranking = "Err"
            print(f"Past ranking unable to be found for course {course} in {state}.")

        # Get Course Name for each course: everything after the first
        # parenthesised group in the title.
        course_name_regex = re.search(r'.*?(\(.*?\))\s*(.*)', course)
        if course_name_regex:
            course_name = course_name_regex.group(2)
        else:
            course_name = 'Err'
            print(f"Course name unable to be found for course {course} in {state}.")

        # Get Course City and State label. str.split always returns at least
        # one element, so [0] and [-1] cannot raise; a location without ", "
        # yields the same text for both fields.
        location_parts = course_location.split(", ")
        city = location_parts[0]
        state_label = location_parts[-1]

        # Get Course specific location for each course
        latitude, longitude, country = "Err", "Err", "Err"
        if city == "Ukn." or state_label == "Ukn.":
            # Nothing meaningful to geocode: skip the API call entirely.
            latitude, longitude, country = "Ukn.", "Ukn.", "Ukn."
        else:
            try:
                location = geocoder.bing(f"{course_name} {city}, {state_label}", key=bing_api_key).json
                try:
                    latitude = location['lat']
                    longitude = location["lng"]
                    country = location["country"]

                except (TypeError, KeyError) as inner_error:
                    # TypeError: .json was None (no match); KeyError: a field
                    # is missing from the result. Retry through the bing_api
                    # helper with " Golf" appended to the course name.
                    print(f"Error getting course lat, lon, or country from Bing for course {course} in {state}. {inner_error}\n")
                    try:
                        latitude, longitude, country = bing_api(course_name + " Golf", city, state_label, bing_api_key)
                    except Exception:
                        print("Adding 'Golf' to course name did not result in response from Bing Maps API.")

            except Exception as e:
                print(f'Error getting course location from Bing for course {course} in {state}.\n{e}\n')

        # Add data to this State's records.
        state_records.append([course_name, "", "", "", current_ranking, past_ranking, panelist_rating, city, state_label, country, "-", latitude, longitude])

    states_data[state] = pd.DataFrame(state_records, columns=state_columns)

Error getting course lat, lon, or country from Bing for course 25. (21) Cherokee Town & Country Club (North) in Georgia. 'NoneType' object is not subscriptable

Error getting course lat, lon, or country from Bing for course 24. (24) Exmoor Country Club in Illinois. 'NoneType' object is not subscriptable

Current ranking unable to be found for course The Grove in Tennessee.
Past ranking unable to be found for course The Grove in Tennessee.
Course name unable to be found for course The Grove in Tennessee.


# Golfweek

In [5]:
# Download the Golfweek public-access ranking page and keep the text of every
# <p> element that looks like a ranking entry.
golfweek_public_courses_url = "https://golfweek.usatoday.com/lists/golfweeks-best-2023-courses-you-can-play-top-100-u-s-public-access-golf-ranking/"

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops us from silently parsing an HTTP error page.
response = requests.get(golfweek_public_courses_url, timeout=30)
response.raise_for_status()
html_content = response.text
soup = BeautifulSoup(html_content, "html.parser")

paragraph_texts = [p.get_text() for p in soup.find_all('p')]

# Keep paragraphs whose first or second character is a digit. Slices are used
# instead of [0] / [1] because empty or one-character paragraphs would raise
# IndexError; "".isdigit() is simply False.
courses = [text for text in paragraph_texts if text[:1].isdigit() or text[1:2].isdigit()]

# Entries that start with a single non-digit character have it dropped so
# every entry begins with its ranking number.
courses = [c if c[0].isdigit() else c[1:] for c in courses]

In [6]:
# Build the Golfweek public-course table from the scraped paragraph texts in
# `courses`, geocoding each course through Bing Maps.
golfweek_columns = ["CourseName","Played", "Date", "Notes", "CurrentRanking", "PastRanking","PanelistRating", "City", "State", "Country", "Architect", "Latitude", "Longitude"]

# Rows are collected in a plain list and turned into a DataFrame once at the
# end, instead of growing the DataFrame one .loc assignment at a time.
golfweek_records = []

for course in courses:

    # Everything before the first "." is taken as the current ranking.
    current_ranking = course.split(".")[0]

    # An asterisk anywhere in the entry is treated as "new to the list";
    # otherwise the previous rank is read from "(<n>)" or "T<n>" on the first line.
    if "*" in course:
        last_ranking = "NEW"
    else:
        past_ranking_matches = re.findall(r'(?:\((\d+)\)|T(\d+))', course.split("\n")[0])
        if past_ranking_matches:
            last_ranking = next((x for x in past_ranking_matches[0] if x), None)
        else:
            last_ranking = "Err"
            print(f"Past ranking unable to be found for course {course}.")

    # First number with exactly two decimals is used as the panelist rating.
    rating_match = re.search(r'\b\d+\.\d{2}\b', course)
    if rating_match:
        panelist_rating = float(rating_match.group())
    else:
        panelist_rating = "-"
        print(f"Panelist rating unable to be found for course {course}.")

    # Reset per-row values up front so a failed match can never silently reuse
    # the previous course's name or location.
    course_name = "Err"
    course_name_regex = re.search(r'\. (.+?)(?: \((?:T\d+|\*|\d+)\)|\*\s)', course)
    if course_name_regex:
        course_name = course_name_regex.group(1)
    else:
        print(f"Course name unable to be found for course {course}.")

    # The location is the text between the first newline and the first ";".
    city, state = "Ukn.", "Ukn."
    location_regex = re.search(r'(?<=\n)(.+?)(?=\;)', course)
    if location_regex:
        city_and_state = location_regex.group(0).split(", ")

        city = city_and_state[0]
        if len(city_and_state) > 1:
            state_abbreviation = city_and_state[1]
            # Presumably state_mapping translates the page's state
            # abbreviation to the form used elsewhere -- TODO confirm. Unknown
            # abbreviations are kept as-is instead of becoming None.
            state = state_mapping.get(state_abbreviation, state_abbreviation)
    else:
        print(f"Course location unable to be found for course {course}.")

    # geocoder's .json is None when Bing returns no match, so fall back to
    # placeholders instead of crashing the whole loop on one course.
    latitude, longitude, country = "Err", "Err", "Err"
    if city == "Ukn." or state == "Ukn.":
        latitude, longitude, country = "Ukn.", "Ukn.", "Ukn."
    else:
        location = geocoder.bing(f"{course_name} {city}, {state}", key=bing_api_key).json
        if location:
            latitude = location.get('lat', "Err")
            longitude = location.get('lng', "Err")
            country = location.get('country', "Err")
        else:
            print(f"No Bing Maps result for course {course_name} in {city}, {state}.")

    # Architect text sits after "; " and before the following " (".
    architect_matches = re.findall(r'(?<=;\s)([^()]+)(?=\s\()', course)
    architect = architect_matches[0] if architect_matches else "-"

    # The parsed architect is now stored in the Architect column; previously
    # it was computed and then discarded in favour of a hard-coded "-".
    golfweek_records.append([course_name, "", "", "", current_ranking, last_ranking, panelist_rating, city, state, country, architect, latitude, longitude])

golfweek_public = pd.DataFrame(golfweek_records, columns=golfweek_columns)

# Create Excel Documents

In [7]:
# Write every ranking table to Excel and CSV.
# The output folders are created first so the cell also works on a fresh
# checkout where they do not exist yet.
for output_dir in ("Excel Files", "Excel Files/Individual States", "CSV Files", "CSV Files/Individual States"):
    os.makedirs(output_dir, exist_ok=True)

# Combined workbook: one sheet per national list plus one sheet per state.
with pd.ExcelWriter('Excel Files/All Course Rankings.xlsx', engine='xlsxwriter') as excel_writer:
    golf_digest_public.to_excel(excel_writer, sheet_name="GD - Best USA Public 23-24", index=False)
    golfweek_public.to_excel(excel_writer, sheet_name="GW - Best USA Public 23-24", index=False)

    for state in states_data:
        states_data[state].to_excel(excel_writer, sheet_name = f"{state} - GD", index=False)


# Stand-alone Golf Digest national list (Excel + CSV). The CSV export does not
# use the Excel writer, so it lives outside the `with` block.
with pd.ExcelWriter("Excel Files/Golf Digest Top 100 Public Courses in the US.xlsx", engine='xlsxwriter') as excel_writer:
    golf_digest_public.to_excel(excel_writer, index=False)
golf_digest_public.to_csv("CSV Files/Golf Digest Top 100 Public Courses in the US.csv", index=False)

# Stand-alone Golfweek national list (Excel + CSV).
with pd.ExcelWriter("Excel Files/Golfweek Top 100 Public Courses in the US.xlsx", engine='xlsxwriter') as excel_writer:
    golfweek_public.to_excel(excel_writer, index=False)
golfweek_public.to_csv("CSV Files/Golfweek Top 100 Public Courses in the US.csv", index=False)


# One Excel workbook and one CSV file per state.
for state in states_data:
    with pd.ExcelWriter(f"Excel Files/Individual States/{state} - Golf Digest Top Courses in each State.xlsx", engine='xlsxwriter') as excel_writer:
        states_data[state].to_excel(excel_writer, index=False)
    states_data[state].to_csv(f"CSV Files/Individual States/{state} - Golf Digest Top Courses in each State.csv", index=False)
