In [1]:
# Standard library imports
import os
import re

# Third-party library imports
import geocoder
import requests
import pandas as pd
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Local imports
from customScripts import state_mapping, all_states

# Load environment variables from .env file
load_dotenv()

# Access the Bing Maps API key from the environment variable
bing_api_key = os.getenv('BING_MAPS_API_KEY')

# Golf Digest

In [2]:
golf_digest_public_url = "https://www.golfdigest.com/story/americas-100-greatest-public-golf-courses-ranking"
response = requests.get(golf_digest_public_url)
html_content = response.text
soup = BeautifulSoup(html_content, "html.parser")

all_data = soup.find_all(class_ = "o-InfoBox")

all_courses = [x.find_all(class_ = "o-InfoBox__a-Title")[0].get_text(strip=True) for x in all_data]

course_locations = [x.find_all(class_ = "o-InfoBox__a-Subtitle")[0].get_text(strip=True) for x in all_data]

panelist_ratings = []
for course in all_data:
    try:
        panelist_ratings.append(2 * float(course.find_all(class_ = "o-InfoBox__a-Rating")[0].find('div', class_= 'o-Rating')['data-score']))
    except IndexError:
        panelist_ratings.append("-")

In [3]:
golf_digest_public = pd.DataFrame(columns=["CourseName","Played", "Date", "Notes", "CurrentRanking", "PastRanking","PanelistRating", "City", "State", "Country", "Architect", "Latitude", "Longitude"])

for course, course_location, panelist_rating in zip(all_courses, course_locations, panelist_ratings):
    current_ranking = course.split(".")[0]
    last_ranking = course.split(".")[1].split(")")[0].split("(")[1]
    course_name = course.split(") ")[-1]

    city = course_location.split(", ")[0]
    state = course_location.split(", ")[-1]

    location = geocoder.bing(f"{course_name} {city}, {state}", key=bing_api_key).json    
    latitude = location['lat']
    longtitude = location['lng']
    country = location['country']

    golf_digest_public.loc[len(golf_digest_public)] = [course_name, "", "", "", current_ranking, last_ranking, panelist_rating, city, state, country, "-", latitude, longtitude]

In [39]:
state = "Illinois"
url = f"https://www.golfdigest.com/places-to-play/collections/{state}-best-golf-courses-rankings"
response = requests.get(url)
html_content = response.text
soup = BeautifulSoup(html_content, "html.parser")

all_data = soup.find_all(class_ = "o-InfoBox")
all_data

[]

In [62]:
states_data =  {}

for state in all_states:
    states_data[state] = pd.DataFrame(columns=["CourseName","Played", "Date", "Notes", "CurrentRanking", "PastRanking", "PanelistRating", "City", "State", "Country", "Architect", "Latitude", "Longitude"])

for state, df in states_data.items():
    print()

    url = f"https://www.golfdigest.com/places-to-play/collections/{state.lower()}-best-golf-courses-rankings"
    response = requests.get(url)
    html_content = response.text
    soup = BeautifulSoup(html_content, "html.parser")

    all_data = soup.find_all(class_ = "o-InfoBox")

    all_courses = [x.find_all(class_ = "o-InfoBox__a-Title")[0].get_text(strip=True) for x in all_data]

    course_locations = [x.find_all(class_ = "o-InfoBox__a-Subtitle")[0].get_text(strip=True) for x in all_data]

    panelist_ratings = []
    for course in all_data:
        try:
            panelist_ratings.append(2 * float(course.find_all(class_ = "o-InfoBox__a-Rating")[0].find('div', class_= 'o-Rating')['data-score']))
        except IndexError:
            panelist_ratings.append("-")

    for course, course_location, panelist_rating in zip(all_courses, course_locations, panelist_ratings):

        try:
            current_ranking = re.search(r'(\d+)\s*\((NR|NEW|\d+)\)', course).group(1)
            last_ranking = re.search(r'\((NR|NEW|\d+)\)', course).group(1)
            course_name = re.search(r'\)\s*(\w.*)', course).group(1)

            city = course_location.split(", ")[0]
            state_acryn = course_location.split(", ")[-1]

        except Exception as e:
            print(f"Issue finding ranking data for {course} in {state}.\n{e}\n")

        try:
            location = geocoder.bing(f"{course_name} {city}, {state_acryn}", key=bing_api_key).json    
            latitude = location['lat']
            longtitude = location['lng']
            country = location['country']
        except TypeError:
            print(f"Issue finding location data for {course} in {state}.")
            latitude, longtitude, country = "Err.", "Err.", "United States"
        df.loc[len(df)] = [course_name, "", "", "", current_ranking, last_ranking, panelist_rating, city, state_acryn, country, "-", latitude, longtitude]

    #states_data.append((state, state_data))


Issue finding ranking data for 1. (1) Moose Run Golf Course: Creek in Alaska.
'NoneType' object has no attribute 'group'

Issue finding ranking data for 2. (NR) Anchorage Golf Course in Alaska.
'NoneType' object has no attribute 'group'

Issue finding ranking data for 3. (NR) Settlers Bay Golf Course in Alaska.
'NoneType' object has no attribute 'group'


Issue finding ranking data for 1. (1) Shoal Creek in Alabama.
'NoneType' object has no attribute 'group'

Issue finding ranking data for 2. (2) The Country Club of Birmingham: West in Alabama.
'NoneType' object has no attribute 'group'

Issue finding ranking data for 3. (3) The Ledges in Alabama.
'NoneType' object has no attribute 'group'

Issue finding ranking data for 4. (4) Turtle Point Yacht & Country Club in Alabama.
'NoneType' object has no attribute 'group'

Issue finding ranking data for 5. (6) Old Overton Club in Alabama.
'NoneType' object has no attribute 'group'

Issue finding ranking data for 6. (5) Robert Trent Jones Gol

KeyboardInterrupt: 

In [68]:
ts = "1. (1) Moose Run Golf Course: Creek"


match = re.search(r'^(\d+)', ts)
if match:
    print(f"Current Ranking: {match.group(1)}")

match = re.search(r'(\d+)\s*\((NR|NEW|\d+)\)', ts)
if match:
    print(f"Past ranking: {match.group(1)}")

Current Ranking: 1


# Golfweek

In [5]:
golfweek_public_courses_url = "https://golfweek.usatoday.com/lists/golfweeks-best-2023-courses-you-can-play-top-100-u-s-public-access-golf-ranking/"
response = requests.get(golfweek_public_courses_url)
html_content = response.text
soup = BeautifulSoup(html_content, "html.parser")
courses = [c.get_text() for c in soup.find_all('p') if (c.get_text()[0].isdigit() or c.get_text()[1].isdigit())]
courses = [c if c[0].isdigit() else c[1:] for c in courses]

In [6]:
golfweek_public = pd.DataFrame(columns=["CourseName","Played", "Date", "Notes", "CurrentRanking", "PastRanking","PanelistRating", "City", "State", "Country", "Architect", "Latitude", "Longitude"])

for count, course in enumerate(courses):

    current_ranking = course.split(".")[0]
    
    if "*" in course:
        last_ranking = "NEW"
    else:
        last_ranking = next((x for x in re.findall(r'(?:\((\d+)\)|T(\d+))', course.split("\n")[0])[0] if x), None)

    panelist_rating = float(re.search(r'\b\d+\.\d{2}\b', course).group())
    
    course_name_regex = re.search(r'\. (.+?)(?: \((?:T\d+|\*|\d+)\)|\*\s)', course)
    if course_name_regex:
        course_name = course_name_regex.group(1)


    location_regex = re.search(r'(?<=\n)(.+?)(?=\;)', course)
    if location_regex:
        city_and_state = location_regex.group(0).split(", ")

        city = city_and_state[0]
        state = state_mapping.get(city_and_state[1])

    location = geocoder.bing(f"{course_name} {city}, {state}", key=bing_api_key).json    
    latitude = location['lat']
    longtitude = location['lng']
    country = location['country']

    architect = re.findall(r'(?<=;\s)([^()]+)(?=\s\()', course)[0]

    golfweek_public.loc[len(golfweek_public)] = [course_name, "", "", "", current_ranking, last_ranking, panelist_rating, city, state, country, "-", latitude, longtitude]

# Create Excel Document

In [7]:
with pd.ExcelWriter('all_course_ratings.xlsx', engine='xlsxwriter') as excel_writer:
    golf_digest_public.to_excel(excel_writer, sheet_name="GD - Best USA Public 23-24", index=False)
    golfweek_public.to_excel(excel_writer, sheet_name="GW - Best USA Public 23-24", index=False)

    for state in states_data:
        states_data[state].to_excel(excel_writer, sheet_name = f"{state} - GD", index=False)
