In [20]:
import requests
from bs4 import BeautifulSoup
from bs4.element import Tag
from datetime import datetime

In [2]:
url_main = 'https://www.basketball-reference.com/leagues/NBA_2024_games.html'

In [23]:
def parse_data_point(key: str, text: str) -> datetime | int | str:
    """
    Parses match data point based on its key.

    Parameters:
    key (str): Name of the data point.
    text (str): Text content of the data point

    Returns:
    (datetime): If the key is 'date'.
    (int): If the key is 'visitor_pts', 'home_pts' or 'overtime' (representing number of overtimes played).
    (string): For all other keys.
    """

    if key == 'date':
        return datetime.strptime(text, '%a, %b %d, %Y')
    
    elif key in ('visitor_pts' or 'home_pts'):
        return int(text)
    
    elif key == 'overtime':
        if text is None:
            return 0
        elif text == 'OT':
            return 1
        else:
            try:
                return int(text[:-2])
            except ValueError:
                return 0
            
    else:
        return text
    

def get_match_data(trow: Tag) -> dict:
    """
    Builds a dictionary describing one NBA match from a schedule table row.

    Parameters:
    trow (bs4.element.Tag): A BeautifulSoup <tr> tag holding the cells of a single match.

    Returns:
    dict: The parsed values, keyed as follows (each value is whatever
    `parse_data_point` returns for that key):
        - 'date': from the <th> cell with data-stat 'date_game'.
        - 'visitor_name': from the <td> cell with data-stat 'visitor_team_name'.
        - 'visitor_pts': from the <td> cell with data-stat 'visitor_pts'.
        - 'home_name': from the <td> cell with data-stat 'home_team_name'.
        - 'home_pts': from the <td> cell with data-stat 'home_pts'.
        - 'overtime': from the <td> cell with data-stat 'overtimes'.

    Notes:
    Each cell's stripped text is converted by `parse_data_point(key, text)`.
    A row lacking one of the cells is not handled here: the `.text` access
    on the missing lookup result will raise.
    """

    # (output key, cell tag name, value of the cell's data-stat attribute)
    cell_locators = (
        ('date', 'th', 'date_game'),
        ('visitor_name', 'td', 'visitor_team_name'),
        ('visitor_pts', 'td', 'visitor_pts'),
        ('home_name', 'td', 'home_team_name'),
        ('home_pts', 'td', 'home_pts'),
        ('overtime', 'td', 'overtimes'),
    )

    return {
        field: parse_data_point(
            field,
            trow.find(tag_name, {'data-stat': stat_name}).text.strip(),
        )
        for field, tag_name, stat_name in cell_locators
    }

# Download the season schedule page and print the parsed data of its first row.
try:
    # requests applies no timeout by default; set one so a stalled server
    # cannot hang the cell and the Timeout handler below is reachable.
    response = requests.get(url_main, timeout=10)
    # Turn 4xx/5xx responses into HTTPError (a RequestException) instead of
    # trying to parse an error page as if it were the schedule.
    response.raise_for_status()
except requests.exceptions.ConnectionError:
    print('Failed to connect to basketball-reference site')
except requests.exceptions.Timeout:
    print('The request timed out')
except requests.exceptions.RequestException as e:
    print(f'An error occured: {e}')
else:
    # Parsing happens outside the try body: only the network call can raise
    # the request exceptions handled above.
    soup = BeautifulSoup(response.text, 'html.parser')
    table = soup.find('table', id='schedule')
    tbody = table.find('tbody') if table is not None else None
    if tbody is None:
        # The page layout is not what the scraper expects; report it rather
        # than failing with an AttributeError on None.
        print('Schedule table not found in the response')
    else:
        for trow in tbody.find_all('tr'):
            match_data = get_match_data(trow)
            print(match_data)
            # Only the first row is inspected for now.
            break

{'date': datetime.datetime(2023, 10, 24, 0, 0), 'visitor_name': 'Los Angeles Lakers', 'visitor_pts': 107, 'home_name': 'Denver Nuggets', 'home_pts': '119', 'overtime': 0}
