# Team Scraper

In [None]:
# Standard library
import json
import os

# Local libraries
import Classes.TeamList as TeamList
import Classes.Team as Team
import Classes.Tournament as Tournament
import Tools.json_utils as ju


# Season used to build both the output file name and the schedule URL suffix.
YEAR = 2021

# Base URL handed to TeamList, and the per-team page suffix appended to each
# team URL when scraping.
TEAM_URL = "https://www.sports-reference.com/cbb/schools/"
URL_SUFFIX = f"{YEAR}-schedule.html"

# JSON file that the scraped team data is written to.
FILENAME = f"Data/data_{YEAR}.json"

In [None]:
def add_team_to_dictionary(team_url: str, school: str, filename: str):
    """Scrape one team and store its data in the JSON file.

    A ``Team`` is built from ``team_url``. When it exposes a data frame, the
    frame is converted to a column-oriented dictionary, keyed by ``school``,
    and passed to ``ju.add_dictionary_to_json``. When the team has no data
    frame, nothing is written. A status line is printed in both cases.

    Args:
        team_url (str): URL for team matchup data
        school (str): school name used as the key in the JSON file
        filename (str): Name of JSON team file
    """

    scraped = Team.Team(url=team_url)

    # Nothing to store for this school: report it and stop.
    if scraped.df is None:
        print(f"{school} has no records.")
        return

    payload = {school: scraped.df.to_dict(orient="list")}
    ju.add_dictionary_to_json(dct=payload, filename=filename)
    print(f"{school} added to JSON successfully.")


def _load_existing_data(filename: str):
    """Return the parsed JSON content of ``filename``, or an empty dict.

    An empty dict is returned when the file does not exist, is zero bytes
    long, or contains a falsy JSON value (for example ``{}``).
    """
    if not (os.path.exists(filename) and os.path.getsize(filename) > 0):
        return {}
    with open(filename, "r") as file:
        return json.load(file) or {}


def add_teams_to_json(team_list, filename: str, url_suffix: str):
    """Add every team in ``team_list`` to the JSON file unless already present.

    For each row, the team URL is built from the ``URL`` column plus
    ``url_suffix``. Schools that are already keys in the JSON file are
    skipped; all others are passed to ``add_team_to_dictionary``. After the
    loop, ``ju.print_season_end_team_win_loss`` is called on the file.

    Args:
        team_list: class holding data frame of all teams; its ``df`` must
            provide ``"URL"`` and ``"School"`` columns
        filename (str): Name of JSON team file
        url_suffix (str): suffix for URLs
    """

    # Pair the columns positionally rather than looking rows up by label, so
    # a data frame whose index is not 0..n-1 (e.g. after filtering) still
    # works and the printed counter stays sequential.
    rows = zip(team_list.df["URL"], team_list.df["School"])
    for i, (base_url, school) in enumerate(rows):
        team_url = base_url + url_suffix
        print(f"{i}. {school}")

        # Re-read the file on every pass: it changes as teams are added
        # during this same loop.
        if school in _load_existing_data(filename):
            print(f"{school} already exists in current JSON.")
            continue

        add_team_to_dictionary(team_url=team_url,
                               school=school,
                               filename=filename)

    ju.print_season_end_team_win_loss(filename=filename)

# Team

In [None]:
# Fetch two individual team schedules as a quick check of the Team class.
# Both URLs use the same ".../<school>/men/<season>-schedule.html" layout.
# NOTE(review): these point at the 2024 season, not YEAR - confirm intended.
test_url = "https://www.sports-reference.com/cbb/schools/kansas/men/2024-schedule.html"
team1 = Team.Team(url=test_url)

test_url = "https://www.sports-reference.com/cbb/schools/arkansas/men/2024-schedule.html"
team2 = Team.Team(url=test_url)

In [None]:
# Column-oriented dictionaries of both test teams, keyed by school name.
dct = {
    "Kansas": team1.df.to_dict(orient="list"),
    "Arkansas": team2.df.to_dict(orient="list"),
}

In [None]:
# Write the test teams to the season file, then print the win/loss summary.
# NOTE(review): the test teams were fetched from 2024 schedule URLs, while
# FILENAME is derived from YEAR - confirm the seasons are meant to match.
ju.add_dictionary_to_json(dct=dct, filename=FILENAME)
ju.print_season_end_team_win_loss(filename=FILENAME)

# Team List

In [None]:
# Build the list of all teams from the schools index page; its `df` is read
# by add_teams_to_json for the "URL" and "School" columns.
team_list = TeamList.TeamList(url=TEAM_URL)

In [None]:
# Process every listed team, adding those not already in the season file.
add_teams_to_json(
    team_list=team_list,
    filename=FILENAME,
    url_suffix=URL_SUFFIX,
)