# Team Scraper

In [8]:
# Standard library
import json
import os

# Local libraries
import Classes.TeamList as TeamList
import Classes.Team as Team
import Tools.json_utils as ju

# JSON file passed as `filename` to the json_utils helpers and to
# add_teams_to_json below.
FILENAME = "data_2024.json"
# URL handed to TeamList to build the list of teams.
TEAM_URL = "https://www.sports-reference.com/cbb/schools/"
# Appended to each team's URL inside add_teams_to_json (passed as `url_suffix`).
URL_SUFFIX = "2024-schedule.html"

In [16]:
def add_teams_to_json(team_list, filename: str, url_suffix: str):
    """Add all teams to the JSON file if they don't already exist.

    The JSON file is read once up front to learn which schools are already
    stored. A missing, empty, or empty-object file is treated as "no teams
    stored yet", so every team in ``team_list`` is attempted in that case.
    After the loop the win/loss summary for ``filename`` is printed.

    Args:
        team_list: class holding data frame of all teams; its ``df`` must
            expose parallel ``"URL"`` and ``"School"`` columns.
        filename (str): Name of JSON team file
        url_suffix (str): suffix appended to each team's URL

    Raises:
        json.JSONDecodeError: if ``filename`` exists, is non-empty, and does
            not contain valid JSON.
    """
    # Names of schools already present in the file. Starts empty so that a
    # fresh / empty file still gets populated instead of being skipped.
    existing_teams = set()
    if os.path.exists(filename) and os.path.getsize(filename) > 0:
        with open(filename, "r") as file:
            existing_data = json.load(file)
        if existing_data:
            existing_teams = set(existing_data.keys())

    # Iterate the two columns positionally rather than by index label so the
    # loop does not depend on the frame having a 0..n-1 index.
    rows = zip(team_list.df["URL"], team_list.df["School"])
    for i, (base_url, school) in enumerate(rows):
        print(f"{i}. {school}")

        if school in existing_teams:
            print(f"{school} already exists in current JSON.")
            continue

        team = Team.Team(url=base_url + url_suffix)

        # Team leaves `df` as None when it could not build a schedule frame.
        if team.df is None:
            print(f"{school} has no records.")
            continue

        ju.add_dictionary_to_json(
            dct={school: team.df.to_dict(orient="list")},
            filename=filename,
        )
        # NOTE(review): assumes add_dictionary_to_json stored the entry (and
        # creates the file when it is absent) — confirm in Tools.json_utils.
        existing_teams.add(school)
        print(f"{school} added to JSON successfully.")

    ju.print_team_win_loss(filename=filename)

# Team

In [None]:
# Build two Team objects from hard-coded schedule URLs; `test_url` is reused,
# so after this cell it holds the Arkansas URL.
# NOTE(review): the Kansas URL has no "/men/" path segment while the Arkansas
# URL does — confirm both forms resolve to the intended schedule page.
test_url = "https://www.sports-reference.com/cbb/schools/kansas/2024-schedule.html"
team1 = Team.Team(url=test_url)

test_url = "https://www.sports-reference.com/cbb/schools/arkansas/men/2024-schedule.html"
team2 = Team.Team(url=test_url)

In [None]:
# Collect each team's `df.to_dict(orient="list")` result under its school name.
dct = {
    "Kansas": team1.df.to_dict(orient="list"),
    "Arkansas": team2.df.to_dict(orient="list"),
}

In [None]:
# Hand the collected dictionary to the JSON helper, then call the win/loss
# printer on the same file.
ju.add_dictionary_to_json(dct=dct, filename=FILENAME)
ju.print_team_win_loss(filename=FILENAME)

# Team List

In [3]:
# Build the TeamList from TEAM_URL; its `df` is what add_teams_to_json
# iterates over ("URL" and "School" columns).
team_list = TeamList.TeamList(url=TEAM_URL)

In [17]:
# Run the add-if-missing pass over every team in `team_list` against FILENAME.
add_teams_to_json(
    team_list=team_list,
    filename=FILENAME,
    url_suffix=URL_SUFFIX,
)

0. Abilene Christian
Abilene Christian already exists in current JSON.
1. Air Force
Air Force already exists in current JSON.
2. Akron
Akron already exists in current JSON.
3. Alabama
Alabama already exists in current JSON.
4. Alabama A&M
Alabama A&M already exists in current JSON.
5. Alabama State
Alabama State already exists in current JSON.
6. Albany (NY)
Albany (NY) already exists in current JSON.
7. Alcorn State
Alcorn State already exists in current JSON.
8. Allegheny Gators
Value Error
Allegheny Gators has no records.
9. American
American already exists in current JSON.
10. Amherst Lord Jeffs
Value Error
Amherst Lord Jeffs has no records.
11. Appalachian State
Appalachian State already exists in current JSON.
12. Arizona
Arizona already exists in current JSON.
13. Arizona State
Arizona State already exists in current JSON.
14. Arkansas
Arkansas already exists in current JSON.
15. Arkansas State
Arkansas State already exists in current JSON.
16. Arkansas-Pine Bluff
Arkansas-Pine 