In [1]:
import requests
from bs4 import BeautifulSoup, Comment
import time
import pandas as pd

In [5]:
# Site root; get_teams_by_year() prefixes it to the relative team links it scrapes.
base_url = "https://www.baseball-reference.com"

# League index page. It is fetched once when this cell runs, and
# get_teams_by_year() parses this module-level `response`.
url = "https://www.baseball-reference.com/register/league.cgi?code=FRON&class=Ind"
response = requests.get(url)

def get_teams_by_year():
    """Map every season on the league index page to its team pages.

    Parses the module-level ``response`` (the already-fetched league index
    page) and prefixes each team link with the module-level ``base_url``.

    Returns:
        dict: season label (text of the row's year link) -> list of
        ``(team_url, team_name)`` tuples in page order. The dict is empty
        when the response status is not 200 or the expected table is not
        present in the page.
    """
    teams_by_year = {}
    if response.status_code != 200:
        return teams_by_year

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table', {'class': 'suppress_all sortable stats_table'})
    if table is None:
        # A 200 response without the expected table (layout change, error
        # page, ...) is treated like a failed fetch instead of raising
        # AttributeError on None.
        return teams_by_year

    for body in table.find_all('tbody'):
        for row in body.find_all('tr'):
            year_cell = row.find('th', {'data-stat': 'year_ID'})
            year_link = year_cell.find('a') if year_cell is not None else None
            team_cell = row.find('td', {'data-stat': 'team_ID'})
            if year_link is None or team_cell is None:
                # Rows without a season link or a team cell (e.g. spacer or
                # repeated-header rows, if the page has any) carry no data.
                continue

            # Look the links up once and derive both URL and name from them.
            pairs = [(base_url + link['href'], link.text)
                     for link in team_cell.find_all('a')]
            teams_by_year.setdefault(year_link.text, []).extend(pairs)
    return teams_by_year

def get_batting_df(year, teams_by_year, delay=10):
    """Scrape the ``team_batting`` table from every team page of one season.

    Args:
        year: Key into ``teams_by_year`` (this notebook passes strings such
            as ``'2023'``).
        teams_by_year: Mapping of season -> iterable of
            ``(team_url, team_name)`` pairs.
        delay: Seconds to sleep after each page request. Defaults to 10,
            the pause this function has always used.

    Returns:
        pandas.DataFrame with one row per ``<tr>`` of each team's table body
        (all values are the stripped cell text), the table's header cells as
        columns, plus a ``'Team'`` column holding the team name. ``None`` if
        no team page contained the table.

    Raises:
        KeyError: if ``year`` is not a key of ``teams_by_year``.
    """
    batting_data = []

    for team_url, team_name in teams_by_year[year]:
        response = requests.get(team_url)
        soup = BeautifulSoup(response.content, 'html.parser')

        table = soup.find('table', {'class': 'sortable stats_table', 'id': 'team_batting'})
        if table is not None:
            data = [
                [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
                for row in table.find('tbody').find_all('tr')
            ]
            columns = [th.get_text(strip=True) for th in table.find('thead').find_all('th')]

            batting_df = pd.DataFrame(data, columns=columns)
            batting_df['Team'] = team_name
            batting_data.append(batting_df)

        # Pause after EVERY request, not only after a successful parse.
        # The pause is presumably there to stay under the site's rate limit,
        # and a page without the table still counts as a request.
        time.sleep(delay)

    if not batting_data:
        return None
    return pd.concat(batting_data, ignore_index=True)

def get_pitching_df(year, teams_by_year, delay=10):
    """Scrape the ``team_pitching`` table from every team page of one season.

    The table is looked up inside the page's HTML comments: the first
    comment whose text contains ``'team_pitching'`` is re-parsed as HTML and
    searched for the table.

    Args:
        year: Key into ``teams_by_year`` (this notebook passes strings such
            as ``'2023'``).
        teams_by_year: Mapping of season -> iterable of
            ``(team_url, team_name)`` pairs.
        delay: Seconds to sleep after each page request. Defaults to 10,
            the pause this function has always used.

    Returns:
        pandas.DataFrame with one row per ``<tr>`` of each team's table body
        (all values are the stripped cell text), the table's header cells as
        columns, plus a ``'Team'`` column holding the team name. ``None`` if
        no team page contained the table.

    Raises:
        KeyError: if ``year`` is not a key of ``teams_by_year``.
    """
    pitching_data = []

    for team_url, team_name in teams_by_year[year]:
        response = requests.get(team_url)
        soup = BeautifulSoup(response.content, 'html.parser')

        comments = soup.find_all(string=lambda text: isinstance(text, Comment))
        table_comment = next((comment for comment in comments if 'team_pitching' in comment), None)

        # Only re-parse when a matching comment exists. Handing None to
        # BeautifulSoup would fail before the `table is not None` check below
        # ever ran.
        table = None
        if table_comment is not None:
            table_soup = BeautifulSoup(table_comment, 'html.parser')
            table = table_soup.find('table', {'class': 'sortable stats_table', 'id': 'team_pitching'})

        if table is not None:
            data = [
                [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
                for row in table.find('tbody').find_all('tr')
            ]
            columns = [th.get_text(strip=True) for th in table.find('thead').find_all('th')]

            pitching_df = pd.DataFrame(data, columns=columns)
            pitching_df['Team'] = team_name
            pitching_data.append(pitching_df)

        # Pause after EVERY request, not only after a successful parse.
        # The pause is presumably there to stay under the site's rate limit,
        # and a page without the table still counts as a request.
        time.sleep(delay)

    if not pitching_data:
        return None
    return pd.concat(pitching_data, ignore_index=True)

In [3]:
# Build the season -> teams index once, then scrape the batting tables for
# the three seasons in the same order as before (2023, 2022, 2021).
tby = get_teams_by_year()
batting_23, batting_22, batting_21 = (
    get_batting_df(season, tby) for season in ('2023', '2022', '2021')
)

In [6]:
# Scrape the pitching tables for the same three seasons, reusing the `tby`
# index built in the batting cell (that cell must have been run first).
pitching_23, pitching_22, pitching_21 = (
    get_pitching_df(season, tby) for season in ('2023', '2022', '2021')
)

In [8]:
# Write every scraped table to its CSV file. get_batting_df / get_pitching_df
# return None when no table was found for a season, so those entries are
# skipped with a notice. Calling None.to_csv would raise and abort the
# remaining exports.
exports = [
    (batting_23, '../Joliet Slammers/Baseball Reference/2023 Frontier League Hitting.csv'),
    (batting_22, '../Joliet Slammers/Baseball Reference/2022 Frontier League Hitting.csv'),
    (batting_21, '../Joliet Slammers/Baseball Reference/2021 Frontier League Hitting.csv'),
    (pitching_23, '../Joliet Slammers/Baseball Reference/2023 Frontier League Pitching.csv'),
    (pitching_22, '../Joliet Slammers/Baseball Reference/2022 Frontier League Pitching.csv'),
    (pitching_21, '../Joliet Slammers/Baseball Reference/2021 Frontier League Pitching.csv'),
]

for frame, csv_path in exports:
    if frame is None:
        print(f"Skipping {csv_path}: no data was scraped")
        continue
    frame.to_csv(csv_path)