# The databases

This is the code I'm using to:

1. Get data from the internet on Premier League final results between 2008 and 2023
2. Select all teams in each season covered by this 15-year range
3. Draw career data for each player in the squad of all teams
4. Check how relevant they were in those squads
5. Get their number of total appearances and starts BEFORE the season in which they played for a PL team

## Initial imports

In [None]:
import requests
from bs4 import BeautifulSoup
import csv
import random
import time
import os
import re
from datetime import datetime

In [None]:
!cd

## PL table data scraping

In [None]:
base_url = 'https://www.<yourWebsiteHere>.net'
schedule_url_pattern = base_url + '/schedule/eng-premier-league-{}-{}-spieltag/38/'

# Download the "spieltag/38" schedule page of every season from 2008-2009 to
# 2022-2023 and store the standings table of each one as a CSV file.
# The output folder is created up front (like the other output folders in this
# notebook); without it the first open() fails with FileNotFoundError.
os.makedirs('league_table', exist_ok=True)

for season_start in range(2008, 2023):
    season_end = season_start + 1
    url = schedule_url_pattern.format(season_start, season_end)

    response = requests.get(url)
    # Stop on an HTTP error instead of silently writing a header-only CSV
    # and reporting the season as completed.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    tables = soup.find_all('table', class_='standard_tabelle')

    filename = f'league_table/PL_league_table_{season_start}_{season_end}.csv'
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Position', 'Team', 'Team URL', 'Matches', 'Wins', 'Draws', 'Losses', 'Goals', 'Goal Difference', 'Points'])

        for table in tables:
            headers = [th.text for th in table.find_all('th')]
            # Only the table whose header starts with these four cells is exported.
            if headers[:4] == ['#', 'Team', 'M.', 'W']:
                for row in table.find_all('tr')[1:]:
                    columns = row.find_all('td')
                    # Cells 0-9 are read below; skip rows that do not have them
                    # rather than failing with IndexError.
                    if len(columns) < 10:
                        continue
                    position = columns[0].text.strip()
                    team = columns[2].text.strip()
                    # The team link is relative, so prefix it with the site root.
                    team_url = base_url + columns[2].a['href'] if columns[2].a else ''
                    matches = columns[3].text.strip()
                    wins = columns[4].text.strip()
                    draws = columns[5].text.strip()
                    losses = columns[6].text.strip()
                    goals = columns[7].text.strip()
                    goal_difference = columns[8].text.strip()
                    points = columns[9].text.strip()

                    writer.writerow([position, team, team_url, matches, wins, draws, losses, goals, goal_difference, points])

    print(f"Data extraction completed for the {season_start}-{season_end} season.")
    # Random pause between requests to avoid hammering the site.
    time.sleep(random.randint(5, 10))

## Getting PL teams in each year

In [None]:
# Maps a "YYYY-YYYY" season label to the list of
# (position, team name, team URL) tuples read from that season's table CSV.
all_teams_per_season = {}

for season_start in range(2008, 2023):
    season_end = season_start + 1
    season_label = f"{season_start}-{season_end}"
    filename = f'league_table/PL_league_table_{season_start}_{season_end}.csv'

    with open(filename, mode='r', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader)  # discard the header row
        all_teams = []
        for record in reader:
            # Only the first three columns are needed downstream.
            team_position, team_name, team_url = record[0], record[1], record[2]
            all_teams.append((team_position, team_name, team_url))

    all_teams_per_season[season_label] = all_teams

In [None]:
all_teams_per_season

## Grabbing squads for each PL season between 2008 and 2023

In [None]:
def fetch_player_data(url, league_position):
    """Download a squad page and return its players grouped by position.

    Every ``<tr>`` of the page is scanned in document order. A row whose
    ``<th>`` text is one of 'Goalkeeper', 'Defender', 'Midfielder' or
    'Forward' switches the current position group; any later row with at
    least six ``<td>`` cells is stored as a player of that group.

    Args:
        url: Address of the squad page to request.
        league_position: Value copied unchanged into every player record.

    Returns:
        A dict keyed by the four position names, each holding a list of
        player dicts with the keys 'league_position', 'name', 'position',
        'number', 'country', 'dob' and 'profile_url' (``None`` when the name
        cell has no link).
    """
    response = requests.get(url)
    response.encoding = 'utf-8'
    soup = BeautifulSoup(response.text, 'html.parser')

    base_url = 'https://www.<yourWebsiteHere>.net'  # prefix for the relative profile links
    players_data = {'Goalkeeper': [], 'Defender': [], 'Midfielder': [], 'Forward': []}
    current_position = None

    for table_row in soup.find_all('tr'):
        heading = table_row.th
        if heading and heading.get_text(strip=True) in players_data:
            # Section header: the following player rows belong to this group.
            current_position = heading.get_text(strip=True)
            continue

        cells = table_row.find_all('td')
        # Ignore rows seen before the first section header and rows too short
        # to hold the cells read below.
        if not cells or not current_position or len(cells) < 6:
            continue

        name_cell = cells[2]
        if name_cell.a:
            player_summary_link = base_url + name_cell.a['href']
        else:
            player_summary_link = None

        players_data[current_position].append({
            'league_position': league_position,
            'name': name_cell.get_text(strip=True),
            'position': current_position,
            'number': cells[1].get_text(strip=True),
            'country': cells[4].get_text(strip=True),
            'dob': cells[5].get_text(strip=True),
            'profile_url': player_summary_link
        })

    return players_data

In [None]:
os.makedirs('team_squads', exist_ok=True)

# For every team of every season: fetch the squad page and write one CSV
# per team and season under team_squads/<Team_Name>/.
for season, teams in all_teams_per_season.items():
    season_end_year = season.split('-')[1]

    for team_position, team_name, team_base_url in teams:
        # The squad page address is the team URL followed by the season's end year.
        team_url = f'{team_base_url}{season_end_year}/2/'
        players_data = fetch_player_data(team_url, team_position)

        team_dir = f'team_squads/{team_name.replace(" ", "_")}'
        os.makedirs(team_dir, exist_ok=True)

        filename = f'{team_dir}/{team_name.replace(" ", "_")}_{season}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['League Position', 'Position', 'Name', 'Number', 'Country', 'DOB', 'Profile URL'])
            # Flatten the position groups into one row per player.
            writer.writerows(
                [player['league_position'], player['position'], player['name'],
                 player['number'], player['country'], player['dob'], player['profile_url']]
                for players in players_data.values()
                for player in players
            )

        print(f"Data extraction completed for {team_name} in the {season} season.")
        time.sleep(random.randint(5, 10))

## Drawing career data for each player in a PL team

In [None]:
def fetch_player_career_data(player_url):
    """Download a player's statistics page and return one dict per table row.

    The page at ``player_url + '2/'`` is requested and its first
    ``standard_tabelle`` table is parsed, skipping the first (header) row.

    Args:
        player_url: Player profile URL; the suffix '2/' is appended to it.

    Returns:
        A list of dicts with the keys 'League', 'Season', 'Team', 'Matches',
        'Goals', 'Starting Line-Up', 'Substitute In', 'Substitute Out',
        'Yellow Cards', 'Second Yellow Cards' and 'Red Cards' (all values are
        stripped cell text). The list is empty when the page has no such table.
    """
    career_data = []
    response = requests.get(player_url + '2/')
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table', class_='standard_tabelle')
    if not table:
        return career_data

    # Output keys in column order; the first key is taken from cell index 1.
    field_names = (
        'League', 'Season', 'Team', 'Matches', 'Goals', 'Starting Line-Up',
        'Substitute In', 'Substitute Out', 'Yellow Cards',
        'Second Yellow Cards', 'Red Cards',
    )

    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        # Cells 1..11 are read, so a row needs at least 12 cells. The previous
        # check (more than one cell) let shorter rows through, which then
        # failed with IndexError.
        if len(cols) < len(field_names) + 1:
            continue
        career_data.append({
            field: cols[index].get_text(strip=True)
            for index, field in enumerate(field_names, start=1)
        })
    return career_data

In [None]:
def get_player_urls_from_csv(root_directory):
    """Collect every non-empty 'Profile URL' value from the CSVs under a folder.

    Args:
        root_directory: Folder that is walked recursively for ``.csv`` files.

    Returns:
        A list of profile URLs in the order they are encountered; duplicates
        are kept.
    """
    all_player_urls = []
    for folder, _subfolders, names in os.walk(root_directory):
        for name in names:
            if not name.endswith('.csv'):
                continue
            csv_path = os.path.join(folder, name)
            with open(csv_path, mode='r', encoding='utf-8') as handle:
                candidates = (record.get('Profile URL') for record in csv.DictReader(handle))
                # Drop rows where the column is missing or blank.
                all_player_urls.extend(link for link in candidates if link)
    return all_player_urls

In [None]:
team_squads_dir = 'team_squads'
# A player appears in one squad file per season, so the raw list is full of
# repeats. dict.fromkeys removes them while keeping first-seen order; the
# previous list(set(...)) produced a different order on every interpreter run
# (string hashing is randomized), which made the long scrape below hard to
# resume or compare between runs.
player_urls = list(dict.fromkeys(get_player_urls_from_csv(team_squads_dir)))

In [None]:
player_urls

In [None]:
output_directory = 'top_players'
os.makedirs(output_directory, exist_ok=True)

# Fetch the career table of every player and store it as
# top_players/<player>/<player>_career_data.csv.
for player_url in player_urls:
    career_data = fetch_player_career_data(player_url)

    # Pause after every request, whether or not it returned data.
    time.sleep(random.randint(5, 10))

    if not career_data:
        print(f"No career data found for {player_url}")
        continue

    # The last path segment of the profile URL doubles as folder and file name.
    player_name = player_url.rstrip('/').split('/')[-1]

    player_directory = os.path.join(output_directory, player_name)
    os.makedirs(player_directory, exist_ok=True)

    csv_file_path = os.path.join(player_directory, f'{player_name}_career_data.csv')

    with open(csv_file_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=career_data[0].keys())
        writer.writeheader()
        writer.writerows(career_data)
    print(f"Career data for {player_name} saved to {csv_file_path}")

## Check player's career history and PL season role

### Auxiliary functions

In [None]:
def is_before_season(player_season, current_season):
    """Tell whether a career entry's season ends before ``current_season`` ends.

    Args:
        player_season: Season text of a career row, either 'YYYY/YYYY' (the
            part after the first slash is used) or a single year.
        current_season: Reference season written as 'YYYY-YYYY'.

    Returns:
        True when the entry's end year is strictly lower than the end year of
        ``current_season``; False otherwise, including when the entry's year
        is not an integer.
    """
    try:
        year_text = player_season.split('/')[1] if '/' in player_season else player_season
        season_end_year = int(year_text)
    except ValueError:
        # Season labels that are not plain years cannot be ordered.
        return False

    return season_end_year < int(current_season.split('-')[1])

In [None]:
def read_player_career_data_from_csv(csv_file_path):
    """Load a career CSV and return its rows as a list of dicts.

    Args:
        csv_file_path: Path of a UTF-8 CSV file with a header row.

    Returns:
        One dict per data row, keyed by the header names.
    """
    with open(csv_file_path, mode='r', encoding='utf-8') as handle:
        rows = list(csv.DictReader(handle))
    return rows

In [None]:
def determine_role(starts):
    """Classify a player by the number of matches started in a season.

    Args:
        starts: Number of starts.

    Returns:
        "1st choice" for 19 or more starts, "2nd choice" for 8 to 18 starts,
        "3rd choice" otherwise.
    """
    if starts >= 19:
        return "1st choice"
    if starts >= 8:
        return "2nd choice"
    return "3rd choice"

In [None]:
def create_url_to_name_mapping(team_squads_dir):
    """Build a lookup from player profile URL to player name.

    Args:
        team_squads_dir: Folder walked recursively for squad ``.csv`` files
            that have 'Profile URL' and 'Name' columns.

    Returns:
        A dict mapping each profile URL to a name; when a URL occurs more
        than once, the value read last wins.
    """
    url_to_name = {}
    for folder, _subfolders, filenames in os.walk(team_squads_dir):
        csv_paths = (os.path.join(folder, fn) for fn in filenames if fn.endswith('.csv'))
        for csv_path in csv_paths:
            with open(csv_path, mode='r', encoding='utf-8') as handle:
                url_to_name.update(
                    (record['Profile URL'], record['Name'])
                    for record in csv.DictReader(handle)
                )
    return url_to_name

In [None]:
# Build the profile-URL -> player-name lookup from the squad CSVs under team_squads/.
team_squads_dir = 'team_squads'
url_to_name_mapping = create_url_to_name_mapping(team_squads_dir)

In [None]:
url_to_name_mapping

In [None]:
def create_url_to_position_mapping(team_squads_dir):
    """Build a lookup from player profile URL to playing position.

    Args:
        team_squads_dir: Folder walked recursively for squad ``.csv`` files
            that have 'Profile URL' and 'Position' columns.

    Returns:
        A dict mapping each profile URL to a position; when a URL occurs more
        than once, the value read last wins.
    """
    url_to_position = {}
    for current_dir, _dirs, entries in os.walk(team_squads_dir):
        for entry in entries:
            if not entry.endswith('.csv'):
                continue
            squad_path = os.path.join(current_dir, entry)
            with open(squad_path, mode='r', encoding='utf-8') as squad_file:
                for player in csv.DictReader(squad_file):
                    url_to_position[player['Profile URL']] = player['Position']
    return url_to_position

In [None]:
# Build the profile-URL -> position lookup from the squad CSVs under team_squads/.
team_squads_dir = 'team_squads'
url_to_position_mapping = create_url_to_position_mapping(team_squads_dir)

In [None]:
url_to_position_mapping

In [None]:
def calculate_age(dob, end_date):
    """Return a player's age in completed years on ``end_date``.

    Args:
        dob: Date of birth as 'DD/MM/YYYY', or 'Unknown' / an empty value
            when it is not available.
        end_date: Reference day, as a ``datetime`` (a plain ``date`` is
            accepted as well).

    Returns:
        The age as an int, or the string 'Unknown' when ``dob`` is 'Unknown'
        or empty.

    Raises:
        ValueError: If ``dob`` is non-empty but not in 'DD/MM/YYYY' format.
    """
    # Blank DOB cells are treated like the explicit 'Unknown' marker.
    if not dob or dob == 'Unknown':
        return 'Unknown'

    dob_date = datetime.strptime(dob, "%d/%m/%Y").date()
    if isinstance(end_date, datetime):
        end_date = end_date.date()

    # Count calendar years and subtract one if the birthday has not been
    # reached yet. Dividing the day difference by 365 drifts because of leap
    # days and reported a birthday up to several days early.
    birthday_reached = (end_date.month, end_date.day) >= (dob_date.month, dob_date.day)
    return end_date.year - dob_date.year - (0 if birthday_reached else 1)

In [None]:
def create_url_to_dob_mapping(team_squads_dir):
    """Build a lookup from player profile URL to date-of-birth text.

    Args:
        team_squads_dir: Folder walked recursively for squad ``.csv`` files
            that have 'Profile URL' and 'DOB' columns.

    Returns:
        A dict mapping each profile URL to the DOB string as stored in the
        CSV; when a URL occurs more than once, the value read last wins.
    """
    url_to_dob = {}
    for base, _unused, listing in os.walk(team_squads_dir):
        for csv_name in (item for item in listing if item.endswith('.csv')):
            with open(os.path.join(base, csv_name), mode='r', encoding='utf-8') as source:
                url_to_dob.update({
                    entry['Profile URL']: entry['DOB']
                    for entry in csv.DictReader(source)
                })
    return url_to_dob

In [None]:
# Build the profile-URL -> date-of-birth lookup from the squad CSVs under team_squads/.
team_squads_dir = 'team_squads'
url_to_dob_mapping = create_url_to_dob_mapping(team_squads_dir)

In [None]:
url_to_dob_mapping

In [None]:
def safe_int(value, default=0):
    """Convert ``value`` to int, falling back to ``default`` when that fails.

    Args:
        value: Anything ``int()`` accepts; typically the text of a CSV cell.
        default: Result used when ``value`` cannot be converted.

    Returns:
        ``int(value)``, or ``default`` for non-numeric text and for values
        such as ``None``.
    """
    try:
        return int(value)
    except (TypeError, ValueError):
        # TypeError covers None, which csv.DictReader yields for columns
        # missing from a short row.
        return default

In [None]:
def sum_starts_appearances(player_data, season, top_competitions):
    """Total a player's starts and appearances in seasons before ``season``.

    Args:
        player_data: Career rows (dicts with 'Season', 'League',
            'Starting Line-Up' and 'Matches' keys).
        season: Reference season written as 'YYYY-YYYY'; only rows for which
            ``is_before_season`` is true are counted.
        top_competitions: Collection of 'League' labels that also count
            towards the top-competition totals.

    Returns:
        A 4-tuple ``(top starts, top appearances, total starts,
        total appearances)``.
    """
    top_starts = 0
    top_appearances = 0
    overall_starts = 0
    overall_appearances = 0

    for entry in player_data:
        if not is_before_season(entry['Season'], season):
            continue

        counts_as_top = entry['League'] in top_competitions
        started = safe_int(entry['Starting Line-Up'])
        played = safe_int(entry['Matches'])

        # Every earlier row feeds the overall totals; only top-competition
        # rows feed the top totals as well.
        overall_starts += started
        overall_appearances += played
        if counts_as_top:
            top_starts += started
            top_appearances += played

    return top_starts, top_appearances, overall_starts, overall_appearances

### Final data extraction

In [None]:
# Output folders for the per-season and per-team summary CSVs.
season_summaries_dir = 'season_summaries'
team_summaries_dir = 'team_summaries'
os.makedirs(season_summaries_dir, exist_ok=True)
os.makedirs(team_summaries_dir, exist_ok=True)

# 'League' labels of career rows that count towards the
# "Top Competition ... Before Season" totals.
top_competitions = [
    "Primera", "EL", "Conf. League", "Ch. League", "UEFA Sup. Cup",
    "Pr. League", "CL QF", "EL QF", "FA Cup", "League Cup", "Copa del Rey",
    "UECL Qual.", "Ligue 1", "Bundesliga", "DFB-Pokal", "Coupe", "Coupe Ligue",
    "Serie A", "Coppa", "Copa Lib.", "Copa Sud."
]

# Root folder holding one sub-folder with a career CSV per player.
player_directory = 'top_players'
# Column order of the summary CSVs.
fieldnames = [
    'League Position', 'Player Name', 'Player Position', 'Player URL', 'Team', 'Season', 'Role', 
    'Age at End of Season', 'PL Starts This Season', 'PL Appearances This Season',
    'Top Competition Starts Before Season', 'Top Competition Appearances Before Season',
    'Total Starts Before Season', 'Total Appearances Before Season'
]

In [None]:
# For every season and team, collect one summary row per player who has a
# 'Pr. League' career entry for that team and season, then write the rows to
# a per-team CSV and to a per-season CSV.

# Read every player's career file once. Previously the whole folder was
# listed and every CSV re-read for each team of each season.
player_careers = []
for player_folder in os.listdir(player_directory):
    player_path = os.path.join(player_directory, player_folder, f'{player_folder}_career_data.csv')
    if os.path.exists(player_path):
        player_careers.append((player_folder, read_player_career_data_from_csv(player_path)))

for season, teams_tuples in all_teams_per_season.items():
    season_data = []
    # Ages are computed as of 30 June of the season's end year.
    season_end_year = int(season.split('-')[1])
    season_end_date = datetime(season_end_year, 6, 30)

    for league_position, team_name, _ in teams_tuples:
        team_dir = os.path.join(team_summaries_dir, team_name.replace(" ", "_"))
        os.makedirs(team_dir, exist_ok=True)
        team_season_data = []

        for player_folder, player_data in player_careers:
            for data in player_data:
                # Career rows use 'YYYY/YYYY'; season keys use 'YYYY-YYYY'.
                data_season_formatted = data['Season'].replace('/', '-')
                if not (data_season_formatted == season and data['League'] == 'Pr. League' and data['Team'] == team_name):
                    continue

                starts_this_season = safe_int(data['Starting Line-Up'])
                appearances_this_season = safe_int(data['Matches'])
                role = determine_role(starts_this_season)

                top_starts_before_season, top_appearances_before_season, total_starts_before_season, total_before_season = sum_starts_appearances(player_data, season, top_competitions)

                # Rebuild the profile URL used as key in the squad-derived lookups.
                player_url = f'https://www.<yourWebsiteHere>.net/player_summary/{player_folder}/'
                player_dob = url_to_dob_mapping.get(player_url, 'Unknown')
                if player_dob != 'Unknown':
                    player_age = calculate_age(player_dob, season_end_date)
                    print(f"Calculated age for {player_folder}: {player_age}")
                else:
                    player_age = 'Unknown'

                player_name = url_to_name_mapping.get(player_url, 'Unknown')
                player_position = url_to_position_mapping.get(player_url, 'Unknown')
                player_summary = {
                    'League Position': league_position,
                    'Player Name': player_name,
                    'Player Position': player_position,
                    'Player URL': player_folder,
                    'Team': data['Team'],
                    'Season': season,
                    'Role': role,
                    'Age at End of Season': player_age,
                    'PL Starts This Season': starts_this_season,
                    'PL Appearances This Season': appearances_this_season,
                    'Top Competition Starts Before Season': top_starts_before_season,
                    'Top Competition Appearances Before Season': top_appearances_before_season,
                    'Total Starts Before Season': total_starts_before_season,
                    'Total Appearances Before Season': total_before_season
                }
                team_season_data.append(player_summary)
                # Only the first matching row of a player is used.
                break

        team_csv_path = os.path.join(team_dir, f'{season.replace("/", "-")}_{team_name.replace(" ", "_")}_roles.csv')
        with open(team_csv_path, 'w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(team_season_data)
        print(f'Saving CSV for {team_name} players in {season} season')

        season_data.extend(team_season_data)

    season_csv_path = os.path.join(season_summaries_dir, f'{season}_top_players.csv')
    with open(season_csv_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(season_data)
    print(f'Saving summary for the {season} season')

## Plus: extracting data from Chelsea's current season

In [None]:
base_url = 'https://www.<yourWebsiteHere>.net'
schedule_url_pattern = base_url + '/schedule/eng-premier-league-{}-{}-spieltag/38/'

# Download the "spieltag/38" schedule page of the 2023-2024 season and store
# its standings table as a CSV file.
# The output folder is created up front (like the other output folders in this
# notebook); without it open() fails with FileNotFoundError.
os.makedirs('league_table', exist_ok=True)

for season_start in range(2023, 2024):
    season_end = season_start + 1
    url = schedule_url_pattern.format(season_start, season_end)

    response = requests.get(url)
    # Stop on an HTTP error instead of silently writing a header-only CSV
    # and reporting the season as completed.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    tables = soup.find_all('table', class_='standard_tabelle')

    filename = f'league_table/PL_league_table_{season_start}_{season_end}.csv'
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Position', 'Team', 'Team URL', 'Matches', 'Wins', 'Draws', 'Losses', 'Goals', 'Goal Difference', 'Points'])

        for table in tables:
            headers = [th.text for th in table.find_all('th')]
            # Only the table whose header starts with these four cells is exported.
            if headers[:4] == ['#', 'Team', 'M.', 'W']:
                for row in table.find_all('tr')[1:]:
                    columns = row.find_all('td')
                    # Cells 0-9 are read below; skip rows that do not have them
                    # rather than failing with IndexError.
                    if len(columns) < 10:
                        continue
                    position = columns[0].text.strip()
                    team = columns[2].text.strip()
                    # The team link is relative, so prefix it with the site root.
                    team_url = base_url + columns[2].a['href'] if columns[2].a else ''
                    matches = columns[3].text.strip()
                    wins = columns[4].text.strip()
                    draws = columns[5].text.strip()
                    losses = columns[6].text.strip()
                    goals = columns[7].text.strip()
                    goal_difference = columns[8].text.strip()
                    points = columns[9].text.strip()

                    writer.writerow([position, team, team_url, matches, wins, draws, losses, goals, goal_difference, points])

    print(f"Data extraction completed for the {season_start}-{season_end} season.")
    # Random pause between requests to avoid hammering the site.
    time.sleep(random.randint(5, 10))

In [None]:
# Maps the "2023-2024" season label to the list of
# (position, team name, team URL) tuples read from that season's table CSV.
all_teams_2023_2024 = {}

for season_start in range(2023, 2024):
    season_end = season_start + 1
    season_label = f"{season_start}-{season_end}"
    filename = f'league_table/PL_league_table_{season_start}_{season_end}.csv'

    with open(filename, mode='r', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader)  # discard the header row
        all_teams = []
        for record in reader:
            # Only the first three columns are needed downstream.
            team_position, team_name, team_url = record[0], record[1], record[2]
            all_teams.append((team_position, team_name, team_url))

    all_teams_2023_2024[season_label] = all_teams

In [None]:
all_teams_2023_2024

In [None]:
os.makedirs('team_squads', exist_ok=True)

# For every 2023-2024 team: fetch the squad page and write one CSV under
# team_squads/<Team_Name>/.
for season, teams in all_teams_2023_2024.items():
    season_end_year = season.split('-')[1]

    for team_position, team_name, team_base_url in teams:
        # Remove line breaks and the "(M,P)" / "(N)" markers from the table's
        # team text so it can be used in folder and file names.
        clean_team_name = team_name.replace("\n", " ").replace("(M,P)", "").replace("(N)", "").strip()
        # The squad page address is the team URL followed by the season's end year.
        team_url = f'{team_base_url}{season_end_year}/2/'

        players_data = fetch_player_data(team_url, team_position)

        team_dir = f'team_squads/{clean_team_name.replace(" ", "_")}'
        os.makedirs(team_dir, exist_ok=True)

        filename = f'{team_dir}/{clean_team_name.replace(" ", "_")}_{season}.csv'
        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.writer(file)
            writer.writerow(['League Position', 'Position', 'Name', 'Number', 'Country', 'DOB', 'Profile URL'])
            # Flatten the position groups into one row per player.
            writer.writerows(
                [player['league_position'], player['position'], player['name'],
                 player['number'], player['country'], player['dob'], player['profile_url']]
                for players in players_data.values()
                for player in players
            )

        print(f"Data extraction completed for {clean_team_name} in the {season} season.")
        time.sleep(random.randint(5, 10))

In [None]:
def get_player_urls_from_2023_2024_csvs(root_directory):
    """Collect non-empty 'Profile URL' values from the 2023-2024 squad CSVs.

    Args:
        root_directory: Folder walked recursively; only ``.csv`` files whose
            name contains '2023-2024' are read.

    Returns:
        A list of profile URLs in the order they are encountered; duplicates
        are kept.
    """
    player_urls = []
    for folder, _subfolders, names in os.walk(root_directory):
        for name in names:
            if '2023-2024' not in name or not name.endswith('.csv'):
                continue
            with open(os.path.join(folder, name), mode='r', encoding='utf-8') as handle:
                candidates = (record.get('Profile URL') for record in csv.DictReader(handle))
                # Drop rows where the column is missing or blank.
                player_urls.extend(link for link in candidates if link)
    return player_urls

In [None]:
team_squads_dir = 'team_squads'
# A player can be listed in more than one 2023-2024 squad file. Drop repeated
# URLs (keeping first-seen order) so the scrape below requests each player
# only once, as the earlier multi-season run does.
player_urls = list(dict.fromkeys(get_player_urls_from_2023_2024_csvs(team_squads_dir)))

In [None]:
player_urls

In [None]:
def fetch_player_career_data_2023_2024(player_url):
    """Download a player's statistics page and return one dict per table row.

    The page at ``player_url + '2/'`` is requested and its first
    ``standard_tabelle`` table is parsed, skipping the first (header) row.

    Args:
        player_url: Player profile URL; the suffix '2/' is appended to it.

    Returns:
        A list of dicts with the keys 'League', 'Season', 'Team', 'Matches',
        'Goals', 'Starting Line-Up', 'Substitute In', 'Substitute Out',
        'Yellow Cards', 'Second Yellow Cards' and 'Red Cards' (all values are
        stripped cell text). The list is empty when the page has no such table.
    """
    career_data = []
    response = requests.get(player_url + '2/')
    soup = BeautifulSoup(response.text, 'html.parser')

    table = soup.find('table', class_='standard_tabelle')
    if not table:
        return career_data

    # Output keys in column order; the first key is taken from cell index 1.
    field_names = (
        'League', 'Season', 'Team', 'Matches', 'Goals', 'Starting Line-Up',
        'Substitute In', 'Substitute Out', 'Yellow Cards',
        'Second Yellow Cards', 'Red Cards',
    )

    for row in table.find_all('tr')[1:]:
        cols = row.find_all('td')
        # Cells 1..11 are read, so a row needs at least 12 cells. The previous
        # check (more than one cell) let shorter rows through, which then
        # failed with IndexError.
        if len(cols) < len(field_names) + 1:
            continue
        career_data.append({
            field: cols[index].get_text(strip=True)
            for index, field in enumerate(field_names, start=1)
        })
    # The list is returned directly; the former element-by-element copy added nothing.
    return career_data

In [None]:
output_directory = 'top_players'
os.makedirs(output_directory, exist_ok=True)

# Fetch the career table of every 2023-2024 squad player and store it as
# top_players/<player>/<player>_career_data.csv.
for player_url in player_urls:
    career_data = fetch_player_career_data_2023_2024(player_url)

    # Pause after every request, whether or not it returned data.
    time.sleep(random.randint(5, 10))

    if not career_data:
        print(f"No career data found for {player_url} for the 2023-2024 season")
        continue

    # The last path segment of the profile URL doubles as folder and file name.
    player_name = player_url.rstrip('/').split('/')[-1]

    player_directory = os.path.join(output_directory, player_name)
    os.makedirs(player_directory, exist_ok=True)

    csv_file_path = os.path.join(player_directory, f'{player_name}_career_data.csv')

    with open(csv_file_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=career_data[0].keys())
        writer.writeheader()
        writer.writerows(career_data)
    print(f"Career data for {player_name} for 2023-2024 season saved to {csv_file_path}")

### 2023-24 final data extraction

In [None]:
# Output folders for the per-season and per-team summary CSVs.
season_summaries_dir = 'season_summaries'
team_summaries_dir = 'team_summaries'
os.makedirs(season_summaries_dir, exist_ok=True)
os.makedirs(team_summaries_dir, exist_ok=True)

# 'League' labels of career rows that count towards the
# "Top Competition ... Before Season" totals.
top_competitions = [
    "Primera", "EL", "Conf. League", "Ch. League", "UEFA Sup. Cup",
    "Pr. League", "CL QF", "EL QF", "FA Cup", "League Cup", "Copa del Rey",
    "UECL Qual.", "Ligue 1", "Bundesliga", "DFB-Pokal", "Coupe", "Coupe Ligue",
    "Serie A", "Coppa", "Copa Lib.", "Copa Sud."
]

# Root folder holding one sub-folder with a career CSV per player.
# (Re-assigned here because the scrape loop reuses this name for per-player paths.)
player_directory = 'top_players'
# Column order of the summary CSVs.
fieldnames = [
    'League Position', 'Player Name', 'Player Position', 'Player URL', 'Team', 'Season', 'Role', 
    'Age at End of Season', 'PL Starts This Season', 'PL Appearances This Season',
    'Top Competition Starts Before Season', 'Top Competition Appearances Before Season',
    'Total Starts Before Season', 'Total Appearances Before Season'
]

In [None]:
# Rebuild all_teams_per_season so that it holds only the 2023-2024 season:
# label -> list of (position, team name, team URL) tuples from the table CSV.
all_teams_per_season = {}

for season_start in range(2023, 2024):
    season_end = season_start + 1
    season_key = f"{season_start}-{season_end}"
    filename = f'league_table/PL_league_table_{season_start}_{season_end}.csv'

    with open(filename, mode='r', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader)  # discard the header row
        all_teams = []
        for fields in reader:
            team_position = fields[0]
            team_name = fields[1]
            team_url = fields[2]
            all_teams += [(team_position, team_name, team_url)]

    all_teams_per_season[season_key] = all_teams

In [None]:
all_teams_per_season

In [None]:
# Output folders for the per-season and per-team summary CSVs.
season_summaries_dir = 'season_summaries'
team_summaries_dir = 'team_summaries'
os.makedirs(season_summaries_dir, exist_ok=True)
os.makedirs(team_summaries_dir, exist_ok=True)

# Season label that career rows must match in the summary loop.
target_season = '2023-2024'

# The stray top-level `clean_team_name = team_name...` assignment was removed:
# it read a loop variable left over from another cell (NameError when cells
# run out of order) and its result was overwritten per team before any use.

In [None]:
# For every team of the 2023-2024 season, collect one summary row per player
# who has a 'Pr. League' career entry for that team, then write the rows to a
# per-team CSV and to a per-season CSV.

# Read every player's career file once. Previously the whole folder was
# listed and every CSV re-read for each team.
player_careers = []
for player_folder in os.listdir(player_directory):
    player_path = os.path.join(player_directory, player_folder, f'{player_folder}_career_data.csv')
    if os.path.exists(player_path):
        player_careers.append((player_folder, read_player_career_data_from_csv(player_path)))

for season, teams_tuples in all_teams_per_season.items():
    season_data = []
    # Ages are computed as of 30 June of the season's end year.
    season_end_year = int(season.split('-')[1])
    season_end_date = datetime(season_end_year, 6, 30)

    for league_position, team_name, _ in teams_tuples:
        # Table text without line breaks and "(M,P)" / "(N)" markers.
        clean_team_name = team_name.replace("\n", " ").replace("(M,P)", "").replace("(N)", "").strip()
        # Career rows were previously compared with the raw table text only,
        # so teams carrying a marker never matched any player. Accept the
        # cleaned name as well (the raw name is kept for compatibility).
        accepted_team_names = (team_name, clean_team_name)
        team_dir = os.path.join(team_summaries_dir, clean_team_name.replace(" ", "_"))
        os.makedirs(team_dir, exist_ok=True)
        team_season_data = []

        for player_folder, player_data in player_careers:
            for data in player_data:
                # Career rows use 'YYYY/YYYY'; target_season uses 'YYYY-YYYY'.
                data_season_formatted = data['Season'].replace('/', '-')
                if not (data_season_formatted == target_season and data['League'] == 'Pr. League' and data['Team'] in accepted_team_names):
                    continue

                starts_this_season = safe_int(data['Starting Line-Up'])
                appearances_this_season = safe_int(data['Matches'])
                role = determine_role(starts_this_season)

                top_starts_before_season, top_appearances_before_season, total_starts_before_season, total_before_season = sum_starts_appearances(player_data, season, top_competitions)

                # Rebuild the profile URL used as key in the squad-derived lookups.
                player_url = f'https://www.<yourWebsiteHere>.net/player_summary/{player_folder}/'
                player_dob = url_to_dob_mapping.get(player_url, 'Unknown')
                if player_dob != 'Unknown':
                    player_age = calculate_age(player_dob, season_end_date)
                    print(f"Calculated age for {player_folder}: {player_age}")
                else:
                    player_age = 'Unknown'

                player_name = url_to_name_mapping.get(player_url, 'Unknown')
                player_position = url_to_position_mapping.get(player_url, 'Unknown')
                player_summary = {
                    'League Position': league_position,
                    'Player Name': player_name,
                    'Player Position': player_position,
                    'Player URL': player_folder,
                    'Team': data['Team'],
                    'Season': season,
                    'Role': role,
                    'Age at End of Season': player_age,
                    'PL Starts This Season': starts_this_season,
                    'PL Appearances This Season': appearances_this_season,
                    'Top Competition Starts Before Season': top_starts_before_season,
                    'Top Competition Appearances Before Season': top_appearances_before_season,
                    'Total Starts Before Season': total_starts_before_season,
                    'Total Appearances Before Season': total_before_season
                }
                team_season_data.append(player_summary)
                # Only the first matching row of a player is used.
                break

        team_csv_path = os.path.join(team_dir, f'{season.replace("/", "-")}_{clean_team_name.replace(" ", "_")}_roles.csv')
        with open(team_csv_path, 'w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(team_season_data)
        print(f'Saving CSV for {clean_team_name} players in {season} season')

        season_data.extend(team_season_data)

    season_csv_path = os.path.join(season_summaries_dir, f'{season}_top_players.csv')
    with open(season_csv_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(season_data)
    print(f'Saving summary for the {season} season')