In [None]:
from bs4 import BeautifulSoup
import pandas as pd

# Column order of the frame returned by extract_game_data.
_GAME_COLUMNS = [
    'Date',
    'Winner Team', 'Winner Score',
    'Loser Team', 'Loser Score',
    'Home Team',
    'Pass Yds Player', 'Pass Yds Team', 'Pass Yds',
    'Rush Yds Player', 'Rush Yds Team', 'Rush Yds',
    'Rec Yds Player', 'Rec Yds Team', 'Rec Yds',
]

# Column prefixes for the stat rows, in the order the rows are read
# (stats row 0 -> passing, row 1 -> rushing, row 2 -> receiving).
_STAT_PREFIXES = ('Pass Yds', 'Rush Yds', 'Rec Yds')


def _split_leader(cell_text):
    """Split a 'Player-TEAM' stat cell into a (player, team) pair."""
    # Split on the LAST hyphen only, so hyphenated player names
    # (e.g. "Amon-Ra St. Brown-DET") keep the whole name intact.
    player, sep, team = cell_text.rpartition('-')
    if not sep:
        # No hyphen at all: the whole cell is the player, team is unknown.
        return cell_text, "N/A"
    return player, team


def _team_and_score(team_row):
    """Return (team name, score text) read from one row of the teams table."""
    return team_row.td.a.text, team_row.find_all('td')[1].text


def _score_as_int(score_text):
    """Parse a score cell as an int, or return None when it is not numeric."""
    try:
        return int(score_text)
    except ValueError:
        return None


def extract_game_data(soup):
    """Extract one row of summary data per game from a parsed scoreboard page.

    Every ``div`` with class ``game_summary expanded nohover`` is treated as
    one game. For each game the date, both teams with their scores, and the
    player/team/value of the first three rows of the ``stats`` table are read.

    Args:
        soup: Parsed page (a BeautifulSoup object, or anything exposing the
            same ``find_all`` / ``find`` / tag-attribute interface).

    Returns:
        pandas.DataFrame with the columns Date, Winner Team, Winner Score,
        Loser Team, Loser Score, Home Team, and for each of Pass Yds,
        Rush Yds and Rec Yds a "<stat> Player", "<stat> Team" and "<stat>"
        column. All values are the raw cell text (strings). A page with no
        matching games yields an empty frame with the same columns.

    Raises:
        AttributeError, IndexError: markup that lacks an expected table, row
            or cell is not handled and propagates as-is.
    """
    records = []

    for game in soup.find_all('div', class_='game_summary expanded nohover'):
        date = game.find('tr', class_='date').td.text
        # Row 0 of the teams table is skipped; rows 1 and 2 are the two teams.
        team_rows = game.find('table', class_='teams').tbody.find_all('tr')
        stat_rows = game.find('table', class_='stats').tbody.find_all('tr')

        first_team, first_score = _team_and_score(team_rows[1])
        second_team, second_score = _team_and_score(team_rows[2])

        # Decide winner/loser from the scores rather than from row position,
        # so the "Winner" columns always hold the higher score. When the
        # scores are tied or not numeric, the listed order is kept.
        first_pts = _score_as_int(first_score)
        second_pts = _score_as_int(second_score)
        second_won = (
            first_pts is not None
            and second_pts is not None
            and second_pts > first_pts
        )
        if second_won:
            winner_team, winner_score = second_team, second_score
            loser_team, loser_score = first_team, first_score
        else:
            winner_team, winner_score = first_team, first_score
            loser_team, loser_score = second_team, second_score

        record = {
            'Date': date,
            'Winner Team': winner_team,
            'Winner Score': winner_score,
            'Loser Team': loser_team,
            'Loser Score': loser_score,
            # NOTE(review): presumes the second listed team is the home
            # side -- confirm against the saved HTML.
            'Home Team': second_team,
        }

        # Index explicitly (no zip) so a game with fewer than three stat rows
        # still fails loudly instead of silently producing missing values.
        for idx, prefix in enumerate(_STAT_PREFIXES):
            cells = stat_rows[idx].find_all('td')
            player, team = _split_leader(cells[1].text)
            record[f'{prefix} Player'] = player
            record[f'{prefix} Team'] = team
            record[prefix] = cells[2].text

        records.append(record)

    # Passing `columns` pins the column order and keeps the schema even when
    # no games were found.
    return pd.DataFrame(records, columns=_GAME_COLUMNS)

# Parse each saved weekly page (weeks 1 through 18) into its own DataFrame.
weekly_frames = []
for week in range(1, 19):
    file_path = f'weeksInHtml2/week_{week}.html'
    # Read raw bytes so BeautifulSoup detects the document's encoding itself
    # instead of depending on the platform's default text encoding.
    with open(file_path, 'rb') as file:
        content = file.read()
    soup = BeautifulSoup(content, 'html.parser')

    # Extract the games for this week and tag each row with the week number
    week_df = extract_game_data(soup)
    week_df['Week'] = week
    weekly_frames.append(week_df)

# Concatenate once at the end: growing a frame with pd.concat inside the loop
# re-copies all accumulated rows on every iteration, and seeding it with an
# empty DataFrame triggers pandas' empty-frame concat deprecation warning.
df = pd.concat(weekly_frames, ignore_index=True)

# Preview the combined DataFrame
df.head()

In [None]:
# Persist the combined weekly results as CSV, leaving out the row index
df.to_csv(
    path_or_buf='2022_week_1_to_week_18_game_scores.csv',
    index=False,
)
