In [1]:
import pandas as pd
from tabulate import tabulate
import sqlite3

In [2]:
conn = sqlite3.connect('../Scraping/nfl.db')
# teams_df = pd.read_csv('../Scraping/scripts/Teams.csv')
# games_df = pd.read_csv('../Scraping/scripts/Games.csv')
# player_stats_df = pd.read_csv('../Scraping/scripts/PlayerStats.csv')

# Predict Upcoming Season Wins for Every Team

In [None]:
teams_df = pd.read_sql_query("SELECT * FROM Teams", conn)
games_df = pd.read_sql_query("SELECT * FROM Games", conn)

# Predict upcoming season wins for every team

# Step 1: Filter the game data to include only seasons since 2020
print("Step 1: Filtering data to include only seasons since 2020...")
games_df = games_df[games_df['season'] >= 2020]
print(f"Number of games after filtering: {len(games_df)}")

# Step 2: Calculate current wins and losses for each team
print("\nStep 2: Calculating current wins and losses for each team...")
games_df['home_win'] = games_df['home_score'] > games_df['away_score']
games_df['away_win'] = games_df['away_score'] > games_df['home_score']

home_wins = games_df.groupby('home_team')['home_win'].sum().reset_index()
away_wins = games_df.groupby('away_team')['away_win'].sum().reset_index()

home_wins.columns = ['Team', 'Home_Wins']
away_wins.columns = ['Team', 'Away_Wins']

total_wins = pd.merge(home_wins, away_wins, on='Team', how='outer').fillna(0)
total_wins['Total_Wins'] = total_wins['Home_Wins'] + total_wins['Away_Wins']

print(f"\nExample of current wins calculation:")
print(tabulate(total_wins.head(), headers='keys', tablefmt='pretty'))

# Step 3: Calculate total games and losses
print("\nStep 3: Calculating total games played and losses...")
home_games = games_df.groupby('home_team').size().reset_index(name='Home_Games')
away_games = games_df.groupby('away_team').size().reset_index(name='Away_Games')

home_games.columns = ['Team', 'Home_Games']
away_games.columns = ['Team', 'Away_Games']

total_games = pd.merge(home_games, away_games, on='Team', how='outer').fillna(0)
total_games['Total_Games'] = total_games['Home_Games'] + total_games['Away_Games']

total_losses = total_games.copy()
total_losses['Total_Losses'] = total_games['Total_Games'] - total_wins['Total_Wins']

print(f"\nExample of total games and losses calculation:")
print(tabulate(total_losses.head(), headers='keys', tablefmt='pretty'))

# Step 4: Merge team data
print("\nStep 4: Merging team data...")
team_records = pd.merge(total_wins[['Team', 'Total_Wins']], total_losses[['Team', 'Total_Losses']], on='Team')
team_records = pd.merge(team_records, teams_df[['TeamID', 'Team', 'Division']], left_on='Team', right_on='TeamID')
team_records.rename(columns={'Team_y': 'Team'}, inplace=True)

print(f"\nExample of merged team data:")
print(tabulate(team_records.head(), headers='keys', tablefmt='pretty'))

# Step 5: Calculate win percentage and project future performance
print("\nStep 5: Calculating win percentage and projecting future performance...")
team_records['Win_Percentage'] = team_records['Total_Wins'] / (team_records['Total_Wins'] + team_records['Total_Losses'])
games_left = 17 - (team_records['Total_Wins'] + team_records['Total_Losses'])
team_records['Predicted_Wins'] = (team_records['Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Wins']
team_records['Predicted_Losses'] = 17 - team_records['Predicted_Wins']

print(f"\nExample of win percentage and projected performance:")
print(tabulate(team_records[['Team', 'Win_Percentage', 'Predicted_Wins', 'Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# Step 6: Recalculate strength of schedule
print("\nStep 6: Recalculating strength of schedule...")
league_avg_win_percentage = team_records['Win_Percentage'].mean()

# Calculate win percentage for home and away teams in the game data
games_df['home_win_percentage'] = games_df['home_team'].map(team_records.set_index('Team')['Win_Percentage'])
games_df['away_win_percentage'] = games_df['away_team'].map(team_records.set_index('Team')['Win_Percentage'])

# Fill any missing values with the league average win percentage
games_df['home_win_percentage'] = games_df['home_win_percentage'].fillna(league_avg_win_percentage)
games_df['away_win_percentage'] = games_df['away_win_percentage'].fillna(league_avg_win_percentage)

home_strength_of_schedule = games_df.groupby('home_team')['away_win_percentage'].mean().reset_index()
Away_Schedule_Strength = games_df.groupby('away_team')['home_win_percentage'].mean().reset_index()

home_strength_of_schedule.columns = ['Team', 'Home_Strength_of_Schedule']
Away_Schedule_Strength.columns = ['Team', 'Away_Schedule_Strength']

strength_of_schedule = pd.merge(home_strength_of_schedule, Away_Schedule_Strength, on='Team', how='outer').fillna(0)
strength_of_schedule['Average_Strength_of_Schedule'] = (strength_of_schedule['Home_Strength_of_Schedule'] + strength_of_schedule['Away_Schedule_Strength']) / 2

team_records = pd.merge(team_records, strength_of_schedule[['Team', 'Average_Strength_of_Schedule']], on='Team', how='left')

print(f"\nExample of strength of schedule calculation:")
print(tabulate(team_records[['Team', 'Average_Strength_of_Schedule']].head(), headers='keys', tablefmt='pretty'))

# Step 7: Adjust predictions based on strength of schedule
print("\nStep 7: Adjusting predictions based on strength of schedule...")
team_records['Adjusted_Win_Percentage'] = team_records['Win_Percentage'] * (1 + team_records['Average_Strength_of_Schedule'].fillna(0))

team_records['Adjusted_Predicted_Wins'] = (team_records['Adjusted_Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Wins']
team_records['Adjusted_Predicted_Losses'] = 17 - team_records['Adjusted_Predicted_Wins']

print(f"\nExample of adjusted predictions:")
print(tabulate(team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# Final Output: Display the adjusted team records
print("\nFinal adjusted predictions based on the model:")
adjusted_records = team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']]
print(tabulate(adjusted_records.sort_values(by='Adjusted_Predicted_Wins', ascending=False), headers='keys', tablefmt='pretty'))

# # Close the database connection
# conn.close()


# v2 w/ Weighted Previous Season

In [3]:
teams_df = pd.read_sql_query("SELECT * FROM Teams", conn)
games_df = pd.read_sql_query("SELECT * FROM Games", conn)

# Define weights for each season
season_weights = {
    2023: 3,  # Most recent season
    2022: 2,
    2021: 1
}

# Step 1: Filter the game data to include only the last 3 seasons and apply weights
print("Step 1: Filtering data to include only the last 3 seasons and applying weights...")
games_df = games_df[games_df['season'] >= 2021]
games_df['season_weight'] = games_df['season'].map(season_weights)
print(f"Number of games after filtering: {len(games_df)}")

# Step 2: Calculate weighted wins and losses for each team
print("\nStep 2: Calculating weighted wins and losses for each team...")
games_df['home_win'] = games_df['home_score'] > games_df['away_score']
games_df['away_win'] = games_df['away_score'] > games_df['home_score']

games_df['weighted_home_win'] = games_df['home_win'] * games_df['season_weight']
games_df['weighted_away_win'] = games_df['away_win'] * games_df['season_weight']

home_wins = games_df.groupby('home_team')['weighted_home_win'].sum().reset_index()
away_wins = games_df.groupby('away_team')['weighted_away_win'].sum().reset_index()

home_wins.columns = ['Team', 'Weighted_Home_Wins']
away_wins.columns = ['Team', 'Weighted_Away_Wins']

total_wins = pd.merge(home_wins, away_wins, on='Team', how='outer').fillna(0)
total_wins['Total_Weighted_Wins'] = total_wins['Weighted_Home_Wins'] + total_wins['Weighted_Away_Wins']

print(f"\nExample of weighted wins calculation:")
print(tabulate(total_wins.head(), headers='keys', tablefmt='pretty'))

# Step 3: Calculate weighted total games and losses
print("\nStep 3: Calculating weighted total games played and losses...")
home_games = games_df.groupby('home_team')['season_weight'].sum().reset_index(name='Weighted_Home_Games')
away_games = games_df.groupby('away_team')['season_weight'].sum().reset_index(name='Weighted_Away_Games')

home_games.columns = ['Team', 'Weighted_Home_Games']
away_games.columns = ['Team', 'Weighted_Away_Games']

total_games = pd.merge(home_games, away_games, on='Team', how='outer').fillna(0)
total_games['Total_Weighted_Games'] = total_games['Weighted_Home_Games'] + total_games['Weighted_Away_Games']

total_losses = total_games.copy()
total_losses['Total_Weighted_Losses'] = total_games['Total_Weighted_Games'] - total_wins['Total_Weighted_Wins']

print(f"\nExample of weighted total games and losses calculation:")
print(tabulate(total_losses.head(), headers='keys', tablefmt='pretty'))

# Step 4: Merge team data with weights
print("\nStep 4: Merging weighted team data...")
team_records = pd.merge(total_wins[['Team', 'Total_Weighted_Wins']], total_losses[['Team', 'Total_Weighted_Losses']], on='Team')
team_records = pd.merge(team_records, teams_df[['TeamID', 'Team', 'Division']], left_on='Team', right_on='TeamID')
team_records.rename(columns={'Team_y': 'Team'}, inplace=True)

print(f"\nExample of merged weighted team data:")
print(tabulate(team_records.head(), headers='keys', tablefmt='pretty'))

# Step 5: Calculate weighted win percentage and project future performance
print("\nStep 5: Calculating weighted win percentage and projecting future performance...")
team_records['Weighted_Win_Percentage'] = team_records['Total_Weighted_Wins'] / (team_records['Total_Weighted_Wins'] + team_records['Total_Weighted_Losses'])
games_left = 17 - (team_records['Total_Weighted_Wins'] + team_records['Total_Weighted_Losses']).round().astype(int)
team_records['Predicted_Wins'] = (team_records['Weighted_Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Weighted_Wins']
team_records['Predicted_Losses'] = games_left - team_records['Predicted_Wins']

print(f"\nExample of weighted win percentage and projected performance:")
print(tabulate(team_records[['Team', 'Weighted_Win_Percentage', 'Predicted_Wins', 'Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# Step 6: Recalculate strength of schedule with weights
print("\nStep 6: Recalculating strength of schedule with weights...")
league_avg_win_percentage = team_records['Weighted_Win_Percentage'].mean()

# Calculate win percentage for home and away teams in the game data
games_df['home_win_percentage'] = games_df['home_team'].map(team_records.set_index('Team')['Weighted_Win_Percentage'])
games_df['away_win_percentage'] = games_df['away_team'].map(team_records.set_index('Team')['Weighted_Win_Percentage'])

# Fill any missing values with the league average win percentage
games_df['home_win_percentage'] = games_df['home_win_percentage'].fillna(league_avg_win_percentage)
games_df['away_win_percentage'] = games_df['away_win_percentage'].fillna(league_avg_win_percentage)

home_strength_of_schedule = games_df.groupby('home_team')['away_win_percentage'].mean().reset_index()
Away_Schedule_Strength = games_df.groupby('away_team')['home_win_percentage'].mean().reset_index()

home_strength_of_schedule.columns = ['Team', 'Home_Strength_of_Schedule']
Away_Schedule_Strength.columns = ['Team', 'Away_Schedule_Strength']

strength_of_schedule = pd.merge(home_strength_of_schedule, Away_Schedule_Strength, on='Team', how='outer').fillna(0)
strength_of_schedule['Average_Strength_of_Schedule'] = (strength_of_schedule['Home_Strength_of_Schedule'] + strength_of_schedule['Away_Schedule_Strength']) / 2

team_records = pd.merge(team_records, strength_of_schedule[['Team', 'Average_Strength_of_Schedule']], on='Team', how='left')

print(f"\nExample of weighted strength of schedule calculation:")
print(tabulate(team_records[['Team', 'Average_Strength_of_Schedule']].head(), headers='keys', tablefmt='pretty'))

# Step 7: Adjust predictions based on weighted strength of schedule
print("\nStep 7: Adjusting predictions based on weighted strength of schedule...")
team_records['Adjusted_Win_Percentage'] = team_records['Weighted_Win_Percentage'] * (1 + team_records['Average_Strength_of_Schedule'].fillna(0))

team_records['Adjusted_Predicted_Wins'] = (team_records['Adjusted_Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Weighted_Wins']
team_records['Adjusted_Predicted_Losses'] = 17 - team_records['Adjusted_Predicted_Wins']

print(f"\nExample of adjusted predictions with weighted calculations:")
print(tabulate(team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# Final Output: Display the adjusted team records
print("\nFinal adjusted predictions based on the model with weighted calculations:")
adjusted_records = team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']]
print(tabulate(adjusted_records.sort_values(by='Adjusted_Predicted_Wins', ascending=False), headers='keys', tablefmt='pretty'))

# # Close the database connection
# conn.close()


Step 1: Filtering data to include only the last 3 seasons and applying weights...
Number of games after filtering: 1126

Step 2: Calculating weighted wins and losses for each team...

Example of weighted wins calculation:
+---+------+--------------------+--------------------+---------------------+
|   | Team | Weighted_Home_Wins | Weighted_Away_Wins | Total_Weighted_Wins |
+---+------+--------------------+--------------------+---------------------+
| 0 | ARI  |        11.0        |        20.0        |        31.0         |
| 1 | ATL  |        29.0        |        13.0        |        42.0         |
| 2 | BAL  |        36.0        |        34.0        |        70.0         |
| 3 | BUF  |        47.0        |        29.0        |        76.0         |
| 4 | CAR  |        18.0        |        7.0         |        25.0         |
+---+------+--------------------+--------------------+---------------------+

Step 3: Calculating weighted total games played and losses...

Example of weighted t

# v3 w/ Weighted Upcoming Strength of Schedule

In [4]:
teams_df = pd.read_sql_query("SELECT * FROM Teams", conn)
games_df = pd.read_sql_query("SELECT * FROM Games", conn)

season_weights = {
    2023: 3,
    2022: 2,
    2021: 1
}

# Step 1: Filter the game data to include only the last 3 seasons and apply weights
games_df = games_df[games_df['season'] >= 2021]
games_df['season_weight'] = games_df['season'].map(season_weights)

# Step 2: Calculate weighted wins and losses for each team
games_df['home_win'] = games_df['home_score'] > games_df['away_score']
games_df['away_win'] = games_df['away_score'] > games_df['home_score']

games_df['weighted_home_win'] = games_df['home_win'] * games_df['season_weight']
games_df['weighted_away_win'] = games_df['away_win'] * games_df['season_weight']

home_wins = games_df.groupby('home_team')['weighted_home_win'].sum().reset_index()
away_wins = games_df.groupby('away_team')['weighted_away_win'].sum().reset_index()

home_wins.columns = ['Team', 'Weighted_Home_Wins']
away_wins.columns = ['Team', 'Weighted_Away_Wins']

total_wins = pd.merge(home_wins, away_wins, on='Team', how='outer').fillna(0)
total_wins['Total_Weighted_Wins'] = total_wins['Weighted_Home_Wins'] + total_wins['Weighted_Away_Wins']

# Step 3: Calculate weighted win percentage
team_records = total_wins.copy()
team_records['Weighted_Win_Percentage'] = team_records['Total_Weighted_Wins'] / team_records['Total_Weighted_Wins'].sum()

# Step 4: Calculate the strength of the upcoming schedule (2024)
upcoming_season = 2024
upcoming_games_df = games_df[games_df['season'] == upcoming_season].copy()

# Calculate the opponent strength for each team's upcoming schedule
upcoming_games_df['opponent_win_percentage'] = upcoming_games_df.apply(
    lambda row: team_records.set_index('Team')['Weighted_Win_Percentage'].get(row['away_team']) 
    if row['home_team'] == row['home_team'] 
    else team_records.set_index('Team')['Weighted_Win_Percentage'].get(row['home_team']), axis=1
)

# Calculate the average strength of schedule for each team
home_schedule_strength = upcoming_games_df.groupby('home_team')['opponent_win_percentage'].mean().reset_index()
away_schedule_strength = upcoming_games_df.groupby('away_team')['opponent_win_percentage'].mean().reset_index()

home_schedule_strength.columns = ['Team', 'Home_Schedule_Strength']
away_schedule_strength.columns = ['Team', 'Away_Schedule_Strength']

strength_of_schedule = pd.merge(home_schedule_strength, away_schedule_strength, on='Team', how='outer').fillna(0)
strength_of_schedule['Overall_Schedule_Strength'] = (strength_of_schedule['Home_Schedule_Strength'] + strength_of_schedule['Away_Schedule_Strength']) / 2

# Print the results
strength_of_schedule_sorted = strength_of_schedule[['Team', 'Overall_Schedule_Strength']].sort_values(by='Overall_Schedule_Strength', ascending=False)

strength_of_schedule_sorted

Unnamed: 0,Team,Overall_Schedule_Strength
15,KC,0.042792
28,SF,0.040288
3,BUF,0.038425
8,DAL,0.038201
2,BAL,0.03714
25,PHI,0.036047
6,CIN,0.035232
7,CLE,0.034792
26,PIT,0.034535
10,DET,0.034319


In [5]:
conn.close()

# Archive

In [None]:
# All together w/ upcoming schedule scores

import pandas as pd
import sqlite3
from tabulate import tabulate

# Reconnect to the SQLite database
conn = sqlite3.connect('../Scraping/nfl.db')
teams_df = pd.read_sql_query("SELECT * FROM Teams", conn)
games_df = pd.read_sql_query("SELECT * FROM Games", conn)

# Define weights for each season
season_weights = {
    2023: 3,  # Most recent season
    2022: 2,
    2021: 1
}

# Step 1: Filter the game data to include only the last 3 seasons and apply weights
print("Step 1: Filtering data to include only the last 3 seasons and applying weights...")
games_df = games_df[games_df['season'] >= 2021]
games_df['season_weight'] = games_df['season'].map(season_weights)
print(f"Number of games after filtering: {len(games_df)}")

# Step 2: Calculate weighted wins and losses for each team
print("\nStep 2: Calculating weighted wins and losses for each team...")
games_df['home_win'] = games_df['home_score'] > games_df['away_score']
games_df['away_win'] = games_df['away_score'] > games_df['home_score']

games_df['weighted_home_win'] = games_df['home_win'] * games_df['season_weight']
games_df['weighted_away_win'] = games_df['away_win'] * games_df['season_weight']

home_wins = games_df.groupby('home_team')['weighted_home_win'].sum().reset_index()
away_wins = games_df.groupby('away_team')['weighted_away_win'].sum().reset_index()

home_wins.columns = ['Team', 'Weighted_Home_Wins']
away_wins.columns = ['Team', 'Weighted_Away_Wins']

total_wins = pd.merge(home_wins, away_wins, on='Team', how='outer').fillna(0)
total_wins['Total_Weighted_Wins'] = total_wins['Weighted_Home_Wins'] + total_wins['Weighted_Away_Wins']

print(f"\nExample of weighted wins calculation:")
print(tabulate(total_wins.head(), headers='keys', tablefmt='pretty'))

# Step 3: Calculate weighted total games and losses
print("\nStep 3: Calculating weighted total games played and losses...")
home_games = games_df.groupby('home_team')['season_weight'].sum().reset_index(name='Weighted_Home_Games')
away_games = games_df.groupby('away_team')['season_weight'].sum().reset_index(name='Weighted_Away_Games')

home_games.columns = ['Team', 'Weighted_Home_Games']
away_games.columns = ['Team', 'Weighted_Away_Games']

total_games = pd.merge(home_games, away_games, on='Team', how='outer').fillna(0)
total_games['Total_Weighted_Games'] = total_games['Weighted_Home_Games'] + total_games['Weighted_Away_Games']

total_losses = total_games.copy()
total_losses['Total_Weighted_Losses'] = total_games['Total_Weighted_Games'] - total_wins['Total_Weighted_Wins']

print(f"\nExample of weighted total games and losses calculation:")
print(tabulate(total_losses.head(), headers='keys', tablefmt='pretty'))

# Step 4: Merge team data with weights
print("\nStep 4: Merging weighted team data...")
team_records = pd.merge(total_wins[['Team', 'Total_Weighted_Wins']], total_losses[['Team', 'Total_Weighted_Losses']], on='Team')
team_records = pd.merge(team_records, teams_df[['TeamID', 'Team', 'Division']], left_on='Team', right_on='TeamID')
team_records.rename(columns={'Team_y': 'Team'}, inplace=True)

print(f"\nExample of merged weighted team data:")
print(tabulate(team_records.head(), headers='keys', tablefmt='pretty'))

# Step 5: Calculate weighted win percentage and project future performance
print("\nStep 5: Calculating weighted win percentage and projecting future performance...")
team_records['Weighted_Win_Percentage'] = team_records['Total_Weighted_Wins'] / (team_records['Total_Weighted_Wins'] + team_records['Total_Weighted_Losses'])
games_left = 17 - (team_records['Total_Weighted_Wins'] + team_records['Total_Weighted_Losses']).round().astype(int)
team_records['Predicted_Wins'] = (team_records['Weighted_Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Weighted_Wins']
team_records['Predicted_Losses'] = games_left - team_records['Predicted_Wins']

print(f"\nExample of weighted win percentage and projected performance:")
print(tabulate(team_records[['Team', 'Weighted_Win_Percentage', 'Predicted_Wins', 'Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# Step 6: Recalculate strength of schedule with weights
print("\nStep 6: Recalculating strength of schedule with weights...")
league_avg_win_percentage = team_records['Weighted_Win_Percentage'].mean()

# Calculate win percentage for home and away teams in the game data
games_df['home_win_percentage'] = games_df['home_team'].map(team_records.set_index('Team')['Weighted_Win_Percentage'])
games_df['away_win_percentage'] = games_df['away_team'].map(team_records.set_index('Team')['Weighted_Win_Percentage'])

# Fill any missing values with the league average win percentage
games_df['home_win_percentage'] = games_df['home_win_percentage'].fillna(league_avg_win_percentage)
games_df['away_win_percentage'] = games_df['away_win_percentage'].fillna(league_avg_win_percentage)

home_strength_of_schedule = games_df.groupby('home_team')['away_win_percentage'].mean().reset_index()
Away_Schedule_Strength = games_df.groupby('away_team')['home_win_percentage'].mean().reset_index()

home_strength_of_schedule.columns = ['Team', 'Home_Strength_of_Schedule']
Away_Schedule_Strength.columns = ['Team', 'Away_Schedule_Strength']

strength_of_schedule = pd.merge(home_strength_of_schedule, Away_Schedule_Strength, on='Team', how='outer').fillna(0)
strength_of_schedule['Average_Strength_of_Schedule'] = (strength_of_schedule['Home_Strength_of_Schedule'] + strength_of_schedule['Away_Schedule_Strength']) / 2

team_records = pd.merge(team_records, strength_of_schedule[['Team', 'Average_Strength_of_Schedule']], on='Team', how='left')

print(f"\nExample of weighted strength of schedule calculation:")
print(tabulate(team_records[['Team', 'Average_Strength_of_Schedule']].head(), headers='keys', tablefmt='pretty'))

# Step 7: Adjust predictions based on weighted strength of schedule
print("\nStep 7: Adjusting predictions based on weighted strength of schedule...")
team_records['Adjusted_Win_Percentage'] = team_records['Weighted_Win_Percentage'] * (1 + team_records['Average_Strength_of_Schedule'].fillna(0))

team_records['Adjusted_Predicted_Wins'] = (team_records['Adjusted_Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Weighted_Wins']
team_records['Adjusted_Predicted_Losses'] = 17 - team_records['Adjusted_Predicted_Wins']

print(f"\nExample of adjusted predictions with weighted calculations:")
print(tabulate(team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# Final Output: Display the adjusted team records
print("\nFinal adjusted predictions based on the model with weighted calculations:")
adjusted_records = team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']]
print(tabulate(adjusted_records.sort_values(by='Adjusted_Predicted_Wins', ascending=False), headers='keys', tablefmt='pretty'))

# Load data from the database
teams_df = pd.read_sql_query("SELECT * FROM Teams", conn)
games_df = pd.read_sql_query("SELECT * FROM Games", conn)

# Define weights for each season
season_weights = {
    2023: 3,  # Most recent season
    2022: 2,
    2021: 1
}

# Step 1: Filter the game data to include only the last 3 seasons and apply weights
print("Step 1: Filtering data to include only the last 3 seasons and applying weights...")
games_df = games_df[games_df['season'] >= 2021]
games_df['season_weight'] = games_df['season'].map(season_weights)
print(f"Number of games after filtering: {len(games_df)}")

# Step 2: Calculate weighted wins and losses for each team
print("\nStep 2: Calculating weighted wins and losses for each team...")
games_df['home_win'] = games_df['home_score'] > games_df['away_score']
games_df['away_win'] = games_df['away_score'] > games_df['home_score']

games_df['weighted_home_win'] = games_df['home_win'] * games_df['season_weight']
games_df['weighted_away_win'] = games_df['away_win'] * games_df['season_weight']

home_wins = games_df.groupby('home_team')['weighted_home_win'].sum().reset_index()
away_wins = games_df.groupby('away_team')['weighted_away_win'].sum().reset_index()

home_wins.columns = ['Team', 'Weighted_Home_Wins']
away_wins.columns = ['Team', 'Weighted_Away_Wins']

total_wins = pd.merge(home_wins, away_wins, on='Team', how='outer').fillna(0)
total_wins['Total_Weighted_Wins'] = total_wins['Weighted_Home_Wins'] + total_wins['Weighted_Away_Wins']

print(f"\nExample of weighted wins calculation:")
print(tabulate(total_wins.head(), headers='keys', tablefmt='pretty'))

# Step 3: Calculate weighted total games and losses
print("\nStep 3: Calculating weighted total games played and losses...")
home_games = games_df.groupby('home_team')['season_weight'].sum().reset_index(name='Weighted_Home_Games')
away_games = games_df.groupby('away_team')['season_weight'].sum().reset_index(name='Weighted_Away_Games')

home_games.columns = ['Team', 'Weighted_Home_Games']
away_games.columns = ['Team', 'Weighted_Away_Games']

total_games = pd.merge(home_games, away_games, on='Team', how='outer').fillna(0)
total_games['Total_Weighted_Games'] = total_games['Weighted_Home_Games'] + total_games['Weighted_Away_Games']

total_losses = total_games.copy()
total_losses['Total_Weighted_Losses'] = total_games['Total_Weighted_Games'] - total_wins['Total_Weighted_Wins']

print(f"\nExample of weighted total games and losses calculation:")
print(tabulate(total_losses.head(), headers='keys', tablefmt='pretty'))

# Step 4: Merge team data with weights
print("\nStep 4: Merging weighted team data...")
team_records = pd.merge(total_wins[['Team', 'Total_Weighted_Wins']], total_losses[['Team', 'Total_Weighted_Losses']], on='Team')
team_records = pd.merge(team_records, teams_df[['TeamID', 'Team', 'Division']], left_on='Team', right_on='TeamID')
team_records.rename(columns={'Team_y': 'Team'}, inplace=True)

print(f"\nExample of merged weighted team data:")
print(tabulate(team_records.head(), headers='keys', tablefmt='pretty'))

# Step 5: Calculate weighted win percentage and project future performance
print("\nStep 5: Calculating weighted win percentage and projecting future performance...")
team_records['Weighted_Win_Percentage'] = team_records['Total_Weighted_Wins'] / (team_records['Total_Weighted_Wins'] + team_records['Total_Weighted_Losses'])
games_left = 17 - (team_records['Total_Weighted_Wins'] + team_records['Total_Weighted_Losses']).round().astype(int)
team_records['Predicted_Wins'] = (team_records['Weighted_Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Weighted_Wins']
team_records['Predicted_Losses'] = games_left - team_records['Predicted_Wins']

print(f"\nExample of weighted win percentage and projected performance:")
print(tabulate(team_records[['Team', 'Weighted_Win_Percentage', 'Predicted_Wins', 'Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# Step 6: Recalculate strength of schedule with weights
print("\nStep 6: Recalculating strength of schedule with weights...")
league_avg_win_percentage = team_records['Weighted_Win_Percentage'].mean()

# Calculate win percentage for home and away teams in the game data
games_df['home_win_percentage'] = games_df['home_team'].map(team_records.set_index('Team')['Weighted_Win_Percentage'])
games_df['away_win_percentage'] = games_df['away_team'].map(team_records.set_index('Team')['Weighted_Win_Percentage'])

# Fill any missing values with the league average win percentage
games_df['home_win_percentage'] = games_df['home_win_percentage'].fillna(league_avg_win_percentage)
games_df['away_win_percentage'] = games_df['away_win_percentage'].fillna(league_avg_win_percentage)

home_strength_of_schedule = games_df.groupby('home_team')['away_win_percentage'].mean().reset_index()
Away_Schedule_Strength = games_df.groupby('away_team')['home_win_percentage'].mean().reset_index()

home_strength_of_schedule.columns = ['Team', 'Home_Strength_of_Schedule']
Away_Schedule_Strength.columns = ['Team', 'Away_Schedule_Strength']

strength_of_schedule = pd.merge(home_strength_of_schedule, Away_Schedule_Strength, on='Team', how='outer').fillna(0)
strength_of_schedule['Average_Strength_of_Schedule'] = (strength_of_schedule['Home_Strength_of_Schedule'] + strength_of_schedule['Away_Schedule_Strength']) / 2

team_records = pd.merge(team_records, strength_of_schedule[['Team', 'Average_Strength_of_Schedule']], on='Team', how='left')

print(f"\nExample of weighted strength of schedule calculation:")
print(tabulate(team_records[['Team', 'Average_Strength_of_Schedule']].head(), headers='keys', tablefmt='pretty'))

# Step 7: Adjust predictions based on weighted strength of schedule
print("\nStep 7: Adjusting predictions based on weighted strength of schedule...")
team_records['Adjusted_Win_Percentage'] = team_records['Weighted_Win_Percentage'] * (1 + team_records['Average_Strength_of_Schedule'].fillna(0))

team_records['Adjusted_Predicted_Wins'] = (team_records['Adjusted_Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Weighted_Wins']
team_records['Adjusted_Predicted_Losses'] = 17 - team_records['Adjusted_Predicted_Wins']

print(f"\nExample of adjusted predictions with weighted calculations:")
print(tabulate(team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# Final Output: Display the adjusted team records
print("\nFinal adjusted predictions based on the model with weighted calculations:")
adjusted_records = team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']]
print(tabulate(adjusted_records.sort_values(by='Adjusted_Predicted_Wins', ascending=False), headers='keys', tablefmt='pretty'))

# Step 8: Calculate the strength of the upcoming schedule (2024)
print("\nStep 8: Calculating the strength of the upcoming schedule...")

# Create a mapping dictionary for team abbreviations to their full names
team_mapping_manual = {
    'ARI': 'Arizona Cardinals',
    'ATL': 'Atlanta Falcons',
    'BAL': 'Baltimore Ravens',
    'BUF': 'Buffalo Bills',
    'CAR': 'Carolina Panthers',
    'CHI': 'Chicago Bears',
    'CIN': 'Cincinnati Bengals',
    'CLE': 'Cleveland Browns',
    'DAL': 'Dallas Cowboys',
    'DEN': 'Denver Broncos',
    'DET': 'Detroit Lions',
    'GB': 'Green Bay Packers',
    'HOU': 'Houston Texans',
    'IND': 'Indianapolis Colts',
    'JAX': 'Jacksonville Jaguars',
    'KC': 'Kansas City Chiefs',
    'LAC': 'Los Angeles Chargers',
    'LAR': 'Los Angeles Rams',
    'LV': 'Las Vegas Raiders',
    'MIA': 'Miami Dolphins',
    'MIN': 'Minnesota Vikings',
    'NE': 'New England Patriots',
    'NO': 'New Orleans Saints',
    'NYG': 'New York Giants',
    'NYJ': 'New York Jets',
    'PHI': 'Philadelphia Eagles',
    'PIT': 'Pittsburgh Steelers',
    'SEA': 'Seattle Seahawks',
    'SF': 'San Francisco 49ers',
    'TB': 'Tampa Bay Buccaneers',
    'TEN': 'Tennessee Titans',
    'WAS': 'Washington Commanders'
}

# Step 8: Calculate the strength of the upcoming schedule (2024)
upcoming_season = 2024
upcoming_games_df = games_df[games_df['season'] == upcoming_season].copy()

# Apply the mapping to convert abbreviations to full team names
upcoming_games_df['home_team_full'] = upcoming_games_df['home_team'].map(team_mapping_manual)
upcoming_games_df['away_team_full'] = upcoming_games_df['away_team'].map(team_mapping_manual)

# Calculate the opponent strength for each team's upcoming schedule
upcoming_games_df['opponent_win_percentage'] = upcoming_games_df.apply(
    lambda row: team_records.set_index('Team')['Weighted_Win_Percentage'].get(row['away_team_full'])
    if pd.notna(row['home_team_full']) else team_records.set_index('Team')['Weighted_Win_Percentage'].get(row['home_team_full']), axis=1
)

# Calculate the average strength of schedule for each team
home_schedule_strength = upcoming_games_df.groupby('home_team_full')['opponent_win_percentage'].mean().reset_index()
away_schedule_strength = upcoming_games_df.groupby('away_team_full')['opponent_win_percentage'].mean().reset_index()

home_schedule_strength.columns = ['Team', 'Home_Schedule_Strength']
away_schedule_strength.columns = ['Team', 'Away_Strength_of_Schedule']  # Correct column name

# Merge home and away schedule strength
strength_of_schedule = pd.merge(home_schedule_strength, away_schedule_strength, on='Team', how='outer').fillna(0)
strength_of_schedule['Overall_Schedule_Strength'] = (strength_of_schedule['Home_Schedule_Strength'] + strength_of_schedule['Away_Strength_of_Schedule']) / 2

# Print the final results
strength_of_schedule_sorted = strength_of_schedule[['Team', 'Overall_Schedule_Strength']].sort_values(by='Overall_Schedule_Strength', ascending=False)
print("\nUpcoming Strength of Schedule for Each Team:")
print(tabulate(strength_of_schedule_sorted, headers='keys', tablefmt='pretty'))

# Close the database connection
conn.close()


In [None]:
# Print teams upcoming schedules 

upcoming_games_df = games_df[games_df['season'] == 2024]
teams = upcoming_games_df['home_team'].unique()

for team in teams:
    # Filter the upcoming games for the specific team
    team_schedule_home = upcoming_games_df[upcoming_games_df['home_team'] == team][['home_team', 'away_team', 'season', 'week']]
    team_schedule_away = upcoming_games_df[upcoming_games_df['away_team'] == team][['home_team', 'away_team', 'season', 'week']]
    
    # Combine home and away schedules
    team_schedule = pd.concat([team_schedule_home, team_schedule_away]).sort_values(by='week')
    
    # Print the schedule using tabulate
    print(f"\nUpcoming Schedule for {team}:\n")
    print(tabulate(team_schedule, headers='keys', tablefmt='pretty'))
    print("\n" + "="*60 + "\n")


In [None]:
# Predict upcoming season wins for every team (csv)

# # Step 1: Filter the game data to include only seasons since 2020
# print("Step 1: Filtering data to include only seasons since 2020...")
# games_df = games_df[games_df['season'] >= 2020]
# print(f"Number of games after filtering: {len(games_df)}")

# # Step 2: Calculate current wins and losses for each team
# print("\nStep 2: Calculating current wins and losses for each team...")
# games_df['home_win'] = games_df['home_score'] > games_df['away_score']
# games_df['away_win'] = games_df['away_score'] > games_df['home_score']

# home_wins = games_df.groupby('home_team')['home_win'].sum().reset_index()
# away_wins = games_df.groupby('away_team')['away_win'].sum().reset_index()

# home_wins.columns = ['Team', 'Home_Wins']
# away_wins.columns = ['Team', 'Away_Wins']

# total_wins = pd.merge(home_wins, away_wins, on='Team', how='outer').fillna(0)
# total_wins['Total_Wins'] = total_wins['Home_Wins'] + total_wins['Away_Wins']

# print(f"\nExample of current wins calculation:")
# print(tabulate(total_wins.head(), headers='keys', tablefmt='pretty'))

# # Step 3: Calculate total games and losses
# print("\nStep 3: Calculating total games played and losses...")
# home_games = games_df.groupby('home_team').size().reset_index(name='Home_Games')
# away_games = games_df.groupby('away_team').size().reset_index(name='Away_Games')

# home_games.columns = ['Team', 'Home_Games']
# away_games.columns = ['Team', 'Away_Games']

# total_games = pd.merge(home_games, away_games, on='Team', how='outer').fillna(0)
# total_games['Total_Games'] = total_games['Home_Games'] + total_games['Away_Games']

# total_losses = total_games.copy()
# total_losses['Total_Losses'] = total_games['Total_Games'] - total_wins['Total_Wins']

# print(f"\nExample of total games and losses calculation:")
# print(tabulate(total_losses.head(), headers='keys', tablefmt='pretty'))

# # Step 4: Merge team data
# print("\nStep 4: Merging team data...")
# team_records = pd.merge(total_wins[['Team', 'Total_Wins']], total_losses[['Team', 'Total_Losses']], on='Team')
# team_records = pd.merge(team_records, teams_df[['TeamID', 'Team', 'Division']], left_on='Team', right_on='TeamID')
# team_records.rename(columns={'Team_y': 'Team'}, inplace=True)

# print(f"\nExample of merged team data:")
# print(tabulate(team_records.head(), headers='keys', tablefmt='pretty'))

# # Step 5: Calculate win percentage and project future performance
# print("\nStep 5: Calculating win percentage and projecting future performance...")
# team_records['Win_Percentage'] = team_records['Total_Wins'] / (team_records['Total_Wins'] + team_records['Total_Losses'])
# games_left = 17 - (team_records['Total_Wins'] + team_records['Total_Losses'])
# team_records['Predicted_Wins'] = (team_records['Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Wins']
# team_records['Predicted_Losses'] = 17 - team_records['Predicted_Wins']

# print(f"\nExample of win percentage and projected performance:")
# print(tabulate(team_records[['Team', 'Win_Percentage', 'Predicted_Wins', 'Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# # Step 6: Recalculate strength of schedule
# print("\nStep 6: Recalculating strength of schedule...")
# league_avg_win_percentage = team_records['Win_Percentage'].mean()

# # Calculate win percentage for home and away teams in the game data
# games_df['home_win_percentage'] = games_df['home_team'].map(team_records.set_index('Team')['Win_Percentage'])
# games_df['away_win_percentage'] = games_df['away_team'].map(team_records.set_index('Team')['Win_Percentage'])

# # Fill any missing values with the league average win percentage
# games_df['home_win_percentage'] = games_df['home_win_percentage'].fillna(league_avg_win_percentage)
# games_df['away_win_percentage'] = games_df['away_win_percentage'].fillna(league_avg_win_percentage)

# home_strength_of_schedule = games_df.groupby('home_team')['away_win_percentage'].mean().reset_index()
# Away_Schedule_Strength = games_df.groupby('away_team')['home_win_percentage'].mean().reset_index()

# home_strength_of_schedule.columns = ['Team', 'Home_Strength_of_Schedule']
# Away_Schedule_Strength.columns = ['Team', 'Away_Schedule_Strength']

# strength_of_schedule = pd.merge(home_strength_of_schedule, Away_Schedule_Strength, on='Team', how='outer').fillna(0)
# strength_of_schedule['Average_Strength_of_Schedule'] = (strength_of_schedule['Home_Strength_of_Schedule'] + strength_of_schedule['Away_Schedule_Strength']) / 2

# team_records = pd.merge(team_records, strength_of_schedule[['Team', 'Average_Strength_of_Schedule']], on='Team', how='left')

# print(f"\nExample of strength of schedule calculation:")
# print(tabulate(team_records[['Team', 'Average_Strength_of_Schedule']].head(), headers='keys', tablefmt='pretty'))

# # Step 7: Adjust predictions based on strength of schedule
# print("\nStep 7: Adjusting predictions based on strength of schedule...")
# team_records['Adjusted_Win_Percentage'] = team_records['Win_Percentage'] * (1 + team_records['Average_Strength_of_Schedule'].fillna(0))

# team_records['Adjusted_Predicted_Wins'] = (team_records['Adjusted_Win_Percentage'] * games_left).round().astype(int) + team_records['Total_Wins']
# team_records['Adjusted_Predicted_Losses'] = 17 - team_records['Adjusted_Predicted_Wins']

# print(f"\nExample of adjusted predictions:")
# print(tabulate(team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']].head(), headers='keys', tablefmt='pretty'))

# # Final Output: Display the adjusted team records
# print("\nFinal adjusted predictions based on the model:")
# adjusted_records = team_records[['Team', 'Adjusted_Win_Percentage', 'Adjusted_Predicted_Wins', 'Adjusted_Predicted_Losses']]
# print(tabulate(adjusted_records.sort_values(by='Adjusted_Predicted_Wins', ascending=False), headers='keys', tablefmt='pretty'))
