In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the dataset from "output.csv"; the path is relative, so it is resolved
# against the notebook's current working directory.
# NOTE(review): the head() preview shows an "Unnamed: 0" column, which looks like
# a row index that was written into the CSV - confirm whether it should be kept.
total = pd.read_csv("output.csv")

In [3]:
# Preview the first rows to sanity-check the columns that were loaded.
total.head()

Unnamed: 0,player_id,full_name,team,season,week,week_start,conference,pow_conference,games_played_this_week,numMinutes,...,breakout_score,league_pts_mean,league_pts_std,league_ast_mean,league_ast_std,league_pm_mean,league_pm_std,z_s_pts,z_s_ast,z_s_pm
0,305,Robert Parish,Warriors,1979,1,1979-12-31,West,West,3,101.0,...,-0.623703,,,,,,,0.0,0.0,0.0
1,76003,Kareem Abdul-Jabbar,Lakers,1979,1,1979-12-31,West,West,2,82.0,...,-1.218188,,,,,,,0.0,0.0,0.0
2,76005,Tom Abernethy,Warriors,1979,1,1979-12-31,West,West,1,11.0,...,-1.365436,,,,,,,0.0,0.0,0.0
3,76011,Alvan Adams,Suns,1979,1,1979-12-31,West,West,2,64.0,...,-0.576144,,,,,,,0.0,0.0,0.0
4,76085,James Bailey,SuperSonics,1979,1,1979-12-31,West,West,4,41.0,...,0.643651,,,,,,,0.0,0.0,0.0


In [13]:
# Player of the Week (PoW) overview: number of awards, the players holding the
# most awards, and average wins in the week for award winners vs. everyone else.

print("Player of the Week Statistics:")
# Split rows on the award flag; `pow_winners` / `non_winners` are reused by later cells.
award_flag = total['won_player_of_the_week']
pow_winners = total.loc[award_flag == 1]
non_winners = total.loc[award_flag == 0]
award_count = len(pow_winners)
print(f"Total PoW awards: {award_count}")
# Share of all rows in `total` that carry the award flag, as a percentage.
print(f"PoW win rate: {(award_count / len(total) * 100):.2f}%")

print("\nMost PoW Awards (Top 10):")
top_pow = pow_winners['full_name'].value_counts().head(10)
print(top_pow)

print("\nWins This Week - PoW vs Non-PoW:")
avg_wins_pow = pow_winners['wins_this_week'].mean()
print(f"PoW Winners avg wins: {avg_wins_pow:.2f}")
avg_wins_rest = non_winners['wins_this_week'].mean()
print(f"Non-PoW avg wins: {avg_wins_rest:.2f}")

Player of the Week Statistics:
Total PoW awards: 1394
PoW win rate: 0.59%

Most PoW Awards (Top 10):
full_name
LeBron James             64
Kevin Durant             33
Kobe Bryant              29
James Harden             25
Giannis Antetokounmpo    23
Allen Iverson            20
Michael Jordan           20
Karl Malone              20
Tim Duncan               20
Russell Westbrook        19
Name: count, dtype: int64

Wins This Week - PoW vs Non-PoW:
PoW Winners avg wins: 3.61
Non-PoW avg wins: 1.82


The average number of wins per week is roughly double for Player of the Week winners compared with players who did not win (3.61 vs. 1.82). This suggests that `wins_this_week` is a potentially useful feature for predicting whether a player wins the award.


Games Played This Week:
PoW Winners avg: 3.49
Non-Winners avg: 2.96

Wins This Week:
PoW Winners avg: 3.61
Non-Winners avg: 1.82

Weekly Win Rate:
PoW Winners avg: 1.04
Non-Winners avg: 0.61


In [18]:
# Team record over the week (wins_this_week, games_played_this_week).
# Each metric is summarised over all rows and then compared between PoW winners
# and non-winners. Reads `total`, `pow_winners` and `non_winners` from earlier cells.


def _print_weekly_summary(title, overall, winners, others):
    """Print a heading, then count/mean/std of `overall` and the means of both groups.

    title   -- heading line printed first
    overall -- Series with the metric for every row
    winners -- Series with the metric for PoW winners
    others  -- Series with the metric for non-winners
    """
    print(title)
    print(f"Count: {overall.count()}")
    print(f"Average: {overall.mean():.2f}")
    print(f"Standard Deviation: {overall.std():.2f}")
    print(f"PoW Winners Avg: {winners.mean():.2f}")
    print(f"Non-Winners Avg: {others.mean():.2f}")


def _add_week_win_rate(frame):
    """Return a copy of the rows with at least one game played, plus `week_win_rate`.

    Rows with zero games played are filtered out first so the division below
    never has a zero denominator. The input frame is left untouched.
    """
    played = frame[frame['games_played_this_week'] > 0].copy()
    played['week_win_rate'] = played['wins_this_week'] / played['games_played_this_week']
    return played


for _title, _column in (
    ("Games Played This Week:", 'games_played_this_week'),
    ("\nWins This Week:", 'wins_this_week'),
):
    _print_weekly_summary(_title, total[_column], pow_winners[_column], non_winners[_column])

# week_win_rate = wins_this_week / games_played_this_week, row by row.
# Nothing here caps the ratio at 1, so rows where wins exceed games played
# produce values above 1.
weekly_data = _add_week_win_rate(total)
pow_weekly = _add_week_win_rate(pow_winners)
non_weekly = _add_week_win_rate(non_winners)

_print_weekly_summary(
    "\nWeekly Win Rate:",
    weekly_data['week_win_rate'],
    pow_weekly['week_win_rate'],
    non_weekly['week_win_rate'],
)

Games Played This Week:
Count: 234497
Average: 2.96
Standard Deviation: 0.99
PoW Winners Avg: 3.49
Non-Winners Avg: 2.96

Wins This Week:
Count: 234497
Average: 1.83
Standard Deviation: 1.58
PoW Winners Avg: 3.61
Non-Winners Avg: 1.82

Weekly Win Rate:
Count: 234497
Average: 0.62
Standard Deviation: 0.51
PoW Winners Avg: 1.04
Non-Winners Avg: 0.61


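The average number of games played per week is ~3 and the average number of wins per week is ~1.8. Comparing the two, players on average win a little more than half of their games each week, which is consistent with the average weekly win rate of ~62%. For all three metrics, the average for winners is greater than for non-winners, which again indicates that these features can be good predictors of winners. Note, however, that the winners' average weekly win rate (1.04) is above 1, so `wins_this_week` must exceed `games_played_this_week` in some rows; the two columns appear to be counted on different bases, so the ratio should not be read as a true percentage until this is checked.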

In [19]:
# Win streaks (home_win_streak_prior, away_win_streak_prior).
# Each streak column is summarised over all rows and then compared between PoW
# winners and non-winners. Reads `total`, `pow_winners` and `non_winners` from
# earlier cells.


def _print_streak_summary(title, column):
    """Print a heading, then count/mean/std/max of `total[column]` and both group means.

    title  -- heading line printed first
    column -- name of the streak column to summarise
    """
    print(title)
    overall = total[column]
    print(f"Count: {overall.count()}")
    print(f"Average: {overall.mean():.2f}")
    print(f"Standard Deviation: {overall.std():.2f}")
    print(f"Max: {overall.max()}")
    print(f"PoW Winners Avg: {pow_winners[column].mean():.2f}")
    print(f"Non-Winners Avg: {non_winners[column].mean():.2f}")


_print_streak_summary("Home Win Streak:", 'home_win_streak_prior')
_print_streak_summary("\nAway Win Streak:", 'away_win_streak_prior')

Home Win Streak:
Count: 234497
Average: 0.97
Standard Deviation: 1.03
Max: 9
PoW Winners Avg: 1.59
Non-Winners Avg: 0.97

Away Win Streak:
Count: 234497
Average: 0.63
Standard Deviation: 0.83
Max: 8
PoW Winners Avg: 1.16
Non-Winners Avg: 0.63


For home games, despite the common belief in a "home-team advantage", the average win streak is only ~1 (0.97); in other words, teams do not, on average, sustain home win streaks longer than a game or two. Similarly, the average win streak for away games is lower still, at 0.63. For both metrics, however, winners on average have a longer win streak than non-winners (1.59 vs. 0.97 at home and 1.16 vs. 0.63 away).