In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="whitegrid")
import plotly.io as pio



In [None]:
# Streamlit plots still to build:
# - season-by-season trend for each stat
# - career stats against each opposing team

In [2]:
# Load the dataset that every later cell reads as `df`.
# The path is relative, so goat.csv must sit in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='goat.csv')

In [10]:
# List the available columns before building any plots.
column_names = list(df.columns)
print(f"Column Names: {column_names}")

Column Names: ['Player', 'Season', 'Team', 'Location', 'Opp Team', 'Date', 'Result', 'Mins Played', 'Field Goal', 'Field Goal Att', 'Field Goal %', '3 Point Field Goal', '3 Point Field Goal Att', '3 Point Field Goal %', 'Free Throw', 'Free Throw Att', 'Free Throw %', 'Rebounds', 'Assists', 'Steals', 'Blocks', 'Turnovers', 'Points', 'Usage %', 'True Shooting %', 'Effective Fg %', 'Total Rebound %', 'Assist %', 'Offensive Rating', 'Defensive Rating', 'Game Score', 'Playoffs', 'Championship Season']


In [130]:
# Regular-season scoring trend: points per game for each season of a player's career.
is_regular_season = df['Playoffs'] == False
result_df = (
    df[is_regular_season]
    .groupby(['Season', 'Player'])
    .agg({'Points': 'sum', 'Date': 'count'})
    .reset_index()
    # The count of non-null Date values per (Season, Player) is used as games played.
    .rename(columns={'Date': 'Games_Played'})
    .assign(PPG=lambda season_totals: season_totals['Points'] / season_totals['Games_Played'])
)
# Replace the season label with its 1-based rank within each player's own seasons,
# so both careers are plotted against the same "n-th season" axis.
result_df['Season'] = (
    result_df.groupby('Player')['Season'].rank(ascending=True).astype(int)
)

# Line chart of PPG against career season, one line per player.
fig = px.line(
    result_df,
    x='Season',
    y='PPG',
    color='Player',
    hover_data=['Player'],
    labels={'PPG': 'Points Per Game', 'Season': 'Season', 'Player': 'Player Name'},
    color_discrete_sequence=["Indigo", "LimeGreen"],
)
fig.update_layout(title=dict(text='PPG for Each Season of Career', x=0.5))
fig.show()
# Also save a static copy of the chart next to the notebook.
fig.write_image('ppg.png')

In [131]:
# Keep each player's 20 highest Game Score rows (regular season and playoffs alike)
# and plot them against Usage %.
result_df = (
    df.sort_values(by=['Player', 'Game Score'], ascending=[True, False])
    .groupby('Player')
    .head(20)
)
# Rows are ordered by Player, so colours are handed out in that order:
# LimeGreen to the first player, Indigo to the second.
fig = px.scatter(
    result_df,
    x="Game Score",
    y="Usage %",
    color="Player",
    color_discrete_sequence=["LimeGreen", "Indigo"],
    hover_data=['Player', 'Date', 'Opp Team', 'Result'],
)
fig.update_layout(title=dict(text='Top 20 Career Game Scores vs Usage Rates', x=0.5))
fig.show()
fig.write_image('top20.png')

In [132]:
# Mean Game Score per player, split by the Playoffs flag.
result_df = (
    df.groupby(['Playoffs', 'Player'])['Game Score']
    .mean()
    .reset_index()
)
# px.histogram sums y within each x/colour group; result_df holds exactly one row
# per (Playoffs, Player), so every bar's height is simply that group's mean.
fig = px.histogram(
    result_df,
    x="Player",
    y="Game Score",
    color='Playoffs',
    barmode='group',
    height=400,
    color_discrete_sequence=["Indigo", "LimeGreen"],
)
fig.update_layout(
    title_text='Career Game Score Average in Reg. Season & Playoffs',
    title_x=0.5,
    yaxis_title='Mean Game Score',
)
fig.show()
fig.write_image('game_score.png')

In [133]:
# 20x20 density heatmap of Offensive Rating vs Defensive Rating over Michael Jordan's rows.
is_jordan = df['Player'] == 'Michael Jordan'
result_df = df.loc[is_jordan]
fig = px.density_heatmap(
    result_df,
    x="Offensive Rating",
    y="Defensive Rating",
    nbinsx=20,
    nbinsy=20,
    color_continuous_scale="Viridis",
)
fig.update_layout(title=dict(text='Michael Jordan Efficiency Rating Heatmap', x=0.5))
fig.show()
fig.write_image('mj_heatmap.png')

In [134]:
# 20x20 density heatmap of Offensive Rating vs Defensive Rating over Lebron James's rows.
is_lebron = df['Player'] == 'Lebron James'
result_df = df.loc[is_lebron]
fig = px.density_heatmap(
    result_df,
    x="Offensive Rating",
    y="Defensive Rating",
    nbinsx=20,
    nbinsy=20,
    color_continuous_scale="viridis",
)
fig.update_layout(title=dict(text='Lebron James Efficiency Rating Heatmap', x=0.5))
fig.show()
fig.write_image('lbj_heatmap.png')

In [136]:
# Distribution of Usage % per player, with one violin per value of the Result column.
fig = px.violin(
    df,
    x="Player",
    y="Usage %",
    color="Result",
    color_discrete_sequence=["Indigo", "LimeGreen"],
)
fig.update_layout(title=dict(text='Usage Percentage Based on Game Outcome', x=0.5))
fig.show()
fig.write_image('violin.png')

In [137]:
# Mean Effective Fg % per player across every row in df.
result_df = (
    df.groupby(['Player'])['Effective Fg %']
    .mean()
    .reset_index()
)
# result_df has one row per player, so the summed histogram bar equals that player's mean.
# NOTE(review): no `color=` is passed, so presumably only the first colour of the
# sequence is drawn and barmode has no visible effect — confirm whether
# per-player colouring was intended.
fig = px.histogram(
    result_df,
    x="Player",
    y="Effective Fg %",
    barmode='group',
    height=400,
    color_discrete_sequence=["Indigo", "SpringGreen"],
)
fig.update_layout(
    title_text='Career Average Effective Field Goal Percentage',
    title_x=0.5,
    yaxis_title='Effective FG %',
)
fig.show()
fig.write_image('efg.png')

In [10]:
# For each player, the five opposing teams with the highest mean Points.
result_df = (
    df.groupby(['Player', 'Opp Team'])['Points']
    .mean()
    .reset_index()
    # Highest averages first within each player, so head(5) keeps the top five.
    .sort_values(by=['Player', 'Points'], ascending=[True, False])
    .groupby('Player')
    .head(5)
)
result_df


Unnamed: 0,Player,Opp Team,Points
21,Lebron James,NOK,28.25
14,Lebron James,LAL,27.633333
1,Lebron James,BOS,27.349057
12,Lebron James,IND,26.887755
19,Lebron James,NJN,26.783784
58,Michael Jordan,PHO,34.096774
64,Michael Jordan,UTA,32.763158
51,Michael Jordan,MIL,32.337838
59,Michael Jordan,POR,32.21875
39,Michael Jordan,CLE,32.034483


In [None]:
# For each player, the five opposing teams with the lowest mean Points.
result_df = (
    df.groupby(['Player', 'Opp Team'])['Points']
    .mean()
    .reset_index()
    # Highest averages first within each player, so tail(5) keeps the bottom five.
    .sort_values(by=['Player', 'Points'], ascending=[True, False])
    .groupby('Player')
    .tail(5)
)
result_df

In [24]:
# Mean Points for every (Season, Player, Playoffs) combination, ordered so each
# player's regular-season rows come first, followed by the playoff rows, both by season.
result_df = (
    df.groupby(['Season', 'Player', 'Playoffs'])['Points']
    .mean()
    .reset_index()
    .sort_values(by=['Player', 'Playoffs', 'Season'])
)
# TODO: turn this into a per-season line chart (px.line, one line per player).
#       Ideas from the earlier draft: add a 'Season Num' column holding the season's
#       rank within each (Player, Playoffs) group and use it as the x-axis.
#       The mean lives in the 'Points' column here, not 'PPG'.
result_df

Unnamed: 0,Season,Player,Playoffs,Points
28,2003-2004,Lebron James,False,20.170732
29,2004-2005,Lebron James,False,26.52439
30,2005-2006,Lebron James,False,30.219512
32,2006-2007,Lebron James,False,26.0
34,2007-2008,Lebron James,False,27.439024
36,2008-2009,Lebron James,False,28.097561
38,2009-2010,Lebron James,False,27.536585
40,2010-2011,Lebron James,False,25.743902
42,2011-2012,Lebron James,False,25.5
44,2012-2013,Lebron James,False,24.829268


In [15]:
# Let pandas print up to 70 rows of a DataFrame before truncating the display.
pd.options.display.max_rows = 70

In [30]:
# Each player's best playoff performances: Game Score plotted against Usage %.
# The chart title used to read "Top 20 Career Game Scores" even though this cell
# keeps 50 games per player and only looks at playoff rows; the title is now
# built from the same count so the two cannot drift apart again.
top_n = 50  # number of highest-Game-Score playoff games kept per player
playoff_games = df[df['Playoffs'] == True]
result_df = (
    playoff_games
    .sort_values(by=['Player', 'Game Score'], ascending=[True, False])
    .groupby('Player')
    .head(top_n)
)
# Rows are ordered by Player, so colours are handed out in that order:
# LimeGreen to the first player, Indigo to the second.
fig = px.scatter(
    result_df,
    x="Game Score",
    y="Usage %",
    color="Player",
    hover_data=['Player', 'Date', 'Opp Team', 'Result'],
    color_discrete_sequence=["LimeGreen", "Indigo"],
)
fig.update_layout(
    title_text=f'Top {top_n} Playoff Game Scores vs Usage Rates',
    title_x=0.5,
)
fig.show()
