In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the games table from a CSV file into the `games` DataFrame.
# NOTE(review): this is an absolute, user-specific Windows path, so the notebook
# cannot be re-run on another machine as-is — consider a path relative to a
# configurable data directory.
games = pd.read_csv(r"C:\Users\l3anp\OneDrive\Desktop\inst414\nba_dataset_inst414\Cleaned_Games.csv")
# Preview the first rows as a quick sanity check of the load.
games.head()

In [None]:
# Figure 1: histogram of the homeScore column, split into 15 bins
fig, ax = plt.subplots()
ax.hist(games['homeScore'], bins=15, color='orange')

# label the axes so the figure stands on its own
ax.set_title('Distribution of Home Scores')
ax.set_xlabel('Home Scores')
ax.set_ylabel('Frequency')
plt.show()

**Figure 1. Distribution of Home Team Scores.**
The distribution is roughly normal, and most home teams score between 100 and 120 points.

In [None]:
# Summary statistics for the homeScore column plotted in Figure 1
# (pandas describe() output for that single column).
games['homeScore'].describe()

In [None]:
# Figure 2: histogram of the awayScore column, split into 15 bins
fig, ax = plt.subplots()
ax.hist(games['awayScore'], bins=15, color='orange')

# title and axis labels set in one call
ax.set(
    title='Distribution of Away Scores',
    xlabel='Away Scores',
    ylabel='Frequency',
)
plt.show()

**Figure 2. Distribution of Away Team Scores.**
The distribution is roughly normal, and most away teams score between about 100 and 115 points.

In [None]:
# Summary statistics for the awayScore column plotted in Figure 2
# (pandas describe() output for that single column).
games['awayScore'].describe()

In [None]:
# Drop the row labelled 0.
# NOTE(review): pd.read_csv consumes the CSV header line by default, so label 0
# is presumably the first *data* row rather than a header — confirm this drop is
# intended. The score histograms earlier in the notebook were drawn before this
# row was removed.
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because label 0 no longer exists.
games = games.drop(index=0, errors='ignore')

# Create winType to distinguish home and away wins from the winner column.
# A game is a 'Home Win' when winner equals hometeamId, otherwise 'Away Win'.
# Uses the same vectorized comparison as the homeWin flag further down instead
# of a row-by-row apply, so it is faster and also works on an empty frame.
games['winType'] = (games['winner'] == games['hometeamId']).map(
    {True: 'Home Win', False: 'Away Win'})

# Figure 3: countplot of home and away wins
sns.countplot(x='winType', data=games, color='orange')

plt.title('Distribution of Game Winners')
plt.xlabel('Wins')
plt.ylabel('Frequency')
plt.show()

**Figure 3. Distribution of Game Winners.**
This figure represents the number of home wins and away wins.

In [None]:
# Number of games per winType label ('Home Win' / 'Away Win') shown in Figure 3
games['winType'].value_counts()

In [None]:
# Create gameTypeSpecific to distinguish playoff and regular season games from
# the gameType column. Rows whose gameType is exactly 'Regular Season' keep that
# label; every other value is labelled 'Playoffs'.
# NOTE(review): "every other value" includes missing values and any other game
# types that may be present — confirm that gameType only holds regular-season
# and playoff games.
# Vectorized eq/map replaces the row-by-row apply: same labels, no Python-level
# call per row, and it also works on an empty frame.
games['gameTypeSpecific'] = games['gameType'].eq('Regular Season').map(
    {True: 'Regular Season', False: 'Playoffs'})

# Figure 4: countplot of playoff and regular season games
sns.countplot(x='gameTypeSpecific', data=games, color='orange')

plt.title('Distribution of Game Types')
plt.xlabel('Game Type')
plt.ylabel('Frequency')
plt.show()

**Figure 4. Distribution of Game Types.** This figure represents the number of playoff and regular season games.

In [None]:
# Number of games per gameTypeSpecific label shown in Figure 4
games['gameTypeSpecific'].value_counts()

In [None]:
# homeWin flag: 1 when the winner column equals hometeamId, otherwise 0.
# The comparison and the bool -> int cast are done in a single chained expression.
games['homeWin'] = games['winner'].eq(games['hometeamId']).astype(int)

In [None]:
# Figure 5: mean of the 0/1 homeWin flag per game type, i.e. the home win rate
win_rate_ax = sns.barplot(
    x='gameTypeSpecific',
    y='homeWin',
    data=games,
    estimator='mean',
    color='orange',
)

# title and axis labels go on the Axes object that seaborn returns
win_rate_ax.set_title('Home Win Rate by Game Type')
win_rate_ax.set_xlabel('Game Type')
win_rate_ax.set_ylabel('Proportion of Home Wins')
plt.show()

**Figure 5. Home Win Rate by Game Type.** 
This figure represents the proportion of games won by home teams during the playoffs and regular season.

In [None]:
# Mean of the 0/1 homeWin flag for each gameTypeSpecific group — the numbers
# behind the bar heights in Figure 5.
games.groupby('gameTypeSpecific')['homeWin'].mean()

In [None]:
# Figure 6: attendance box plots per game type, split by the homeWin flag
axes = sns.boxplot(
    x='gameTypeSpecific',
    y='attendance',
    hue='homeWin',
    data=games,
)

axes.set_title('Attendance by Game Type and Game Outcome')
axes.set_xlabel('Game Type')
axes.set_ylabel('Attendance')

# Replace the raw 0/1 legend entries with readable labels.
# NOTE(review): assumes the legend lists homeWin == 0 before homeWin == 1 — confirm.
sns.move_legend(
    axes,
    loc="best",
    title='Game Outcome',
    labels=['Home Loss', 'Home Win'],
)
plt.show()

**Figure 6. Attendance by Game Type and Game Outcome.** 
This figure compares game attendance for home wins and home losses in playoff and regular season games.

In [None]:
# Summary statistics of attendance for every (gameTypeSpecific, homeWin)
# combination — the numbers behind the boxes in Figure 6.
games.groupby(['gameTypeSpecific', 'homeWin'])['attendance'].describe()