# NBA 2022-23 Regular Season Summary Report Project
---

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

## **Reviewing Datasets**

Audited and determined which datasets would be used for analysis and visualization.

In [None]:
# Load the list of NBA officials from the archive CSVs and display it.
official = pd.read_csv('nba database 4:12/archive/csv/officials.csv')
official

In [None]:
# Print column names, dtypes and non-null counts for the officials table.
official.info()

In [None]:
# Load common_player_info.csv: quick background info on players.
common = pd.read_csv('nba database 4:12/archive/csv/common_player_info.csv')
common

In [None]:
# Print column names, dtypes and non-null counts for the player background table.
common.info()

In [None]:
# Load draft_combine_stats.csv: info on the draft combine.
draft_comb = pd.read_csv('nba database 4:12/archive/csv/draft_combine_stats.csv')
draft_comb

In [None]:
# Print column names, dtypes and non-null counts for the draft combine table.
draft_comb.info()

In [None]:
# Load draft_history.csv. Note: this table does not include undrafted
# players (ex. Austin Reaves).
draft_hist = pd.read_csv('nba database 4:12/archive/csv/draft_history.csv')
draft_hist

In [None]:
# Spot check that undrafted players are not included: look up one such player
# by name (the result is expected to be empty).
draft_hist[draft_hist['player_name'] == "Austin Reaves"]

In [None]:
# Load game_info.csv; its game history dates up to 3/12/23.
game_info = pd.read_csv('nba database 4:12/archive/csv/game_info.csv')
game_info

In [None]:
# Load game_summary.csv from the archive CSVs and display it.
game_sum = pd.read_csv('nba database 4:12/archive/csv/game_summary.csv')
game_sum

In [None]:
# Load game.csv from the archive CSVs and display it.
game = pd.read_csv('nba database 4:12/archive/csv/game.csv')
game

In [None]:
# Print column names, dtypes and non-null counts for the game table.
game.info()

In [None]:
# Load inactive_players.csv from the archive CSVs and display it.
inactive = pd.read_csv('nba database 4:12/archive/csv/inactive_players.csv')
inactive

In [None]:
# Print column names, dtypes and non-null counts for the inactive players table.
inactive.info()

In [None]:
# Load line_score.csv: similar to the game csv file, but this one includes
# the line scores.
line = pd.read_csv('nba database 4:12/archive/csv/line_score.csv')
line

In [None]:
# Show the last rows: this file also only has games up to 3/12/23.
line.tail()

In [None]:
# Print column names, dtypes and non-null counts for the line score table.
line.info()

In [None]:
# Load other_stats.csv from the archive CSVs and display it.
other = pd.read_csv('nba database 4:12/archive/csv/other_stats.csv')
other

In [None]:
# Show the last rows of the other-stats table.
other.tail()

In [None]:
# Print column names, dtypes and non-null counts for the other-stats table.
other.info()

In [None]:
# Load player.csv from the archive CSVs and display it.
player = pd.read_csv('nba database 4:12/archive/csv/player.csv')
player

In [None]:
# Print column names, dtypes and non-null counts for the player table.
player.info()

In [None]:
# Load team_details.csv from the archive CSVs and display it.
team_details = pd.read_csv('nba database 4:12/archive/csv/team_details.csv')
team_details

In [None]:
# Print column names, dtypes and non-null counts for the team details table.
team_details.info()

In [None]:
# Load team_history.csv from the archive CSVs and display it.
team_hist = pd.read_csv('nba database 4:12/archive/csv/team_history.csv')
team_hist

In [None]:
# Load team_info_common.csv from the archive CSVs and display it.
team_info = pd.read_csv('nba database 4:12/archive/csv/team_info_common.csv')
team_info

In [None]:
# Print column names, dtypes and non-null counts for the common team info table.
team_info.info()

In [None]:
# Load team.csv from the archive CSVs and display it.
team = pd.read_csv('nba database 4:12/archive/csv/team.csv')
team

In [None]:
# Print column names, dtypes and non-null counts for the team table.
team.info()

In [None]:
# Load the win/loss stats table. Unlike the archive CSVs above, this file is
# read by bare filename, i.e. relative to the working directory.
win_lose = pd.read_csv('W_L Stats.csv')
win_lose

## **Cleaning the Datasets**

This also included collecting the missing games after 3/12.

In [None]:
# Mark each row of `game` that repeats an earlier row (all columns compared),
# then tally the True/False marks to see whether any duplicates exist.
testing3 = game.duplicated(subset=None, keep='first')
testing3.value_counts()

In [None]:
# Mark each row of `line` that repeats an earlier row (all columns compared),
# then tally the True/False marks to see whether any duplicates exist.
testing4 = line.duplicated(subset=None, keep='first')
testing4.value_counts()

Looked for the first game of the 2022-2023 regular season.

In [None]:
# Display the game_sequence column while looking for the first game of the
# 2022-2023 regular season.
line.game_sequence

In [None]:
# Show the rows dated on the first day of the 2022-2023 regular season.
line[line['game_date_est'] == '2022-10-18 00:00:00']

After finding the first game, I created a new dataframe to store it and the games afterwards.

In [None]:
# Find the season opener by its date rather than a hard-coded row number, so
# the slice stays correct if rows are added to or removed from line_score.csv.
# Like the original positional slice, this assumes the rows are in
# chronological order.
season_opener = '2022-10-18 00:00:00'
opener_positions = np.flatnonzero((line.game_date_est == season_opener).to_numpy())
if opener_positions.size == 0:
    raise ValueError(f'no games dated {season_opener} found in line')

# Everything from the first opening-night row onward: the current season up
# until 3/12/23.
line2 = line.iloc[opener_positions[0]:]
line2

In [None]:
# Check dtypes and non-null counts for the season slice (observed: no nulls).
line2.info()

Filtered out the columns not needed.

In [None]:
# Keep only the game date plus, for the home and away side alike, the team
# abbreviation, nickname, win-loss record and points.
line3 = line2.loc[
    :,
    [
        'game_date_est',
        'team_abbreviation_home',
        'team_nickname_home',
        'team_wins_losses_home',
        'pts_home',
        'team_abbreviation_away',
        'team_nickname_away',
        'team_wins_losses_away',
        'pts_away',
    ],
]
line3

In [None]:
# Print column names, dtypes and non-null counts for the trimmed season table.
line3.info()

Imported the missing games from another source, then reviewed (and, where needed, cleaned) them before combining the data.

In [None]:
# Load the separately collected games played after 3/12 and display them.
missing = pd.read_csv('missing games - games after 3_12.csv')
missing

In [None]:
# Print column names, dtypes and non-null counts for the missing-games table.
missing.info()

In [None]:
# Mark each row of `missing` that repeats an earlier row (all columns
# compared), then tally the True/False marks to see whether duplicates exist.
test_missing = missing.duplicated(subset=None, keep='first')
test_missing.value_counts()

In [None]:
# Stack the games collected after 3/12 underneath the games already in line3.
# Row labels from both inputs are kept as they are.
new_line = pd.concat(objs=[line3, missing], axis=0)
new_line

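Note: in pandas versions before 2.0, the two dataframes could also be combined with the **append()** method. It was deprecated in pandas 1.4 and removed in pandas 2.0, so **pd.concat()** (used above) is the supported approach.

`new_line = line3.append(missing)` (pandas < 2.0 only)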

In [None]:
# Print column names, dtypes and non-null counts for the combined table.
new_line.info()

Filtered **new_line** to keep the necessary columns. Updated **missing** to have consistent team name format.

In [None]:
# Keep only the game date plus, for the home and away side alike, the team
# abbreviation, nickname, win-loss record and points.
new_line2 = new_line.loc[
    :,
    [
        'game_date_est',
        'team_abbreviation_home',
        'team_nickname_home',
        'team_wins_losses_home',
        'pts_home',
        'team_abbreviation_away',
        'team_nickname_away',
        'team_wins_losses_away',
        'pts_away',
    ],
]
new_line2

In [None]:
# Count the non-null entries in each column of the combined table.
new_line2.count()

In [None]:
# Print column names, dtypes and non-null counts for the combined, trimmed table.
new_line2.info()

In [None]:
# Save the full list of season games to disk; the row labels are written out
# as the first column of the file.
new_line2.to_csv(path_or_buf='completed_games.csv', index=True)

Split the **Overall** column of the **win_lose** DataFrame into two separate columns (**Wins** and **Losses**).

In [None]:
# Narrow win_lose down to the Team and Overall columns; Overall is the
# column that gets split into Wins and Losses afterwards.
split_win_loss = win_lose.loc[:,['Team', 'Overall']]
split_win_loss

In [None]:
# Split the Overall text on '-' into a Wins and a Losses column. str.split
# returns text, so cast both columns to integers; otherwise later sorting is
# alphabetical and matplotlib plots the values as categories, not numbers.
split_win_loss[['Wins', 'Losses']] = (
    split_win_loss.Overall.str.split('-', expand=True).astype(int)
)
split_win_loss

Created a modified version of the "completed_games" file with a proper date format.

In [None]:
# Load the modified copy of the completed-games file (the one with the
# corrected date format) and display it.
new_complete = pd.read_csv('completed_games_2.csv')
new_complete

In [None]:
# Print column names, dtypes and non-null counts for the modified games table.
new_complete.info()

Incorporated a dataset of per-game **stats**.

In [None]:
# Load the per-game stats table and display it.
stats = pd.read_csv('Per Game Stats.csv')
stats

In [None]:
# Print column names, dtypes and non-null counts for the per-game stats table.
stats.info()

In [None]:
# Mark each row of `stats` that repeats an earlier row (all columns compared),
# then tally the True/False marks to see whether any duplicates exist.
test_stats = stats.duplicated(subset=None, keep='first')
test_stats.value_counts()

## **Illustrating the Data**

### Total Wins

First the dataframe needed to be sorted in ascending order by **Wins** before creating the visual.


In [None]:
# Order the teams by the numeric value of Wins. The column is derived from
# splitting the Overall text, and sorting it as text would be alphabetical
# (e.g. '9' would land after '10').
wins_as_number = pd.to_numeric(split_win_loss.Wins)
sort_split = split_win_loss.loc[wins_as_number.sort_values().index]
sort_split

In [None]:
q = sort_split.Team
# Coerce Wins to numbers in case it is still text from the Overall split:
# matplotlib treats text heights as categories, so the bars would not be
# drawn to scale.
w = pd.to_numeric(sort_split.Wins)

plt.xticks(rotation = 'vertical')  # team names are long; rotate to avoid overlap
plt.title('Total Wins')
plt.bar(q,w)

### Field Goals 

In [None]:
# One horizontal bar per team, sized by the FG column.
plt.barh(y=stats['Team'], width=stats['FG'], height=0.5)

# Labelling plus vertical guide lines to make the bar lengths easier to read.
plt.title(label='Field Goals')
plt.xlabel(xlabel='Field Goals per Game')
plt.grid(axis='x')

### Rebounds

In [None]:
# One horizontal bar per team, sized by the TRB column.
plt.barh(y=stats['Team'], width=stats['TRB'], height=0.5)

# Labelling plus vertical guide lines to make the bar lengths easier to read.
plt.title(label='Rebounds')
plt.xlabel(xlabel='Total Rebounds per Game')
plt.grid(axis='x')

### Free Throws

In [None]:
# One horizontal bar per team, sized by the FT column.
plt.barh(y=stats['Team'], width=stats['FT'], height=0.5)

# Labelling plus vertical guide lines to make the bar lengths easier to read.
plt.title(label='Free Throws')
plt.xlabel(xlabel='Free Throws Made per Game')
plt.grid(axis='x')

### Blocks

In [None]:
a = stats['Team']
b = stats['BLK']

# Print each team's value centred just above the top of its bar; labels are
# placed by bar order (0, 1, 2, ...).
for i, value in enumerate(b.to_numpy()):
    plt.text(i, value, value, ha='center', va='bottom')

plt.title('Blocks')
plt.ylabel('Blocks per Game')
plt.xticks(rotation='vertical')

# One vertical bar per team.
plt.bar(a, b, width=0.5)

### Assists

In [None]:
a = stats['Team']
b = stats['AST']

# Print each team's value centred just above the top of its bar; labels are
# placed by bar order (0, 1, 2, ...).
for i, value in enumerate(b.to_numpy()):
    plt.text(i, value, value, ha='center', va='bottom')

plt.title('Assists')
plt.ylabel('Assists per Game')
plt.xticks(rotation='vertical')

# One vertical bar per team.
plt.bar(a, b, width=0.5)

### Steals

In [None]:
a = stats['Team']
b = stats['STL']

# Print each team's value centred just above the top of its bar; labels are
# placed by bar order (0, 1, 2, ...).
for i, value in enumerate(b.to_numpy()):
    plt.text(i, value, value, ha='center', va='bottom')

plt.title('Steals')
plt.ylabel('Steals per Game')
plt.xticks(rotation='vertical')

# One vertical bar per team.
plt.bar(a, b, width=0.5)