### Dream Team project
A data science project exploring the NBA careers of the 12 members of the 1992 US Olympic men's basketball team (the "Dream Team"). The data comes from the `nba_api` package. The notebook includes a variety of visualizations: bar charts, pie charts, boxplots, scatterplots, and a map.

In [None]:
# applying packages
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="all"
import matplotlib.pyplot as plt
import plotly.express as px
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import teams
import plotly.graph_objects as go
from geopy.geocoders import Nominatim
import statistics

In [2]:
# get IDs of all players on roster from the NBA API
roster=['Charles Barkley','Larry Bird','Clyde Drexler','Patrick Ewing','Magic Johnson',
        'Michael Jordan', 'Christian Laettner', 'Karl Malone','Chris Mullin',
        'Scottie Pippen', 'David Robinson', 'John Stockton']

# ID[i] is the NBA API player id of roster[i].
ID=[]
for full_name in roster:
    matches = players.find_players_by_full_name(full_name)
    if not matches:
        # Fail with the offending name instead of a bare IndexError on [0].
        raise LookupError(f"No NBA API player found for {full_name!r}")
    # Read the id by key; the first match is used, as before.
    ID.append(matches[0]['id'])

In [3]:
# show information about player IDs
# Every bare expression below is echoed (not just the last one) because the
# imports cell sets InteractiveShell.ast_node_interactivity to "all".
ID            # the collected player ids
len(ID)       # how many ids were collected
type(ID)      # container type
type(ID[0])   # element type

[787, 1449, 17, 121, 77142, 893, 363, 252, 904, 937, 764, 304]

12

list

int

In [None]:
# Download every player's career stats and store the first dataframe returned
# by the endpoint in a dictionary keyed by the player's short name.
names=['barkley','bird','drexler','ewing','johnson', 'jordan', 'laettner', 'malone','mullin','pippen', 'robinson', 'stockton']

# ID and names are parallel lists (same player order as roster).
dataframes = {
    short_name: playercareerstats.PlayerCareerStats(player_id=player_id).get_data_frames()[0]
    for player_id, short_name in zip(ID, names)
}

In [None]:
# create individual dataframes for each player from dataframes in dictionary
# Each variable is bound by key, so a missing or reordered entry raises a
# KeyError instead of silently attaching one player's stats to another name.
barkley = dataframes['barkley']
bird = dataframes['bird']
drexler = dataframes['drexler']
ewing = dataframes['ewing']
johnson = dataframes['johnson']
jordan = dataframes['jordan']
laettner = dataframes['laettner']
malone = dataframes['malone']
mullin = dataframes['mullin']
pippen = dataframes['pippen']
robinson = dataframes['robinson']
stockton = dataframes['stockton']

In [None]:
# get information about dataframes
# DataFrame.info() writes its summary itself and returns None, so it is called
# directly rather than wrapped in print() (which only added a row of "None").
for short_name, frame in dataframes.items():
    print(f"--- {short_name} ---")  # label each summary so they can be told apart
    frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          16 non-null     int64  
 1   SEASON_ID          16 non-null     object 
 2   LEAGUE_ID          16 non-null     object 
 3   TEAM_ID            16 non-null     int64  
 4   TEAM_ABBREVIATION  16 non-null     object 
 5   PLAYER_AGE         16 non-null     float64
 6   GP                 16 non-null     int64  
 7   GS                 16 non-null     int64  
 8   MIN                16 non-null     int64  
 9   FGM                16 non-null     int64  
 10  FGA                16 non-null     int64  
 11  FG_PCT             16 non-null     float64
 12  FG3M               16 non-null     int64  
 13  FG3A               16 non-null     int64  
 14  FG3_PCT            16 non-null     float64
 15  FTM                16 non-null     int64  
 16  FTA                16 non-nu

In [None]:
# check for duplicate season IDs in dataframes (players who might have been traded mid season may have duplicate season IDs)
for name in names:
    # Look the frame up by key instead of eval()-ing the variable name.
    season_counts = dataframes[name]['SEASON_ID'].value_counts()
    # Number of distinct seasons that appear on more than one row.
    dups = int((season_counts > 1).sum())
    print(f"In the {name} dataframe, there are {dups} duplicate season IDs.")

In the barkley dataframe, there are 0 duplicate season IDs.
In the bird dataframe, there are 0 duplicate season IDs.
In the drexler dataframe, there are 1 duplicate season IDs.
In the ewing dataframe, there are 0 duplicate season IDs.
In the johnson dataframe, there are 0 duplicate season IDs.
In the jordan dataframe, there are 0 duplicate season IDs.
In the laettner dataframe, there are 2 duplicate season IDs.
In the malone dataframe, there are 0 duplicate season IDs.
In the mullin dataframe, there are 0 duplicate season IDs.
In the pippen dataframe, there are 0 duplicate season IDs.
In the robinson dataframe, there are 0 duplicate season IDs.
In the stockton dataframe, there are 0 duplicate season IDs.


In [36]:
# Bar chart: how many distinct seasons each player appears in.
# dataframes is iterated in insertion order, which matches roster.
season_num = [frame['SEASON_ID'].nunique() for frame in dataframes.values()]

bar_trace = go.Bar(x=roster, y=season_num)
fig = (
    go.Figure(data=[bar_trace])
    .update_layout(
        title='Number of seasons played during NBA career, by player',
        xaxis_title='Player',
        yaxis_title='Number of NBA seasons',
        plot_bgcolor='lightgrey',
    )
    .update_traces(marker_color='crimson')
)
fig.show()

In [None]:
# Tag every player's career dataframe with the player's full name, then stack
# all twelve into one long dataframe (df_all) used by the cells that follow.
# names and roster are parallel lists, and the per-player variables (barkley,
# bird, ...) are the same objects as the dictionary values, so they receive
# the new column too.
for short_name, full_name in zip(names, roster):
    dataframes[short_name]['Name'] = full_name

player_frames = [dataframes[short_name] for short_name in names]
df_all = pd.concat(player_frames, axis=0).reset_index(drop=True)
df_all.info()
df_all.head()
df_all.tail()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          193 non-null    int64  
 1   SEASON_ID          193 non-null    object 
 2   LEAGUE_ID          193 non-null    object 
 3   TEAM_ID            193 non-null    int64  
 4   TEAM_ABBREVIATION  193 non-null    object 
 5   PLAYER_AGE         193 non-null    float64
 6   GP                 193 non-null    int64  
 7   GS                 193 non-null    int64  
 8   MIN                193 non-null    int64  
 9   FGM                193 non-null    int64  
 10  FGA                193 non-null    int64  
 11  FG_PCT             193 non-null    float64
 12  FG3M               193 non-null    int64  
 13  FG3A               193 non-null    int64  
 14  FG3_PCT            193 non-null    float64
 15  FTM                193 non-null    int64  
 16  FTA                193 non

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Name
0,787,1984-85,0,1610612755,PHL,22.0,82,60,2347,427,...,266,437,703,155,95,80,209,301,1148,Charles Barkley
1,787,1985-86,0,1610612755,PHL,23.0,80,80,2952,595,...,354,672,1026,312,173,125,350,333,1603,Charles Barkley
2,787,1986-87,0,1610612755,PHL,24.0,68,62,2740,557,...,390,604,994,331,119,104,322,252,1564,Charles Barkley
3,787,1987-88,0,1610612755,PHL,25.0,80,80,3170,753,...,385,566,951,254,100,103,304,278,2264,Charles Barkley
4,787,1988-89,0,1610612755,PHL,26.0,79,79,3088,700,...,403,583,986,325,126,67,254,262,2037,Charles Barkley


Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Name
188,304,1998-99,0,1610612762,UTA,37.0,50,50,1410,200,...,31,115,146,374,81,13,110,107,553,John Stockton
189,304,1999-00,0,1610612762,UTA,38.0,82,82,2432,363,...,45,170,215,703,143,15,179,192,990,John Stockton
190,304,2000-01,0,1610612762,UTA,39.0,82,82,2397,328,...,54,173,227,713,132,21,203,194,944,John Stockton
191,304,2001-02,0,1610612762,UTA,40.0,82,82,2570,401,...,59,204,263,674,152,24,208,209,1102,John Stockton
192,304,2002-03,0,1610612762,UTA,41.0,82,82,2275,309,...,51,150,201,629,137,16,182,184,884,John Stockton


In [None]:
# Build five parallel lists, one entry per (player, team abbreviation) pair,
# that become the columns of the mapping dataframe.
Name=[]
Team_Abbreviation=[]
Team_Name=[]
City=[]
State=[]

for player in roster:
    # Distinct abbreviations for this player, in value_counts() order.
    abbreviations = df_all.loc[df_all['Name'] == player, 'TEAM_ABBREVIATION'].value_counts().index
    for abbreviation in abbreviations:
        team = teams.find_team_by_abbreviation(abbreviation)
        found = team is not None
        Name.append(player)
        Team_Abbreviation.append(abbreviation)
        # Abbreviations the static team list does not know are left as NaN
        # and filled in by hand in a later cell.
        Team_Name.append(team['full_name'] if found else np.nan)
        City.append(team['city'] if found else np.nan)
        State.append(team['state'] if found else np.nan)
            

In [None]:
# Assemble the mapping dataframe from the five parallel lists.
map_columns = ['Name', 'Team_Abbreviation', 'Team_Name', 'City', 'State']
map_values = [Name, Team_Abbreviation, Team_Name, City, State]
mapdata = pd.DataFrame(dict(zip(map_columns, map_values)))

mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State
0,Charles Barkley,PHL,,,
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas
6,Clyde Drexler,TOT,,,
7,Patrick Ewing,NYK,New York Knicks,New York,New York
8,Patrick Ewing,SEA,,,
9,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida


In [None]:
# fill in missing data and replace incorrect data
columns_to_modify=['Team_Name', 'City', 'State']

# One entry per manual correction:
#   (team abbreviation, player name or None for "any player", [Team_Name, City, State])
# 'TOT' rows are assigned per player to a team that player already has a row
# for -- presumably so the de-duplication step in the next cell removes them.
corrections = [
    ('PHL', None,                 ['Philadelphia 76ers', 'Philadelphia', 'Pennsylvania']),
    ('TOT', 'Clyde Drexler',      ['Houston Rockets', 'Houston', 'Texas']),
    ('SEA', None,                 ['Seattle SuperSonics', 'Seattle', 'Washington']),
    ('TOT', 'Christian Laettner', ['Dallas Mavericks', 'Dallas', 'Texas']),
    ('MIN', None,                 ['Minnesota Timberwolves', 'Minneapolis', 'Minnesota']),
    ('UTH', None,                 ['Utah Jazz', 'Salt Lake City', 'Utah']),
    ('UTA', None,                 ['Utah Jazz', 'Salt Lake City', 'Utah']),
    ('GOS', None,                 ['Golden State Warriors', 'San Francisco', 'California']),
    ('IND', None,                 ['Indiana Pacers', 'Indianapolis', 'Indiana']),
    ('GSW', None,                 ['Golden State Warriors', 'San Francisco', 'California']),
    ('SAN', None,                 ['San Antonio Spurs', 'San Antonio', 'Texas']),
]

for abbreviation, player, values in corrections:
    condition = mapdata['Team_Abbreviation'] == abbreviation
    if player is not None:
        condition = condition & (mapdata['Name'] == player)
    mapdata.loc[condition, columns_to_modify] = values

#look at corrected dataframe
mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas
6,Clyde Drexler,TOT,Houston Rockets,Houston,Texas
7,Patrick Ewing,NYK,New York Knicks,New York,New York
8,Patrick Ewing,SEA,Seattle SuperSonics,Seattle,Washington
9,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida


In [None]:
# Keep a single row per (player, team): the first occurrence wins and the
# index is renumbered from 0.
mapdata = (
    mapdata
    .drop_duplicates(subset=['Name', 'Team_Name'], keep='first')
    .reset_index(drop=True)
)

mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas
6,Patrick Ewing,NYK,New York Knicks,New York,New York
7,Patrick Ewing,SEA,Seattle SuperSonics,Seattle,Washington
8,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida
9,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California


In [None]:
# create single column for City and State combined to facilitate geocoding
# Every row needs both parts; fail with the affected abbreviations rather than
# with an opaque "float + str" TypeError (or a NaN that breaks geocoding later).
missing_location = mapdata[['City', 'State']].isna().any(axis=1)
if missing_location.any():
    unresolved = sorted(set(mapdata.loc[missing_location, 'Team_Abbreviation'].astype(str)))
    raise ValueError("Missing City/State for team abbreviation(s): " + ", ".join(unresolved))

# Vectorised "City, State" concatenation (no row-by-row iteration needed).
mapdata['City_State'] = mapdata['City'] + ", " + mapdata['State']

# Also kept as a plain list, as before, in case other cells reference it.
City_State = mapdata['City_State'].tolist()

mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State,City_State
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania,"Philadelphia, Pennsylvania"
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona,"Phoenix, Arizona"
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas,"Houston, Texas"
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts,"Boston, Massachusetts"
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon,"Portland, Oregon"
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas,"Houston, Texas"
6,Patrick Ewing,NYK,New York Knicks,New York,New York,"New York, New York"
7,Patrick Ewing,SEA,Seattle SuperSonics,Seattle,Washington,"Seattle, Washington"
8,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida,"Orlando, Florida"
9,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California,"Los Angeles, California"


In [None]:
# geocoding City_State column for scattermap
# Latitude[i] / Longitude[i] are the coordinates of mapdata['City_State'][i].
Longitude=[]
Latitude=[]

geo=Nominatim(user_agent = "nba_map")

# Several players share a city, so each distinct place is looked up only once.
geocoded = {}
for place in mapdata['City_State']:
    if place not in geocoded:
        # timeout=None is kept from the original call; per the geopy docs it
        # disables the request timeout -- NOTE(review): confirm that is intended.
        geo_info = geo.geocode(place, timeout=None)
        if geo_info is None:
            # geocode() returns None when nothing matches; name the place
            # instead of failing with an AttributeError on None.
            raise ValueError(f"Geocoder returned no match for {place!r}")
        geocoded[place] = (geo_info.latitude, geo_info.longitude)
    latitude, longitude = geocoded[place]
    Latitude.append(latitude)
    Longitude.append(longitude)

In [None]:
# check lists
# Both lists are echoed; they hold one entry per row of mapdata, in row order.
Latitude
Longitude

[39.9527237,
 33.4484367,
 29.7589382,
 42.3554334,
 45.5202471,
 29.7589382,
 40.7127281,
 47.6038321,
 28.5421109,
 34.0536909,
 41.8755616,
 38.8950368,
 44.9772995,
 38.8950368,
 33.7544657,
 32.7762719,
 42.3315509,
 25.7741728,
 40.7596198,
 34.0536909,
 37.7792588,
 39.7683331,
 41.8755616,
 45.5202471,
 29.7589382,
 29.4246002,
 40.7596198]

[-75.1635262,
 -112.074141,
 -95.3676974,
 -71.060511,
 -122.674194,
 -95.3676974,
 -74.0060152,
 -122.330062,
 -81.3790304,
 -118.242766,
 -87.6244212,
 -77.0365427,
 -93.2654692,
 -77.0365427,
 -84.3898151,
 -96.7968559,
 -83.0466403,
 -80.19362,
 -111.886797,
 -118.242766,
 -122.4193286,
 -86.1583502,
 -87.6244212,
 -122.674194,
 -95.3676974,
 -98.4951405,
 -111.886797]

In [None]:
# create distinct coordinates with jittering to keep from having the exact same coordinates
# for 2 different players in the same city which will cause only one player to show up in map
jitter_amount = 0.0001   # maximum offset, in degrees, applied in either direction
jitter_seed = 1992       # fixed seed so a re-run draws the same offsets

# A dedicated seeded generator makes the offsets reproducible and leaves
# NumPy's global random state untouched.
jitter_rng = np.random.default_rng(jitter_seed)

Latitude_new=[coord + jitter_rng.uniform(-jitter_amount, jitter_amount) for coord in Latitude]
Longitude_new=[coord + jitter_rng.uniform(-jitter_amount, jitter_amount) for coord in Longitude]

Latitude_new
Longitude_new

[39.95277505808986,
 33.448500533683486,
 29.75899941492307,
 42.355520789727464,
 45.52015890195517,
 29.758854852238443,
 40.7127998250945,
 47.60381526554288,
 28.54212425177025,
 34.05360143833763,
 41.87558653620117,
 38.89504721481064,
 44.977246411145806,
 38.89494698444187,
 33.75449975142492,
 32.776279293283494,
 42.3314617030277,
 25.774076900414162,
 40.75957550544982,
 34.0536762266759,
 37.77921195421022,
 39.768332058941205,
 41.87563789859798,
 45.52033681138473,
 29.758964534673257,
 29.424655621241357,
 40.75959446919277]

[-75.16350846377955,
 -112.07413761865502,
 -95.3676531340327,
 -71.06060851979267,
 -122.67418462605785,
 -95.36776267819903,
 -74.00593987979089,
 -122.33007544612937,
 -81.37899210603659,
 -118.24284126994013,
 -87.62440420579199,
 -77.03645968738286,
 -93.2654109264783,
 -77.03656171328782,
 -84.38978385325971,
 -96.79694868824255,
 -83.04670426850545,
 -80.19354495283729,
 -111.88681953720716,
 -118.24267683498402,
 -122.41935124485441,
 -86.15825069441193,
 -87.62446578127265,
 -122.67426193924675,
 -95.36778211459799,
 -98.49506426986467,
 -111.88678909285673]

In [None]:
# Attach the jittered coordinates to the mapping dataframe as two new columns.
mapdata = mapdata.assign(Latitude=Latitude_new, Longitude=Longitude_new)

mapdata
mapdata.info()

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State,City_State,Latitude,Longitude
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania,"Philadelphia, Pennsylvania",39.952775,-75.163508
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona,"Phoenix, Arizona",33.448501,-112.074138
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas,"Houston, Texas",29.758999,-95.367653
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts,"Boston, Massachusetts",42.355521,-71.060609
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon,"Portland, Oregon",45.520159,-122.674185
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas,"Houston, Texas",29.758855,-95.367763
6,Patrick Ewing,NYK,New York Knicks,New York,New York,"New York, New York",40.7128,-74.00594
7,Patrick Ewing,SEA,Seattle SuperSonics,Seattle,Washington,"Seattle, Washington",47.603815,-122.330075
8,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida,"Orlando, Florida",28.542124,-81.378992
9,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California,"Los Angeles, California",34.053601,-118.242841


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Name               27 non-null     object 
 1   Team_Abbreviation  27 non-null     object 
 2   Team_Name          27 non-null     object 
 3   City               27 non-null     object 
 4   State              27 non-null     object 
 5   City_State         27 non-null     object 
 6   Latitude           27 non-null     float64
 7   Longitude          27 non-null     float64
dtypes: float64(2), object(6)
memory usage: 1.8+ KB


In [None]:
# Scatter map with one marker per (player, team) row of mapdata.
# The hover box shows player, team and city; raw coordinates are hidden.
hover_columns = {'Latitude': False, 'Longitude': False, 'Name': True, 'Team_Name': True, 'City_State': True}

fig = px.scatter_map(
    mapdata,
    lat="Latitude",
    lon="Longitude",
    hover_data=hover_columns,
    map_style='open-street-map',
    title= "NBA Teams Played for during NBA Careers",
    color_discrete_sequence=['orange'],
    height=800,
    zoom=3.5,
)
fig = fig.update_traces(marker=dict(size=10))
fig.show()

In [None]:
# Bar chart: number of distinct teams each player has a row for in mapdata.
number_of_teams = [
    mapdata.loc[mapdata['Name'] == player, 'Team_Name'].nunique()
    for player in roster
]

bar_trace = go.Bar(x=roster, y=number_of_teams)
fig = (
    go.Figure(data=[bar_trace])
    .update_layout(
        title='Number of NBA teams played for during NBA career, by player',
        xaxis_title='Player',
        yaxis_title='Number of NBA teams',
        plot_bgcolor='lightgrey',
    )
    .update_traces(marker_color='purple')
    .update_yaxes(showgrid=False)
)

fig.show()

In [None]:
# Bar chart: for each player, the most frequent value of the per-row
# games-started count (GS) in that player's career dataframe.
games_started = [int(statistics.mode(frame['GS'])) for frame in dataframes.values()]

bar_trace1 = go.Bar(name="Games Started", x = roster, y = games_started, marker_color="gold")
layout_options = dict(
    title='Most Frequent Number of NBA Games Started in a Season, by player',
    xaxis_title="Player",
    yaxis_title="Number of NBA Games Started",
    plot_bgcolor='lightgrey',
)
fig = go.Figure(data=[bar_trace1]).update_layout(**layout_options).update_yaxes(showgrid=False)
fig.show()

In [None]:
# boxplots for the distributions for the number of minutes played each season
# One box per player, built in a loop. names and roster are parallel lists.
fig = go.Figure()
for short_name, full_name in zip(names, roster):
    frame = dataframes[short_name]
    # A season split between teams has one row per team plus a 'TOT' row with
    # the season total. Where a 'TOT' row exists, only that row is a season
    # total, so the per-team partial rows are left out of the distribution.
    is_total_row = frame['TEAM_ABBREVIATION'] == 'TOT'
    season_has_total = is_total_row.groupby(frame['SEASON_ID']).transform('any')
    season_rows = frame[~season_has_total | is_total_row]
    fig.add_trace(go.Box(y=season_rows['MIN'], name=full_name))

fig = fig.update_layout(title="Distribution of Total Minutes Played In a NBA Season, by player",
                        xaxis_title="Player",
                        yaxis_title="Number of Minutes Played in a NBA Season",
                        yaxis=dict(tickformat=",d" ))
fig.show()

In [None]:
# generating data for scatter plot showing number of free throws made by the number of free throws attempted
# Result: df_ft, one row per player and season with that season's FTM / FTA.
#
# A season split between teams appears in df_all as one row per team PLUS a
# 'TOT' row holding the season total. Averaging those rows yields
# (a + b + (a + b)) / 3, i.e. only two thirds of the real total, so the 'TOT'
# row is used for such seasons and the per-team rows are dropped.
is_total_row = df_all['TEAM_ABBREVIATION'] == 'TOT'
season_has_total = is_total_row.groupby([df_all['Name'], df_all['SEASON_ID']]).transform('any')

df_ft = df_all.loc[~season_has_total | is_total_row,
                   ['SEASON_ID', 'Name', 'FTM', 'FTA']].reset_index(drop=True)

# Player-group masks. They are no longer needed to build df_ft but stay
# defined because other cells in this notebook have referenced them.
target_categories1=['Charles Barkley','Larry Bird']
is_in_target_categories1 = df_all['Name'].isin(target_categories1)

target_categories2=['Patrick Ewing','Magic Johnson', 'Michael Jordan']
is_in_target_categories2 = df_all['Name'].isin(target_categories2)

target_categories3=['Karl Malone','Chris Mullin', 'Scottie Pippen', 'David Robinson', 'John Stockton']
is_in_target_categories3 = df_all['Name'].isin(target_categories3)

df_ft.info()
df_ft

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   SEASON_ID  187 non-null    object
 1   Name       187 non-null    object
 2   FTM        187 non-null    int64 
 3   FTA        187 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 6.0+ KB


Unnamed: 0,SEASON_ID,Name,FTM,FTA
0,1984-85,Charles Barkley,293,400
1,1985-86,Charles Barkley,396,578
2,1986-87,Charles Barkley,429,564
3,1987-88,Charles Barkley,714,951
4,1988-89,Charles Barkley,602,799
...,...,...,...,...
182,1998-99,John Stockton,137,169
183,1999-00,John Stockton,221,257
184,2000-01,John Stockton,227,278
185,2001-02,John Stockton,275,321


In [None]:
# Scatter plot of free throws made against free throws attempted: one point
# per row of df_ft, coloured by player, with the season shown on hover.
axis_style = dict(tickformat=",", showgrid=False)

fig = px.scatter(
    df_ft,
    x='FTA',
    y='FTM',
    color='Name',
    title='Free Throws Made by Free Throws Attempted for Each Season, by player',
    hover_data=["SEASON_ID"],
    width=1000,
    height=600,
    labels={"FTA": "Number of Free Throws Attempted", "FTM": "Number of Free Throws Made"},
)
fig = fig.update_layout(xaxis=dict(axis_style), yaxis=dict(axis_style), plot_bgcolor='navy')
fig.show()

In [None]:
# generating data for scatter plot showing number of field goals made by the number of field goals attempted
# Result: df_fg, one row per player and season with that season's FGM / FGA.
#
# A season split between teams appears in df_all as one row per team PLUS a
# 'TOT' row holding the season total. Averaging those rows yields only two
# thirds of the real total, so the 'TOT' row is used for such seasons and the
# per-team rows are dropped. Everything is derived from df_all here, so this
# cell does not depend on masks created in another cell.
is_total_row = df_all['TEAM_ABBREVIATION'] == 'TOT'
season_has_total = is_total_row.groupby([df_all['Name'], df_all['SEASON_ID']]).transform('any')

df_fg = df_all.loc[~season_has_total | is_total_row,
                   ['SEASON_ID', 'Name', 'FGM', 'FGA']].reset_index(drop=True)

df_fg.info()
df_fg


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   SEASON_ID  187 non-null    object
 1   Name       187 non-null    object
 2   FGM        187 non-null    int64 
 3   FGA        187 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 6.0+ KB


Unnamed: 0,SEASON_ID,Name,FGM,FGA
0,1984-85,Charles Barkley,427,783
1,1985-86,Charles Barkley,595,1041
2,1986-87,Charles Barkley,557,937
3,1987-88,Charles Barkley,753,1283
4,1988-89,Charles Barkley,700,1208
...,...,...,...,...
182,1998-99,John Stockton,200,410
183,1999-00,John Stockton,363,725
184,2000-01,John Stockton,328,651
185,2001-02,John Stockton,401,775


In [None]:
# Scatter plot of field goals made against field goals attempted: one point
# per row of df_fg, coloured by player, with the season shown on hover.
axis_style = dict(tickformat=",", showgrid=False)

fig = px.scatter(
    df_fg,
    x='FGA',
    y='FGM',
    color='Name',
    title='Field Goals Made by Field Goals Attempted for Each Season, by player',
    hover_data=["SEASON_ID"],
    width=1000,
    height=600,
    labels={"FGA": "Number of Field Goals Attempted", "FGM": "Number of Field Goals Made"},
)
fig = fig.update_layout(xaxis=dict(axis_style), yaxis=dict(axis_style), plot_bgcolor='purple')
fig.show()

In [None]:
# generating data for scatterplot showing 3-pointers made by 3-pointers attempted
# Result: df_3p, one row per player and season with that season's FG3M / FG3A.
#
# A season split between teams appears in df_all as one row per team PLUS a
# 'TOT' row holding the season total. Averaging those rows yields only two
# thirds of the real total, so the 'TOT' row is used for such seasons and the
# per-team rows are dropped. Everything is derived from df_all here, so this
# cell does not depend on masks created in another cell.
is_total_row = df_all['TEAM_ABBREVIATION'] == 'TOT'
season_has_total = is_total_row.groupby([df_all['Name'], df_all['SEASON_ID']]).transform('any')

df_3p = df_all.loc[~season_has_total | is_total_row,
                   ['SEASON_ID', 'Name', 'FG3M', 'FG3A']].reset_index(drop=True)

df_3p.info()
df_3p

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   SEASON_ID  187 non-null    object
 1   Name       187 non-null    object
 2   FG3M       187 non-null    int64 
 3   FG3A       187 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 6.0+ KB


Unnamed: 0,SEASON_ID,Name,FG3M,FG3A
0,1984-85,Charles Barkley,1,6
1,1985-86,Charles Barkley,17,75
2,1986-87,Charles Barkley,21,104
3,1987-88,Charles Barkley,44,157
4,1988-89,Charles Barkley,35,162
...,...,...,...,...
182,1998-99,John Stockton,16,50
183,1999-00,John Stockton,43,121
184,2000-01,John Stockton,61,132
185,2001-02,John Stockton,25,78


In [None]:
# Scatter plot of 3-pointers made against 3-pointers attempted: one point per
# row of df_3p, coloured by player, with the season shown on hover.
axis_style = dict(tickformat=",", showgrid=False)

fig = px.scatter(
    df_3p,
    x='FG3A',
    y='FG3M',
    color='Name',
    title='3-Pointers Made by 3-Pointers Attempted for Each Season, by player',
    hover_data=["SEASON_ID"],
    width=1000,
    height=600,
    labels={"FG3A": "Number of 3-Pointers Attempted", "FG3M": "Number of 3-Pointers Made"},
)
fig = fig.update_layout(xaxis=dict(axis_style), yaxis=dict(axis_style), plot_bgcolor='brown')
fig.show()

In [None]:
# bar chart of total number of points made during NBA career of each player
# A season split between teams appears in df_all as one row per team plus a
# 'TOT' row that repeats their sum. Summing every row would count those
# seasons twice, so the 'TOT' rows are excluded from the career total.
team_rows = df_all[df_all['TEAM_ABBREVIATION'] != 'TOT']

# Career points per player, in roster order (0 if a player has no rows).
pts_sums = team_rows.groupby('Name')['PTS'].sum().reindex(roster, fill_value=0).tolist()

career_points = pd.DataFrame({'Name': roster, 'Points': pts_sums})

fig = px.bar(career_points, x="Points", y="Name", orientation='h', color_discrete_sequence=['darkgreen'],
             height=800,labels={'Name': 'Player', 'Points':'Total Career Points Scored'},
             title='Total NBA Career Points Scored, by player').update_yaxes(autorange='reversed').update_layout(plot_bgcolor='tan', xaxis=dict(tickformat=",")).update_xaxes(showgrid=False)
fig.show()

In [None]:
# create data for line chart to show career rebounds over time
# Result: df_reb, one row per player and season with that season's REB.
#
# A season split between teams appears in df_all as one row per team PLUS a
# 'TOT' row holding the season total. Averaging those rows yields only two
# thirds of the real total, so the 'TOT' row is used for such seasons and the
# per-team rows are dropped. Everything is derived from df_all here, so this
# cell does not depend on masks created in another cell.
is_total_row = df_all['TEAM_ABBREVIATION'] == 'TOT'
season_has_total = is_total_row.groupby([df_all['Name'], df_all['SEASON_ID']]).transform('any')

df_reb = df_all.loc[~season_has_total | is_total_row,
                    ['SEASON_ID', 'Name', 'REB']].reset_index(drop=True)

df_reb.info()
df_reb

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187 entries, 0 to 186
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   SEASON_ID  187 non-null    object
 1   Name       187 non-null    object
 2   REB        187 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 4.5+ KB


Unnamed: 0,SEASON_ID,Name,REB
0,1984-85,Charles Barkley,703
1,1985-86,Charles Barkley,1026
2,1986-87,Charles Barkley,994
3,1987-88,Charles Barkley,951
4,1988-89,Charles Barkley,986
...,...,...,...
182,1998-99,John Stockton,146
183,1999-00,John Stockton,215
184,2000-01,John Stockton,227
185,2001-02,John Stockton,263


In [None]:
# boxplots of the distribution of rebounds made per season, one box per player
reb_boxes = [
    go.Box(y=season_stats['REB'], name=player_name)
    for season_stats, player_name in (
        (barkley, "Charles Barkley"),
        (bird, "Larry Bird"),
        (drexler, "Clyde Drexler"),
        (ewing, "Patrick Ewing"),
        (johnson, "Magic Johnson"),
        (jordan, "Michael Jordan"),
        (laettner, "Christian Laettner"),
        (malone, "Karl Malone"),
        (mullin, "Chris Mullin"),
        (pippen, "Scottie Pippen"),
        (robinson, "David Robinson"),
        (stockton, "John Stockton"),
    )
]

# traces are added in the order listed above; the y axis shows comma-grouped integers
fig = go.Figure(data=reb_boxes).update_layout(
    title="Distribution of Total Rebounds Made in a NBA Season, by player",
    xaxis_title="Player",
    yaxis_title="Number of Rebounds Made in a NBA Season",
    yaxis=dict(tickformat=",d"),
)
fig.show()

In [None]:
# pie chart of each player's share of the combined career steals

# total steals per player: one row per Name with the summed STL column
df_steals=df_all.groupby('Name')['STL'].sum().reset_index()

# hover text for a slice: player name, career steal total and share of the pie.
# The label previously read "Assists" (copied from the assists chart) although
# the values plotted here are steals.
hovertemp = "<b>Name:</b> %{label}<br>" \
            "<b>Number of Career Steals:</b> %{value:,.0f}<br>" \
            "<b>Percentage:</b> %{percent:,.0%}<extra></extra>"

# slices are labelled with whole-number percentages; the hover box uses the template above
fig = px.pie(df_steals, values='STL', names='Name', title='Percentage of NBA Career Steals by Dream Team Members', 
             color_discrete_sequence=px.colors.sequential.RdBu, height=600).update_traces(hovertemplate=hovertemp,texttemplate='%{percent:,.0%}')
fig.show()

In [None]:
# pie chart of each player's share of the combined career assists

# total assists per player: one row per Name with the summed AST column
df_assists = df_all.groupby('Name', as_index=False)['AST'].sum()

# hover text for a slice: player name, career assist total and share of the pie
hovertemp = "<br>".join([
    "<b>Name:</b> %{label}",
    "<b>Number of Career Assists:</b> %{value:,.0f}",
    "<b>Percentage:</b> %{percent:,.0%}<extra></extra>",
])

fig = px.pie(
    df_assists,
    values='AST',
    names='Name',
    title='Percentage of NBA Career Assists, by player',
    color_discrete_sequence=px.colors.sequential.RdBu,
    height=600,
)
# slices are labelled with whole-number percentages; re-assigned so the update
# is not left as a bare expression that the notebook would render on its own
fig = fig.update_traces(hovertemplate=hovertemp, texttemplate='%{percent:,.0%}')
fig.show()



In [None]:
# horizontal bar chart of the total blocks each player recorded over his NBA career

# add up the BLK values of every df_all row belonging to each player, in roster order
blks_sums = [sum(df_all['BLK'][df_all['Name'] == player]) for player in roster]

combine={'Name':roster,'Blocks':blks_sums}
career_blocks=pd.DataFrame(combine)

# hover text for a bar: player name and comma-grouped block total
hovertemp = "<b>Name:</b> %{label}<br>" \
            "<b>Number of Career Blocks:</b> %{value:,.0f}<br>" 

# hover_data takes a bare format spec (':,.0f'); the previous '{value:,.0f}' was not
# a valid plotly express format string. The hovertemplate set via update_traces
# replaces the generated hover text either way, so the rendered chart is unchanged.
fig = px.bar(career_blocks, x="Blocks", y="Name", 
             hover_data={'Blocks': ':,.0f'}, orientation='h', color_discrete_sequence=['purple'], 
             height=800,labels={'Name': 'Player', 'Blocks':'Total Career Blocks'},
             title='Total NBA Career Blocks, by player').update_yaxes(autorange='reversed').update_layout(plot_bgcolor='tan',xaxis=dict(tickformat=",")).update_xaxes(showgrid=False).update_traces(hovertemplate=hovertemp)
fig.show()

In [None]:
# pie chart of each player's share of the combined career personal fouls

# total personal fouls per player: one row per Name with the summed PF column
pf_by_player = df_all.groupby('Name')['PF']
df_pf = pf_by_player.sum().reset_index()

# hover text for a slice: player name, career foul total and share of the pie
hovertemp = (
    "<b>Name:</b> %{label}<br>"
    "<b>Number of Career Personal Fouls:</b> %{value:,.0f}<br>"
    "<b>Percentage:</b> %{percent:,.0%}<extra></extra>"
)

pie_options = dict(
    values='PF',
    names='Name',
    title='Percentage of NBA Career Personal Fouls, by player',
    color_discrete_sequence=px.colors.sequential.RdBu,
    height=600,
)
# slices are labelled with whole-number percentages; the hover box uses the template above
fig = px.pie(df_pf, **pie_options).update_traces(hovertemplate=hovertemp, texttemplate='%{percent:,.0%}')
fig.show()