### Dream Team project
A data science project exploring the NBA careers of the 12 members of the 1992 US Olympic men's basketball team (the "Dream Team"). The data is retrieved through the `nba_api` Python package. The project includes a variety of visualizations, including bar charts, pie charts, maps, and scatterplots.

In [1]:
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity="all"
import matplotlib.pyplot as plt
import plotly.express as px
from nba_api.stats.static import players
from nba_api.stats.endpoints import playercareerstats
from nba_api.stats.static import teams
import plotly.graph_objects as go
from geopy.geocoders import Nominatim
import statistics

In [2]:
# Look up the NBA API player ID of every player on the roster.
roster=['Charles Barkley','Larry Bird','Clyde Drexler','Patrick Ewing','Magic Johnson', 
        'Michael Jordan', 'Christian Laettner', 'Karl Malone','Chris Mullin',
        'Scottie Pippen', 'David Robinson', 'John Stockton']
ID=[]
for full_name in roster:
    matches = players.find_players_by_full_name(full_name)
    if not matches:
        # fail with the offending name instead of an anonymous IndexError
        raise LookupError(f"No NBA API player record found for {full_name!r}")
    # the first match is used; read its ID by key rather than relying on
    # 'id' being the first value of the record dictionary
    ID.append(matches[0]['id'])

In [3]:
# show information about player IDs: the list itself, its length, its type,
# and the type of its first element
# (every bare expression below is displayed because ast_node_interactivity is set to "all")
ID
len(ID)
type(ID)
type(ID[0])

[787, 1449, 17, 121, 77142, 893, 363, 252, 904, 937, 764, 304]

12

list

int

In [4]:
# Download the career statistics of every player and collect the resulting
# dataframes in one dictionary keyed by the player's short name.
names = [
    'barkley', 'bird', 'drexler', 'ewing', 'johnson', 'jordan',
    'laettner', 'malone', 'mullin', 'pippen', 'robinson', 'stockton',
]
dataframes = {}

# ID and names are parallel lists: the n-th ID is stored under the n-th short name
for player_id, short_name in zip(ID, names):
    career_stats = playercareerstats.PlayerCareerStats(player_id=player_id)
    # only the first dataframe returned by the endpoint is kept
    dataframes[short_name] = career_stats.get_data_frames()[0]

In [5]:
#separate out dataframes in dictionary for each player
#each frame is looked up by its key, so the assignment no longer depends on
#the iteration order of the dictionary or on a hand-maintained counter
barkley = dataframes['barkley']
bird = dataframes['bird']
drexler = dataframes['drexler']
ewing = dataframes['ewing']
johnson = dataframes['johnson']
jordan = dataframes['jordan']
laettner = dataframes['laettner']
malone = dataframes['malone']
mullin = dataframes['mullin']
pippen = dataframes['pippen']
robinson = dataframes['robinson']
stockton = dataframes['stockton']

In [6]:
#get information about dataframes
#DataFrame.info() prints its report itself and returns None, so it is called
#directly; wrapping the calls in print() only appended a line of None values
for name, frame in dataframes.items():
    #label each report so it is clear which player it belongs to
    print(f"--- {name} ---")
    frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16 entries, 0 to 15
Data columns (total 27 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          16 non-null     int64  
 1   SEASON_ID          16 non-null     object 
 2   LEAGUE_ID          16 non-null     object 
 3   TEAM_ID            16 non-null     int64  
 4   TEAM_ABBREVIATION  16 non-null     object 
 5   PLAYER_AGE         16 non-null     float64
 6   GP                 16 non-null     int64  
 7   GS                 16 non-null     int64  
 8   MIN                16 non-null     int64  
 9   FGM                16 non-null     int64  
 10  FGA                16 non-null     int64  
 11  FG_PCT             16 non-null     float64
 12  FG3M               16 non-null     int64  
 13  FG3A               16 non-null     int64  
 14  FG3_PCT            16 non-null     float64
 15  FTM                16 non-null     int64  
 16  FTA                16 non-nu

In [7]:
#check for duplicate season IDs in dataframes
for name in names:
    #fetch the dataframe from the dictionary by key instead of eval()-ing the variable name
    season_counts = dataframes[name]['SEASON_ID'].value_counts()
    #number of distinct season IDs that occur on more than one row
    dups = int((season_counts > 1).sum())
    print(f"In the {name} dataframe, there are {dups} duplicate season IDs.")

In the barkley dataframe, there are 0 duplicate season IDs.
In the bird dataframe, there are 0 duplicate season IDs.
In the drexler dataframe, there are 1 duplicate season IDs.
In the ewing dataframe, there are 0 duplicate season IDs.
In the johnson dataframe, there are 0 duplicate season IDs.
In the jordan dataframe, there are 0 duplicate season IDs.
In the laettner dataframe, there are 2 duplicate season IDs.
In the malone dataframe, there are 0 duplicate season IDs.
In the mullin dataframe, there are 0 duplicate season IDs.
In the pippen dataframe, there are 0 duplicate season IDs.
In the robinson dataframe, there are 0 duplicate season IDs.
In the stockton dataframe, there are 0 duplicate season IDs.


In [8]:
# Bar chart of the number of seasons played for each player
# number of distinct season IDs in each player's dataframe, in dictionary order
season_num = [dataframe['SEASON_ID'].nunique() for dataframe in dataframes.values()]

bar_trace = go.Bar(x=roster, y=season_num)
fig = (
    go.Figure(data=[bar_trace])
    .update_layout(
        title='Number of seasons members played during NBA career, by player',
        xaxis_title='Player',
        yaxis_title='Number of NBA seasons',
        plot_bgcolor='lightgrey',
    )
    .update_traces(marker_color='crimson')
)
fig.show()

In [9]:
#map of locations of teams that players played for during NBA career
#data wrangling
#add player name to each player dataframe
#(the tuple lists the dataframes in the same order as the names in roster)
player_frames = (barkley, bird, drexler, ewing,
                 johnson, jordan, laettner, malone,
                 mullin, pippen, robinson, stockton)
for frame, full_name in zip(player_frames, roster):
    frame['Name'] = full_name
#stack all player dataframes into one dataframe with a fresh 0..n-1 index
df_all = pd.concat(list(player_frames), axis=0, ignore_index=True)
df_all.info()
df_all.head()
df_all.tail()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193 entries, 0 to 192
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          193 non-null    int64  
 1   SEASON_ID          193 non-null    object 
 2   LEAGUE_ID          193 non-null    object 
 3   TEAM_ID            193 non-null    int64  
 4   TEAM_ABBREVIATION  193 non-null    object 
 5   PLAYER_AGE         193 non-null    float64
 6   GP                 193 non-null    int64  
 7   GS                 193 non-null    int64  
 8   MIN                193 non-null    int64  
 9   FGM                193 non-null    int64  
 10  FGA                193 non-null    int64  
 11  FG_PCT             193 non-null    float64
 12  FG3M               193 non-null    int64  
 13  FG3A               193 non-null    int64  
 14  FG3_PCT            193 non-null    float64
 15  FTM                193 non-null    int64  
 16  FTA                193 non

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Name
0,787,1984-85,0,1610612755,PHL,22.0,82,60,2347,427,...,266,437,703,155,95,80,209,301,1148,Charles Barkley
1,787,1985-86,0,1610612755,PHL,23.0,80,80,2952,595,...,354,672,1026,312,173,125,350,333,1603,Charles Barkley
2,787,1986-87,0,1610612755,PHL,24.0,68,62,2740,557,...,390,604,994,331,119,104,322,252,1564,Charles Barkley
3,787,1987-88,0,1610612755,PHL,25.0,80,80,3170,753,...,385,566,951,254,100,103,304,278,2264,Charles Barkley
4,787,1988-89,0,1610612755,PHL,26.0,79,79,3088,700,...,403,583,986,325,126,67,254,262,2037,Charles Barkley


Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Name
188,304,1998-99,0,1610612762,UTA,37.0,50,50,1410,200,...,31,115,146,374,81,13,110,107,553,John Stockton
189,304,1999-00,0,1610612762,UTA,38.0,82,82,2432,363,...,45,170,215,703,143,15,179,192,990,John Stockton
190,304,2000-01,0,1610612762,UTA,39.0,82,82,2397,328,...,54,173,227,713,132,21,203,194,944,John Stockton
191,304,2001-02,0,1610612762,UTA,40.0,82,82,2570,401,...,59,204,263,674,152,24,208,209,1102,John Stockton
192,304,2002-03,0,1610612762,UTA,41.0,82,82,2275,309,...,51,150,201,629,137,16,182,184,884,John Stockton


In [10]:
#lists that will become the columns of the team/city mapping dataframe
Name, Team_Abbreviation, Team_Name, City, State = [], [], [], [], []
#for every player on the roster, record one entry per team abbreviation found
#in df_all, together with the team name, city, and state returned by the API
for player in roster:
    abbreviations = df_all.loc[df_all['Name'] == player, 'TEAM_ABBREVIATION'].value_counts().index
    for abbreviation in abbreviations:
        team = teams.find_team_by_abbreviation(abbreviation)
        found = team is not None
        Name.append(player)
        Team_Abbreviation.append(abbreviation)
        #abbreviations the lookup does not return a team for are stored as missing values
        Team_Name.append(team['full_name'] if found else np.nan)
        City.append(team['city'] if found else np.nan)
        State.append(team['state'] if found else np.nan)
            

In [11]:
#assemble the mapping dataframe, one column per list
mapdata = pd.DataFrame(dict(Name=Name,
                            Team_Abbreviation=Team_Abbreviation,
                            Team_Name=Team_Name,
                            City=City,
                            State=State))
#look at mapping data
mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State
0,Charles Barkley,PHL,,,
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas
6,Clyde Drexler,TOT,,,
7,Patrick Ewing,NYK,New York Knicks,New York,New York
8,Patrick Ewing,SEA,,,
9,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida


In [12]:
#fill in missing data and replace incorrect data
#the three columns that every correction below overwrites
columns_to_modify=['Team_Name', 'City', 'State']

#corrections that apply to every row carrying the given team abbreviation
team_corrections = {
    'PHL': ['Philadelphia 76ers', 'Philadelphia', 'Pennsylvania'],
    'SEA': ['Seattle SuperSonics', 'Seattle', 'Washington'],
    'MIN': ['Minnesota Timberwolves', 'Minneapolis', 'Minnesota'],
    'UTH': ['Utah Jazz', 'Salt Lake City', 'Utah'],
    'UTA': ['Utah Jazz', 'Salt Lake City', 'Utah'],
    'GOS': ['Golden State Warriors', 'San Francisco', 'California'],
    'IND': ['Indiana Pacers', 'Indianapolis', 'Indiana'],
    'GSW': ['Golden State Warriors', 'San Francisco', 'California'],
    'SAN': ['San Antonio Spurs', 'San Antonio', 'Texas'],
}
for abbreviation, values in team_corrections.items():
    condition = mapdata['Team_Abbreviation'] == abbreviation
    mapdata.loc[condition, columns_to_modify] = values

#'TOT' rows are not tied to a single team, so their replacement depends on the player
total_row_corrections = {
    'Clyde Drexler': ['Houston Rockets', 'Houston', 'Texas'],
    'Christian Laettner': ['Dallas Mavericks', 'Dallas', 'Texas'],
}
for player, values in total_row_corrections.items():
    condition = (mapdata['Team_Abbreviation'] == 'TOT') & (mapdata['Name'] == player)
    mapdata.loc[condition, columns_to_modify] = values

#look at corrected dataframe
mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas
6,Clyde Drexler,TOT,Houston Rockets,Houston,Texas
7,Patrick Ewing,NYK,New York Knicks,New York,New York
8,Patrick Ewing,SEA,Seattle SuperSonics,Seattle,Washington
9,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida


In [13]:
#keep a single row per (player, team name) pair: the first occurrence stays,
#later repeats are dropped, and the index is renumbered from 0
mapdata = mapdata.drop_duplicates(subset=['Name', 'Team_Name'], ignore_index=True)
#check revised dataframe
mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas
6,Patrick Ewing,NYK,New York Knicks,New York,New York
7,Patrick Ewing,SEA,Seattle SuperSonics,Seattle,Washington
8,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida
9,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California


In [14]:
#create single column for City and State combined to facilitate geocoding
#both parts must be present on every row; stop here with a clear message
#rather than building an unusable geocoding query
assert mapdata[['City', 'State']].notna().all().all(), "City/State still contain missing values"
#vectorized string concatenation replaces the row-by-row iterrows() loop
City_State = (mapdata['City'] + ", " + mapdata['State']).tolist()
#add list as column to dataframe
mapdata['City_State']=City_State
#check revised dataframe
mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State,City_State
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania,"Philadelphia, Pennsylvania"
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona,"Phoenix, Arizona"
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas,"Houston, Texas"
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts,"Boston, Massachusetts"
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon,"Portland, Oregon"
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas,"Houston, Texas"
6,Patrick Ewing,NYK,New York Knicks,New York,New York,"New York, New York"
7,Patrick Ewing,SEA,Seattle SuperSonics,Seattle,Washington,"Seattle, Washington"
8,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida,"Orlando, Florida"
9,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California,"Los Angeles, California"


In [15]:
#getting coordinates for scattermap
#create lists for longitude and latitude coordinates
Longitude=[]
Latitude=[]
#use Nominatim function to interact with Nominatim geocoding service to get coordinates based on City_State column
geo=Nominatim(user_agent = "nba_map")
#several players share a city, so each distinct place is requested only once
#and its coordinates are reused for the remaining rows
coordinates_by_place = {}
for place in mapdata['City_State']:
    if place not in coordinates_by_place:
        geo_info = geo.geocode(place, timeout=None)
        if geo_info is None:
            #geocode() returns None when the service finds no match
            raise ValueError(f"Nominatim returned no result for {place!r}")
        coordinates_by_place[place] = (geo_info.latitude, geo_info.longitude)
    place_latitude, place_longitude = coordinates_by_place[place]
    Latitude.append(place_latitude)
    Longitude.append(place_longitude)

In [16]:
#check lists: both are displayed, Latitude first and Longitude second
Latitude
Longitude

[39.9527237,
 33.4484367,
 29.7589382,
 42.3554334,
 45.5202471,
 29.7589382,
 40.7127281,
 47.6038321,
 28.5421109,
 34.0536909,
 41.8755616,
 38.8950368,
 44.9772995,
 38.8950368,
 33.7544657,
 32.7762719,
 42.3315509,
 25.7741728,
 40.7596198,
 34.0536909,
 37.7792588,
 39.7683331,
 41.8755616,
 45.5202471,
 29.7589382,
 29.4246002,
 40.7596198]

[-75.1635262,
 -112.074141,
 -95.3676974,
 -71.060511,
 -122.674194,
 -95.3676974,
 -74.0060152,
 -122.330062,
 -81.3790304,
 -118.242766,
 -87.6244212,
 -77.0365427,
 -93.2654692,
 -77.0365427,
 -84.3898151,
 -96.7968559,
 -83.0466403,
 -80.19362,
 -111.886797,
 -118.242766,
 -122.4193286,
 -86.1583502,
 -87.6244212,
 -122.674194,
 -95.3676974,
 -98.4951405,
 -111.886797]

In [17]:
#create distinct coordinates with jittering to keep from having the exact same coordinates 
#for 2 different players in the same city which will cause only one player to show up in map
#create jitter amount
jitter_amount = 0.0001
#seeded random generator so that the jittered coordinates (and therefore the map)
#are identical every time the notebook is re-run
jitter_rng = np.random.default_rng(1992)
#create new Latitude and Longitude lists by adding to each coordinate a random number from 
#a uniform distribution between the negative jitter amount and the jitter amount
Latitude_new = (np.asarray(Latitude, dtype=float)
                + jitter_rng.uniform(-jitter_amount, jitter_amount, size=len(Latitude))).tolist()
Longitude_new = (np.asarray(Longitude, dtype=float)
                 + jitter_rng.uniform(-jitter_amount, jitter_amount, size=len(Longitude))).tolist()
#check new lists
Latitude_new
Longitude_new

[39.952628462801364,
 33.44838585737587,
 29.758886437286183,
 42.35533759428446,
 45.520279497042345,
 29.75891059732277,
 40.71280877390834,
 47.60376178724478,
 28.54202210351407,
 34.0536453521235,
 41.875485856225914,
 38.89504413866382,
 44.97738765787949,
 38.89509573628581,
 33.754502241949794,
 32.7762172799807,
 42.331600930663996,
 25.774217998338816,
 40.759638559216974,
 34.053721617316064,
 37.77934405068015,
 39.76829935403526,
 41.875470432528886,
 45.52028139739622,
 29.75894263713194,
 29.424543031538725,
 40.75966691968511]

[-75.16353066644268,
 -112.07416771216643,
 -95.3676311597854,
 -71.06060566350207,
 -122.67416941590074,
 -95.36766752395607,
 -74.00594006106967,
 -122.32996969716861,
 -81.37905623693567,
 -118.24281021332764,
 -87.62439921012096,
 -77.0364744260891,
 -93.26548415816178,
 -77.03651726001787,
 -84.38975100400873,
 -96.79680629753457,
 -83.04661125926783,
 -80.19365973619617,
 -111.88673183476753,
 -118.24279742604222,
 -122.41937286803046,
 -86.15838502868931,
 -87.62447266486014,
 -122.67420884508994,
 -95.36763171350869,
 -98.49513111746894,
 -111.8867608607385]

In [18]:
#store the jittered coordinates in mapdata as the Latitude and Longitude columns
for column_name, values in (('Latitude', Latitude_new), ('Longitude', Longitude_new)):
    mapdata[column_name] = values
#look at map data
mapdata
mapdata.info()

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State,City_State,Latitude,Longitude
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania,"Philadelphia, Pennsylvania",39.952628,-75.163531
1,Charles Barkley,PHX,Phoenix Suns,Phoenix,Arizona,"Phoenix, Arizona",33.448386,-112.074168
2,Charles Barkley,HOU,Houston Rockets,Houston,Texas,"Houston, Texas",29.758886,-95.367631
3,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts,"Boston, Massachusetts",42.355338,-71.060606
4,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon,"Portland, Oregon",45.520279,-122.674169
5,Clyde Drexler,HOU,Houston Rockets,Houston,Texas,"Houston, Texas",29.758911,-95.367668
6,Patrick Ewing,NYK,New York Knicks,New York,New York,"New York, New York",40.712809,-74.00594
7,Patrick Ewing,SEA,Seattle SuperSonics,Seattle,Washington,"Seattle, Washington",47.603762,-122.32997
8,Patrick Ewing,ORL,Orlando Magic,Orlando,Florida,"Orlando, Florida",28.542022,-81.379056
9,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California,"Los Angeles, California",34.053645,-118.24281


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27 entries, 0 to 26
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Name               27 non-null     object 
 1   Team_Abbreviation  27 non-null     object 
 2   Team_Name          27 non-null     object 
 3   City               27 non-null     object 
 4   State              27 non-null     object 
 5   City_State         27 non-null     object 
 6   Latitude           27 non-null     float64
 7   Longitude          27 non-null     float64
dtypes: float64(2), object(6)
memory usage: 1.8+ KB


In [19]:
#scatter map with one marker for every (player, team) row in mapdata
#hover text shows the player, team, and city/state; the raw coordinates are hidden
hover_columns = {'Latitude': False, 'Longitude': False, 'Name': True, 'Team_Name': True, 'City_State': True}
fig = (
    px.scatter_map(
        mapdata,
        lat="Latitude",
        lon="Longitude",
        hover_data=hover_columns,
        map_style='open-street-map',
        title= "NBA Teams Played for during NBA Careers",
        color_discrete_sequence=['orange'],
        height=800,
        zoom=3.5,
    )
    .update_traces(marker=dict(size=10))
)
fig.show()

In [25]:
# Bar chart of the number of NBA teams played by player
# number of distinct team names recorded in mapdata for each roster member
number_of_teams = [
    mapdata.loc[mapdata['Name'] == name, 'Team_Name'].nunique()
    for name in roster
]

bar_trace = go.Bar(x=roster, y=number_of_teams)
fig = (
    go.Figure(data=[bar_trace])
    .update_layout(
        title='Number of NBA teams played for during NBA career, by player',
        xaxis_title='Player',
        yaxis_title='Number of NBA teams',
        plot_bgcolor='lightgrey',
    )
    .update_traces(marker_color='purple')
)

fig.show()

In [26]:
#most frequent per-season value of the GP and GS columns for each player
games_played = [int(statistics.mode(data['GP'])) for data in dataframes.values()]
games_started = [int(statistics.mode(data['GS'])) for data in dataframes.values()]

#only the games-started values are charted
bar_trace1 = go.Bar(name="Games Started", x=roster, y=games_started, marker_color="gold")
fig = go.Figure(data=[bar_trace1]).update_layout(
    title='Most Frequent Number of NBA Games Started in a Season, by player',
    xaxis_title="Player",
    yaxis_title="Number of NBA Games Started",
    plot_bgcolor='lightgrey',
)
fig.show()

In [27]:
#Boxplots for the distributions for the number of minutes played each season
def _season_totals(frame):
    """Return one row per season from a player's career dataframe.

    Seasons that contain a row with TEAM_ABBREVIATION 'TOT' keep only that row;
    every other season keeps its row unchanged. The Drexler and Laettner
    dataframes list such seasons once per team plus the 'TOT' row, whose
    figures are the sum of the per-team rows, so plotting every row would
    add partial-season values to a chart of season totals.
    """
    is_total = frame['TEAM_ABBREVIATION'] == 'TOT'
    seasons_with_total = frame.loc[is_total, 'SEASON_ID']
    return frame[is_total | ~frame['SEASON_ID'].isin(seasons_with_total)]

#dataframes listed in the same order as the player names in roster
player_frames = (barkley, bird, drexler, ewing,
                 johnson, jordan, laettner, malone,
                 mullin, pippen, robinson, stockton)
fig = go.Figure()
for frame, full_name in zip(player_frames, roster):
    #one box per player, built from that player's per-season minutes
    fig.add_trace(go.Box(y=_season_totals(frame)['MIN'], name=full_name))
fig = fig.update_layout(
    title="Distribution of Total Minutes Played Each NBA Season, by player",
    xaxis_title="Player",
    yaxis=dict(tickformat=",d"),
)
fig.show()

In [44]:
#free throw data for the players whose dataframes have no repeated season IDs
#(Clyde Drexler and Christian Laettner have repeated season IDs and are pulled out separately)
#every roster member except those two, in roster order
target_categories = [name for name in roster
                     if name not in ('Clyde Drexler', 'Christian Laettner')]
is_in_target_categories = df_all['Name'].isin(target_categories)
#keep only the season, player name, and free throw columns
df_ft = (
    df_all
    .loc[is_in_target_categories, ['SEASON_ID','Name', 'FTM','FTA','FT_PCT']]
    .reset_index(drop=True)
)
df_ft

Unnamed: 0,SEASON_ID,Name,FTM,FTA,FT_PCT
0,1984-85,Charles Barkley,293,400,0.733
1,1985-86,Charles Barkley,396,578,0.685
2,1986-87,Charles Barkley,429,564,0.761
3,1987-88,Charles Barkley,714,951,0.751
4,1988-89,Charles Barkley,602,799,0.753
...,...,...,...,...,...
154,1998-99,John Stockton,137,169,0.811
155,1999-00,John Stockton,221,257,0.860
156,2000-01,John Stockton,227,278,0.817
157,2001-02,John Stockton,275,321,0.857


In [49]:
#season, player name, and free throw columns shared by both extracts
ft_columns = ['SEASON_ID','Name', 'FTM','FTA','FT_PCT']
#free throw rows for Drexler
is_drexler = df_all['Name']=='Clyde Drexler'
df_ft_drexler = df_all.loc[is_drexler, ft_columns].reset_index(drop=True)
df_ft_drexler
#free throw rows for Laettner
is_laettner = df_all['Name']=='Christian Laettner'
df_ft_laettner = df_all.loc[is_laettner, ft_columns].reset_index(drop=True)
df_ft_laettner


Unnamed: 0,SEASON_ID,Name,FTM,FTA,FT_PCT
0,1983-84,Clyde Drexler,123,169,0.728
1,1984-85,Clyde Drexler,223,294,0.759
2,1985-86,Clyde Drexler,293,381,0.769
3,1986-87,Clyde Drexler,357,470,0.76
4,1987-88,Clyde Drexler,476,587,0.811
5,1988-89,Clyde Drexler,438,548,0.799
6,1989-90,Clyde Drexler,333,430,0.774
7,1990-91,Clyde Drexler,416,524,0.794
8,1991-92,Clyde Drexler,401,505,0.794
9,1992-93,Clyde Drexler,245,292,0.839


Unnamed: 0,SEASON_ID,Name,FTM,FTA,FT_PCT
0,1992-93,Christian Laettner,462,553,0.835
1,1993-94,Christian Laettner,375,479,0.783
2,1994-95,Christian Laettner,409,500,0.818
3,1995-96,Christian Laettner,217,266,0.816
4,1995-96,Christian Laettner,107,130,0.823
5,1995-96,Christian Laettner,324,396,0.818
6,1996-97,Christian Laettner,359,440,0.816
7,1997-98,Christian Laettner,306,354,0.864
8,1998-99,Christian Laettner,44,57,0.772
9,1999-00,Christian Laettner,237,292,0.812


In [46]:
#scatter plot of field goals made against field goals attempted,
#one point per row of df_all, colored by player
fig = px.scatter(
    df_all,
    x='FGA',
    y='FGM',
    color='Name',
    title='Field Goals Made by Field Goals Attempted for Each Season, by player',
    hover_data=["SEASON_ID"],
    width=1000,
    height=600,
    labels={"FGA": "Number of Field Goals Attempted", "FGM": "Number of Field Goals Made"},
).update_layout(
    #thousands separators on both axes
    xaxis=dict(tickformat=","),
    yaxis=dict(tickformat=","),
    plot_bgcolor='navy',
)
fig.show()

In [47]:
#scatter plot of 3-pointers made against 3-pointers attempted,
#one point per row of df_all, colored by player
fig = px.scatter(
    df_all,
    x='FG3A',
    y='FG3M',
    color='Name',
    title='3-Pointers Made by 3-Pointers Attempted for Each Season, by player',
    hover_data=["SEASON_ID"],
    width=1000,
    height=600,
    labels={"FG3A": "Number of 3-Pointers Attempted", "FG3M": "Number of 3-Pointers Made"},
).update_layout(plot_bgcolor='brown')
fig.show()

In [None]:
#bar chart based on career total number of points made during NBA career
#a season split between teams is listed once per team plus a combined 'TOT' row
#whose figures equal the sum of the per-team rows; leave the 'TOT' rows out so
#that those seasons are not counted twice in the career totals
team_rows = df_all[df_all['TEAM_ABBREVIATION'] != 'TOT']
#career total for each player, in roster order (a player without rows gets 0)
pts_sums = team_rows.groupby('Name')['PTS'].sum().reindex(roster, fill_value=0).tolist()
#create dataframe from roster and totals lists
combine={'Name':roster,'Points':pts_sums}
career_points=pd.DataFrame(combine)
#creating bar chart
fig = px.bar(career_points, x="Points", y="Name", orientation='h', color_discrete_sequence=['darkgreen'], 
             height=800,labels={'Name': 'Player', 'Points':'Total Career Points Scored'},
             title='Total NBA Career Points Scored, by player').update_yaxes(autorange='reversed').update_layout(plot_bgcolor='tan')
fig.show()

In [54]:
#career free throws

In [None]:
#career rebounds

In [None]:
#career assists

In [None]:
#career blocks

In [None]:
#career personal fouls

In [None]:
#looking at rookie season for players
#creating rookie seasons dataframe
#take the row labelled 0 (the first row) of every player's dataframe and
#concatenate them in a single call instead of growing a dataframe inside a loop
rookie = pd.concat(
    [player.loc[[0]] for player in dataframes.values()],
    axis=0,
    ignore_index=True,
)

In [83]:
#look at rookie dataframe: display its rows, then print its column summary
rookie
rookie.info()

Unnamed: 0,PLAYER_ID,SEASON_ID,LEAGUE_ID,TEAM_ID,TEAM_ABBREVIATION,PLAYER_AGE,GP,GS,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,Name
0,787,1984-85,0,1610612755,PHL,22.0,82,60,2347,427,...,266,437,703,155,95,80,209,301,1148,Charles Barkley
1,1449,1979-80,0,1610612738,BOS,23.0,82,82,2955,693,...,216,636,852,370,143,53,263,279,1745,Larry Bird
2,17,1983-84,0,1610612757,POR,22.0,82,3,1408,252,...,112,123,235,153,107,29,123,209,628,Clyde Drexler
3,121,1985-86,0,1610612752,NYK,23.0,50,50,1771,386,...,124,327,451,102,54,103,172,191,998,Patrick Ewing
4,77142,1979-80,0,1610612747,LAL,20.0,77,72,2795,503,...,166,430,596,563,187,41,305,218,1387,Magic Johnson
5,893,1984-85,0,1610612741,CHI,22.0,82,82,3144,837,...,167,367,534,481,196,69,291,285,2313,Michael Jordan
6,363,1992-93,0,1610612750,MIN,23.0,81,81,2823,503,...,171,537,708,223,105,83,275,290,1472,Christian Laettner
7,252,1985-86,0,1610612762,UTH,22.0,81,76,2475,504,...,174,544,718,236,105,44,279,295,1203,Karl Malone
8,904,1985-86,0,1610612744,GOS,22.0,55,30,1391,287,...,42,73,115,105,70,23,75,130,768,Chris Mullin
9,937,1987-88,0,1610612741,CHI,22.0,79,0,1650,261,...,115,183,298,169,91,52,131,214,625,Scottie Pippen


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12 entries, 0 to 11
Data columns (total 28 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   PLAYER_ID          12 non-null     int64  
 1   SEASON_ID          12 non-null     object 
 2   LEAGUE_ID          12 non-null     object 
 3   TEAM_ID            12 non-null     int64  
 4   TEAM_ABBREVIATION  12 non-null     object 
 5   PLAYER_AGE         12 non-null     float64
 6   GP                 12 non-null     int64  
 7   GS                 12 non-null     int64  
 8   MIN                12 non-null     int64  
 9   FGM                12 non-null     int64  
 10  FGA                12 non-null     int64  
 11  FG_PCT             12 non-null     float64
 12  FG3M               12 non-null     int64  
 13  FG3A               12 non-null     int64  
 14  FG3_PCT            12 non-null     float64
 15  FTM                12 non-null     int64  
 16  FTA                12 non-nu

In [None]:
#map of teams played for during rookie seasons
#create variables for dataframe to prep for mapping rookie teams and cities
Name=[]
Team_Abbreviation=[]
Team_Name=[]
City=[]
State=[]
for player in roster:
    player_teams = df_all.loc[df_all['Name'] == player, 'TEAM_ABBREVIATION']
    #the first row of each player is treated as the rookie season, the same
    #assumption the rookie dataframe relies on; the whole abbreviation is kept
    #as one value (wrapping it in list() split it into single characters, and
    #value_counts() selected the most frequent team rather than the first one)
    rookie_abbreviation = player_teams.iloc[0]
    Name.append(player)
    Team_Abbreviation.append(rookie_abbreviation)
    team = teams.find_team_by_abbreviation(rookie_abbreviation)
    if team is None:
        #abbreviations the lookup does not return a team for are stored as missing values
        Team_Name.append(np.nan)
        City.append(np.nan)
        State.append(np.nan)
    else:
        Team_Name.append(team['full_name'])
        City.append(team['city'])
        State.append(team['state'])

In [97]:
#assemble the rookie mapping dataframe, one column per list
rookie_mapdata = pd.DataFrame(dict(Name=Name,
                                   Team_Abbreviation=Team_Abbreviation,
                                   Team_Name=Team_Name,
                                   City=City,
                                   State=State))
rookie_mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State
0,Charles Barkley,PHL,,,
1,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts
2,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon
3,Patrick Ewing,NYK,New York Knicks,New York,New York
4,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California
5,Michael Jordan,CHI,Chicago Bulls,Chicago,Illinois
6,Christian Laettner,MIN,Minnesota Timberwolves,Minnesota,Minnesota
7,Karl Malone,UTH,,,
8,Chris Mullin,GOS,,,
9,Scottie Pippen,CHI,Chicago Bulls,Chicago,Illinois


In [None]:
#fill in missing data and incorrect data
#the three columns that every correction below overwrites
columns_to_modify=['Team_Name', 'City', 'State']

#replacement values for every row carrying the given team abbreviation
rookie_team_corrections = {
    'PHL': ['Philadelphia 76ers', 'Philadelphia', 'Pennsylvania'],
    'MIN': ['Minnesota Timberwolves', 'Minneapolis', 'Minnesota'],
    'UTH': ['Utah Jazz', 'Salt Lake City', 'Utah'],
    'GOS': ['Golden State Warriors', 'San Francisco', 'California'],
    'SAN': ['San Antonio Spurs', 'San Antonio', 'Texas'],
}
for abbreviation, values in rookie_team_corrections.items():
    condition = rookie_mapdata['Team_Abbreviation'] == abbreviation
    rookie_mapdata.loc[condition, columns_to_modify] = values

rookie_mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania
1,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts
2,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon
3,Patrick Ewing,NYK,New York Knicks,New York,New York
4,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California
5,Michael Jordan,CHI,Chicago Bulls,Chicago,Illinois
6,Christian Laettner,MIN,Minnesota Timberwolves,Minneapolis,Minnesota
7,Karl Malone,UTH,Utah Jazz,Salt Lake City,Utah
8,Chris Mullin,GOS,Golden State Warriors,San Francisco,California
9,Scottie Pippen,CHI,Chicago Bulls,Chicago,Illinois


In [99]:
# Create a single 'City_State' column of the form "City, State".
# The column is built with a vectorized string concatenation and attached by
# name with assign(), so every value stays on its own row regardless of the
# frame's index, and re-running the cell overwrites the column instead of
# appending a second 'City_State' column.
rookie_mapdata = rookie_mapdata.assign(
    City_State=rookie_mapdata['City'] + ", " + rookie_mapdata['State']
)
# check data
rookie_mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State,City_State
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania,"Philadelphia, Pennsylvania"
1,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts,"Boston, Massachusetts"
2,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon,"Portland, Oregon"
3,Patrick Ewing,NYK,New York Knicks,New York,New York,"New York, New York"
4,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California,"Los Angeles, California"
5,Michael Jordan,CHI,Chicago Bulls,Chicago,Illinois,"Chicago, Illinois"
6,Christian Laettner,MIN,Minnesota Timberwolves,Minneapolis,Minnesota,"Minneapolis, Minnesota"
7,Karl Malone,UTH,Utah Jazz,Salt Lake City,Utah,"Salt Lake City, Utah"
8,Chris Mullin,GOS,Golden State Warriors,San Francisco,California,"San Francisco, California"
9,Scottie Pippen,CHI,Chicago Bulls,Chicago,Illinois,"Chicago, Illinois"


In [100]:
# Getting coordinates for the scatter map.
# Longitude and Latitude end up with one entry per row of
# rookie_mapdata['City_State'], in row order.
Longitude = []
Latitude = []

# Geocoder used to turn each "City, State" string into coordinates
geo = Nominatim(user_agent="nba_map")

# Several players share a city, so each distinct place is looked up only once
# and the result reused; this avoids sending duplicate requests to the geocoder.
geocoded_places = {}
for place in rookie_mapdata['City_State']:
    if place not in geocoded_places:
        location = geo.geocode(place, timeout=None)
        # geocode() returns None when no match is found; stop with a message that
        # names the failing place instead of an AttributeError on None.latitude.
        if location is None:
            raise ValueError(f"Could not geocode {place!r}")
        geocoded_places[place] = (location.latitude, location.longitude)
    place_latitude, place_longitude = geocoded_places[place]
    Latitude.append(place_latitude)
    Longitude.append(place_longitude)

In [101]:
# Inspect the geocoded coordinate lists. Each holds one value per entry of
# rookie_mapdata['City_State'], in the same order. Both expressions are
# rendered (Longitude first, then Latitude) because the notebook sets
# InteractiveShell.ast_node_interactivity = "all".
Longitude
Latitude

[-75.1635262,
 -71.060511,
 -122.674194,
 -74.0060152,
 -118.242766,
 -87.6244212,
 -93.2654692,
 -111.886797,
 -122.4193286,
 -87.6244212,
 -98.4951405,
 -111.886797]

[39.9527237,
 42.3554334,
 45.5202471,
 40.7127281,
 34.0536909,
 41.8755616,
 44.9772995,
 40.7596198,
 37.7792588,
 41.8755616,
 29.4246002,
 40.7596198]

In [102]:
# Jitter the coordinates so that each player has unique coordinates
# (players whose teams share a city would otherwise sit on the exact same point).
# Maximum offset added to or subtracted from each coordinate value
jitter_amount = 0.0001
# Seeded random generator: the offsets, and therefore the plotted positions,
# are identical every time the notebook is re-run from a fresh kernel.
jitter_rng = np.random.default_rng(1992)
# Create new Latitude and Longitude lists; the originals are left unchanged
Latitude_new = [coord + jitter_rng.uniform(-jitter_amount, jitter_amount) for coord in Latitude]
Longitude_new = [coord + jitter_rng.uniform(-jitter_amount, jitter_amount) for coord in Longitude]
# Check new lists
Latitude_new
Longitude_new

[39.95277510386749,
 42.355467851037325,
 45.520328107790604,
 40.712782077997204,
 34.05361750777415,
 41.87551511271479,
 44.97723017959061,
 40.75968572482548,
 37.7793317592269,
 41.875603948936636,
 29.42457358992882,
 40.759705527187734]

[-75.16347951673333,
 -71.0605891400511,
 -122.67422519406443,
 -74.00592497371322,
 -118.24281905618936,
 -87.62445480209792,
 -93.26541697621393,
 -111.88672456437277,
 -122.41923279499736,
 -87.62437366384405,
 -98.49520267048996,
 -111.88677189725178]

In [103]:
# Add the adjusted (jittered) coordinates to the rookie_mapdata dataframe.
# assign() attaches both lists as named columns in one step:
#   - values are matched to rows by position, and a list whose length differs
#     from the number of rows raises an error instead of being padded with NaN;
#   - re-running the cell overwrites 'Latitude' / 'Longitude' rather than
#     appending duplicate columns;
#   - the Latitude / Longitude lists are not rebound to DataFrames, so the
#     jitter cell that reads them can still be re-run.
# reset_index(drop=True) leaves the result with a default 0..n-1 index.
rookie_mapdata = rookie_mapdata.reset_index(drop=True).assign(
    Latitude=Latitude_new,
    Longitude=Longitude_new,
)
rookie_mapdata

Unnamed: 0,Name,Team_Abbreviation,Team_Name,City,State,City_State,Latitude,Longitude
0,Charles Barkley,PHL,Philadelphia 76ers,Philadelphia,Pennsylvania,"Philadelphia, Pennsylvania",39.952775,-75.16348
1,Larry Bird,BOS,Boston Celtics,Boston,Massachusetts,"Boston, Massachusetts",42.355468,-71.060589
2,Clyde Drexler,POR,Portland Trail Blazers,Portland,Oregon,"Portland, Oregon",45.520328,-122.674225
3,Patrick Ewing,NYK,New York Knicks,New York,New York,"New York, New York",40.712782,-74.005925
4,Magic Johnson,LAL,Los Angeles Lakers,Los Angeles,California,"Los Angeles, California",34.053618,-118.242819
5,Michael Jordan,CHI,Chicago Bulls,Chicago,Illinois,"Chicago, Illinois",41.875515,-87.624455
6,Christian Laettner,MIN,Minnesota Timberwolves,Minneapolis,Minnesota,"Minneapolis, Minnesota",44.97723,-93.265417
7,Karl Malone,UTH,Utah Jazz,Salt Lake City,Utah,"Salt Lake City, Utah",40.759686,-111.886725
8,Chris Mullin,GOS,Golden State Warriors,San Francisco,California,"San Francisco, California",37.779332,-122.419233
9,Scottie Pippen,CHI,Chicago Bulls,Chicago,Illinois,"Chicago, Illinois",41.875604,-87.624374


In [105]:
# Scatter map showing where each player's rookie-season NBA team was located.
# Hover box: show the player, team and city/state; hide the raw coordinates,
# which are only needed to position the markers.
hover_fields = {'Latitude': False, 'Longitude': False, 'Name': True, 'Team_Name': True, 'City_State': True}

fig = px.scatter_map(
    rookie_mapdata,
    lat="Latitude",
    lon="Longitude",
    hover_data=hover_fields,
    map_style='open-street-map',
    title="NBA Teams that the 1992 Dream Team played for during their Rookie Season",
    color_discrete_sequence=['orange'],
    height=800,
    zoom=3.5,
)
# Same marker size for every point. The result is reassigned so the call is not
# a bare expression, which the notebook would render as an additional output.
fig = fig.update_traces(marker={'size': 10})
fig.show()