# NBA Players Through the Years

This is a Data Science Project that will analyze players through the years from 1996 to 2019. The data used for the project is from the [NBA Players Dataset](https://www.kaggle.com/datasets/justinas/nba-players-data) by Justinas Cirtutas found on Kaggle.

### Import Libraries

In [85]:
import pandas as pd
from utils.scrape import load_dict

### Loading the Data

In [36]:
raw_df = pd.read_csv('player_info.csv', index_col=False)

In [39]:
# drop the index column which is unnecessary
raw_df.drop('Unnamed: 0', axis=1)

Unnamed: 0,player_name,team_abbreviation,age,player_height,player_weight,college,country,draft_year,draft_round,draft_number,...,pts,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season
0,Travis Knight,LAL,22.0,213.36,106.594120,Connecticut,USA,1996,1,29,...,4.8,4.5,0.5,6.2,0.127,0.182,0.142,0.536,0.052,1996-97
1,Matt Fish,MIA,27.0,210.82,106.594120,North Carolina-Wilmington,USA,1992,2,50,...,0.3,0.8,0.0,-15.1,0.143,0.267,0.265,0.333,0.000,1996-97
2,Matt Bullard,HOU,30.0,208.28,106.594120,Iowa,USA,Undrafted,Undrafted,Undrafted,...,4.5,1.6,0.9,0.9,0.016,0.115,0.151,0.535,0.099,1996-97
3,Marty Conlon,BOS,29.0,210.82,111.130040,Providence,USA,Undrafted,Undrafted,Undrafted,...,7.8,4.4,1.4,-9.0,0.083,0.152,0.167,0.542,0.101,1996-97
4,Martin Muursepp,DAL,22.0,205.74,106.594120,,USA,1996,1,25,...,3.7,1.6,0.5,-14.5,0.109,0.118,0.233,0.482,0.114,1996-97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11695,Matthew Dellavedova,CLE,30.0,190.50,90.718400,St.Mary's College of California,Australia,Undrafted,Undrafted,Undrafted,...,2.8,1.8,4.5,-3.1,0.029,0.085,0.125,0.312,0.337,2020-21
11696,Maurice Harkless,SAC,28.0,200.66,99.790240,St. John's,USA,2012,1,15,...,5.2,2.4,1.2,-2.9,0.017,0.097,0.114,0.527,0.071,2020-21
11697,Max Strus,MIA,25.0,195.58,97.522280,DePaul,USA,Undrafted,Undrafted,Undrafted,...,6.1,1.1,0.6,-4.2,0.011,0.073,0.179,0.597,0.074,2020-21
11698,Marcus Morris Sr.,LAC,31.0,203.20,98.883056,Kansas,USA,2011,1,14,...,13.4,4.1,1.0,4.2,0.025,0.133,0.194,0.614,0.056,2020-21


### Preprocess Data

In [86]:
def add_positions(raw_df, names_to_positions):
    """Adds positions for each player in the data frame"""
    raw_df['positions'] = raw_df['player_name'].map(names_to_positions)
    return raw_df

In [87]:
fn = 'names_to_positions.txt'
# dictionary mapping from names to positions
names_to_positions = load_dict(fn)

df = add_positions(raw_df, names_to_positions)

In [88]:
df

Unnamed: 0.1,Unnamed: 0,player_name,team_abbreviation,age,player_height,player_weight,college,country,draft_year,draft_round,...,reb,ast,net_rating,oreb_pct,dreb_pct,usg_pct,ts_pct,ast_pct,season,positions
0,0,Travis Knight,LAL,22.0,213.36,106.594120,Connecticut,USA,1996,1,...,4.5,0.5,6.2,0.127,0.182,0.142,0.536,0.052,1996-97,C
1,1,Matt Fish,MIA,27.0,210.82,106.594120,North Carolina-Wilmington,USA,1992,2,...,0.8,0.0,-15.1,0.143,0.267,0.265,0.333,0.000,1996-97,C
2,2,Matt Bullard,HOU,30.0,208.28,106.594120,Iowa,USA,Undrafted,Undrafted,...,1.6,0.9,0.9,0.016,0.115,0.151,0.535,0.099,1996-97,F
3,3,Marty Conlon,BOS,29.0,210.82,111.130040,Providence,USA,Undrafted,Undrafted,...,4.4,1.4,-9.0,0.083,0.152,0.167,0.542,0.101,1996-97,F-C
4,4,Martin Muursepp,DAL,22.0,205.74,106.594120,,USA,1996,1,...,1.6,0.5,-14.5,0.109,0.118,0.233,0.482,0.114,1996-97,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11695,11695,Matthew Dellavedova,CLE,30.0,190.50,90.718400,St.Mary's College of California,Australia,Undrafted,Undrafted,...,1.8,4.5,-3.1,0.029,0.085,0.125,0.312,0.337,2020-21,G
11696,11696,Maurice Harkless,SAC,28.0,200.66,99.790240,St. John's,USA,2012,1,...,2.4,1.2,-2.9,0.017,0.097,0.114,0.527,0.071,2020-21,F-G
11697,11697,Max Strus,MIA,25.0,195.58,97.522280,DePaul,USA,Undrafted,Undrafted,...,1.1,0.6,-4.2,0.011,0.073,0.179,0.597,0.074,2020-21,G
11698,11698,Marcus Morris Sr.,LAC,31.0,203.20,98.883056,Kansas,USA,2011,1,...,4.1,1.0,4.2,0.025,0.133,0.194,0.614,0.056,2020-21,


In [89]:
names_to_positions.get('Marcus Morris Sr.')

In [93]:
names_to_positions['Marcus Morris']

'F'

In [101]:
len(df[df.positions.notna()])

10896

In [98]:
names_to_positions.get('Martin Müürsepp')

'F'