# NBA

In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [51]:
!pip install nba_api



In [52]:
# Get all players
from nba_api.stats.static import players

nba_players = players.get_players()

nba_df = pd.DataFrame(nba_players)
nba_df["league"] = "NBA"

wnba_players = players.get_wnba_players()
wnba_df = pd.DataFrame(wnba_players)
wnba_df["league"] = "WNBA"

# Combine
bball_players = pd.concat([nba_df, wnba_df], ignore_index=True)

# Get info
bball_players.info()

# Get first 5 rows
bball_players.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6135 entries, 0 to 6134
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   id          6135 non-null   int64 
 1   full_name   6135 non-null   object
 2   first_name  6135 non-null   object
 3   last_name   6135 non-null   object
 4   is_active   6135 non-null   bool  
 5   league      6135 non-null   object
dtypes: bool(1), int64(1), object(4)
memory usage: 245.8+ KB


Unnamed: 0,id,full_name,first_name,last_name,is_active,league
0,76001,Alaa Abdelnaby,Alaa,Abdelnaby,False,NBA
1,76002,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False,NBA
2,76003,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False,NBA
3,51,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False,NBA
4,1505,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False,NBA


In [53]:
# Write to CSV
bball_players.to_csv('./data/bball_players.csv', index=False)

# NFL

In [54]:
!pip install nfl_data_py



In [55]:
import nfl_data_py as nfl

# Get all players
all_nfl_players = nfl.import_ids()

# Print columns
all_nfl_players.columns

Index(['height', 'yahoo_id', 'pfr_id', 'draft_pick', 'gsis_id', 'stats_id',
       'position', 'name', 'rotoworld_id', 'weight', 'sportradar_id',
       'birthdate', 'twitter_username', 'fantasy_data_id', 'sleeper_id',
       'nfl_id', 'swish_id', 'ktc_id', 'cbs_id', 'team', 'age', 'cfbref_id',
       'espn_id', 'draft_ovr', 'rotowire_id', 'draft_round', 'stats_global_id',
       'pff_id', 'merge_name', 'mfl_id', 'db_season', 'college', 'draft_year',
       'fantasypros_id', 'fleaflicker_id'],
      dtype='object')

In [56]:
# Keep stats_id and full_name
nfl_players = all_nfl_players[['stats_id', 'name']]

# Rename name to full_name
nfl_players.rename(columns={'name': 'full_name'}, inplace=True)

# Create first and last name columns
nfl_players['first_name'] = nfl_players['full_name'].apply(lambda x: x.split(' ')[0])

def last_name_func(name):
    len_name = len(name.split(' '))
    if len_name == 1:
        return name
    else:
        return ' '.join(name.split(' ')[1:])

    


nfl_players['last_name'] = nfl_players['full_name'].apply(last_name_func)

# Rename stats_id to id
nfl_players.rename(columns={'stats_id': 'id'}, inplace=True)

# Order id, first_name, last_name, full_name
nfl_players = nfl_players[['id', 'first_name', 'last_name', 'full_name']]
nfl_players['league'] = 'NFL'

print(nfl_players.info())

nfl_players.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11756 entries, 0 to 11755
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          9489 non-null   float64
 1   first_name  11756 non-null  object 
 2   last_name   11756 non-null  object 
 3   full_name   11756 non-null  object 
 4   league      11756 non-null  object 
dtypes: float64(1), object(4)
memory usage: 459.3+ KB
None


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nfl_players.rename(columns={'name': 'full_name'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nfl_players['first_name'] = nfl_players['full_name'].apply(lambda x: x.split(' ')[0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nfl_players['last_name'] = nfl_players['full_name'].apply(last_name_func)
A value is trying to be set on a copy 

Unnamed: 0,id,first_name,last_name,full_name,league
0,40900.0,Caleb,Williams,Caleb Williams,NFL
1,40881.0,Drake,Maye,Drake Maye,NFL
2,40896.0,Jayden,Daniels,Jayden Daniels,NFL
3,40875.0,Bo,Nix,Bo Nix,NFL
4,40889.0,Michael,Penix Jr.,Michael Penix Jr.,NFL


In [57]:
# Save to CSV
nfl_players.to_csv('./data/nfl_players.csv', index=False)

# All Sports

In [58]:
# Combine all csvs in data folder besides all_players.csv
import os

# Get all files in data folder
files = os.listdir('./data')

# Remove all_players.csv
try:
    files.remove('all_players.csv')
except:
    pass

# Create empty list to store DataFrames
dfs = []

# Loop through files
for file in files:
    # Read csv
    df = pd.read_csv(f'./data/{file}')
    # Append to dfs
    dfs.append(df)

# Concatenate all DataFrames in dfs
all_players = pd.concat(dfs)

all_players.head()

Unnamed: 0,id,full_name,first_name,last_name,is_active,league
0,76001.0,Alaa Abdelnaby,Alaa,Abdelnaby,False,NBA
1,76002.0,Zaid Abdul-Aziz,Zaid,Abdul-Aziz,False,NBA
2,76003.0,Kareem Abdul-Jabbar,Kareem,Abdul-Jabbar,False,NBA
3,51.0,Mahmoud Abdul-Rauf,Mahmoud,Abdul-Rauf,False,NBA
4,1505.0,Tariq Abdul-Wahad,Tariq,Abdul-Wahad,False,NBA


In [59]:
# Rename id to api_id
all_players.rename(columns={'id': 'api_id'}, inplace=True)

# Sort by full_name
all_players.sort_values('full_name', inplace=True)

# Create id column
all_players['id'] = range(1, len(all_players) + 1)

# Reorder columns
all_players = all_players[['id', 'api_id', 'first_name', 'last_name', 'full_name', 'league']]

# Reset index
all_players.reset_index(drop=True, inplace=True)

# print info
all_players.info()

# Save to CSV
all_players.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17891 entries, 0 to 17890
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   id          17891 non-null  int64  
 1   api_id      15624 non-null  float64
 2   first_name  17882 non-null  object 
 3   last_name   17891 non-null  object 
 4   full_name   17891 non-null  object 
 5   league      17891 non-null  object 
dtypes: float64(1), int64(1), object(4)
memory usage: 838.8+ KB


Unnamed: 0,id,api_id,first_name,last_name,full_name,league
0,1,25480.0,,Ben Chappell,Ben Chappell,NFL
1,2,201510.0,A'Quonesia,Franklin,A'Quonesia Franklin,WNBA
2,3,29280.0,A'Shawn,Robinson,A'Shawn Robinson,NFL
3,4,203407.0,A'dia,Mathies,A'dia Mathies,WNBA
4,5,1628932.0,A'ja,Wilson,A'ja Wilson,WNBA


In [60]:
# Save to CSV
all_players.to_csv('./data/all_players.csv', index=False)