## Import Libraries

In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd
import time

## Data Scraping

In [2]:
# Scrape one basketball-reference.com team page per (team, season) and collect
# three kinds of rows:
#   list_of_players          - rows of the "per_game_stats" table
#   list_of_teams            - rows of the "team_and_opponent" table, extended
#                              with wins, losses and a championship flag
#   list_of_players_shooting - rows of the "shooting" table
# Every value is kept as the text scraped from the page.
list_of_players = []
list_of_teams = []
list_of_players_shooting = []

# Site abbreviation -> display name written into the scraped rows.
# NOTE(review): several display names are misspelled ('Cleveland Caveliers',
# 'New Oreland Pelicans', 'Pheonix Suns', 'Portland Trailblazers',
# 'Sacremento Kings'). They are left untouched here because the `arenas`
# dictionary in the "Add Columns" cell is keyed on exactly these strings and
# they end up in the exported CSV files; correct both places together.
teams = {
    'ATL': 'Atlanta Hawks', 
    'BOS': 'Boston Celtics', 
    'BRK': 'Brooklyn Nets', 
    'CHO': 'Charlotte Hornets',
    'CHI': 'Chicago Bulls', 
    'CLE': 'Cleveland Caveliers', 
    'DAL': 'Dallas Mavericks', 
    'DEN': 'Denver Nuggets', 
    'DET': 'Detroit Pistons', 
    'GSW': 'Golden State Warriors', 
    'HOU': 'Houston Rockets', 
    'IND': 'Indiana Pacers', 
    'LAC': 'Los Angeles Clippers', 
    'LAL': 'Los Angeles Lakers', 
    'MEM': 'Memphis Grizzlies', 
    'MIA': 'Miami Heat', 
    'MIL': 'Milwaukee Bucks', 
    'MIN': 'Minnesota Timberwolves', 
    'NOP': 'New Oreland Pelicans', 
    'NYK': 'New York Knicks', 
    'OKC': 'Oklahoma City Thunder', 
    'ORL': 'Orlando Magic', 
    'PHI': 'Philadelphia 76ers', 
    'PHO': 'Pheonix Suns', 
    'POR': 'Portland Trailblazers', 
    'SAC': 'Sacremento Kings', 
    'SAS': 'San Antonio Spurs', 
    'TOR': 'Toronto Raptors', 
    'UTA': 'Utah Jazz' , 
    'WAS': 'Washington Wizards'
    }

# Inclusive range of season-ending years to scrape (2015 is the 2014-15 season).
start_year = 2015
end_year = 2024

# Pause after each page load. Sports Reference asks automated clients to stay
# under 20 requests per minute, i.e. at least 3 seconds between requests.
REQUEST_DELAY_SECONDS = 3.0


def _season_label(season_end_year):
    """Return the season label for the year a season ends in, e.g. 2015 -> '2014-15'."""
    return f"{season_end_year-1}-{str(season_end_year)[2:4]}"


def _row_values(table_row, empty_value):
    """Return the stripped text of each <td> in `table_row`, with blank cells replaced by `empty_value`."""
    return [td.text.strip() if td.text.strip() else empty_value for td in table_row.find_all('td')]


def _player_rows(soup, table_id, season, team_full):
    """Return one [season, team, *cells] list per <tr> in the body of every table with id `table_id`.

    Blank cells become 0. A page without the table contributes no rows.
    """
    rows = []
    for table in soup.find_all('table', id=table_id):
        for table_row in table.find('tbody').find_all('tr'):
            rows.append([season, team_full] + _row_values(table_row, 0))
    return rows


chrome = webdriver.Chrome()
try:
    # get players and teams average stats from 2015-2024
    for season_end_year in range(start_year, end_year + 1):
        season = _season_label(season_end_year)
        for team, team_full in teams.items():
            chrome.get(f'https://www.basketball-reference.com/teams/{team}/{season_end_year}.html') #go to the website
            time.sleep(REQUEST_DELAY_SECONDS)
            soup = BeautifulSoup(chrome.page_source, 'lxml') #parse the rendered page

            # player per-game stats
            list_of_players.extend(_player_rows(soup, "per_game_stats", season, team_full))

            # team stats: rows flagged data-row="1" (presumably the per-game
            # line - confirm against the page); blank cells are filled with
            # the team's display name, which is how the Team column is set
            for table_row in soup.find('table', id="team_and_opponent").find('tbody').find_all('tr', {"data-row": "1"}):
                list_of_teams.append([season] + _row_values(table_row, team_full))

            # team record: wins and losses are added to the row appended just above
            for table_row in soup.find('table', id="team_misc").find('tbody').find_all('tr', {"data-row": "0"}):
                wins = table_row.find('td', {"data-stat": "wins"})
                losses = table_row.find('td', {"data-stat": "losses"})
                list_of_teams[-1].extend([wins.text.strip(), losses.text.strip()])

            # championship flag: "1" when the page's "bling" list has an
            # "important special" item, otherwise "0". Exactly one value is
            # appended per team row, and unrelated errors are no longer hidden.
            bling = soup.find('ul', id="bling")
            is_champion = bling is not None and bling.find('li', {"class": "important special"}) is not None
            list_of_teams[-1].append("1" if is_champion else "0")

            # player shooting splits
            list_of_players_shooting.extend(_player_rows(soup, "shooting", season, team_full))
finally:
    # always close the browser, even when a page fails to load or parse
    chrome.quit()

In [3]:
# Column names are assembled from groups that repeat across the scraped tables,
# so each table's header is spelled out once and in scrape order.
player_info_columns = [
    'Season', 'Team', 'Player', 'Age', 'Position',
    'Games', 'Games_Started', 'Minutes_Played',
]
field_goal_columns = [
    'Field_Goals', 'Field_Goal_Attempts', 'Field_Goal_Percentage',
    'Three_Point_Field_Goals', 'Three_Point_Field_Goal_Attempts', 'Three_Point_Field_Goal_Percentage',
    'Two_Point_Field_Goals', 'Two_Point_Field_Goal_Attempts', 'Two_Point_Field_Goal_Percentage',
]
free_throw_columns = ['Free_Throws', 'Free_Throw_Attempts', 'Free_Throw_Percentage']
box_score_columns = [
    'Offensive_Rebounds', 'Defensive_Rebounds', 'Total_Rebounds',
    'Assists', 'Steals', 'Blocks', 'Turnovers', 'Personal_Fouls', 'Points',
]
shooting_split_columns = [
    'Field_Goal_Percentage',
    'Average_distance_(ft.)_of_FGA',
    'Percentage_of_FGAs_that_are_2_Pt_FGAs',
    'Percentage_of_FGAs_that_are_0_to_3_feet_from_the_basket',
    'Percentage_of_FGAs_that_are_3_to_10_feet_from_the_basket',
    'Percentage_of_FGAs_that_are_10_to_16_feet_from_the_basket',
    'Percentage_of_FGAs_that_are_more_than_16_feet_from_the_basket',
    'Percentage_of_FGAs_that_are_3_Pt_FGAs',
    'FG_percentage_on_2_Pt_FGAs',
    'FG_percentage_on_FGAs_that_are_0_to_3_feet_from_the_basket',
    'FG_percentage_on_FGAs_that_are_3_to_10_feet_from_the_basket',
    'FG_percentage_on_FGAs_that_are_10_to_16_feet_from_the_basket',
    'FG_percentage_on_FGAs_that_are_more_than_16_feet_from_the_basket',
    'FG_percentage_on_FGAs_that_are_3_Pt_FGAs',
    'Percentage_of_2_Pt_FGs_that_were_assisted',
    'Percentage_of_3_Pt_FGs_that_were_assisted',
    'Percentage_of_FGs_that_are_dunk_attempts',
    'Number_of_made_dunk_attempts',
    'Percentage_of_3_Pt_FGAs_from_the_corner',
    'FG_percentage_of_3_Pt_FGAs_from_the_corner',
    'Heaves_attempts_(beyond_half_court)',
    'Heaves_made_(beyond_half_court)',
]

# df1: player per-game rows collected in list_of_players
df1_columns = (
    player_info_columns
    + field_goal_columns
    + ['Effective_Field_Goal_Percentage']
    + free_throw_columns
    + box_score_columns
    + ['Awards']
)
df1 = pd.DataFrame(list_of_players, columns=df1_columns)

# df2: team rows collected in list_of_teams (stats, then record and title flag)
df2_columns = (
    ['Season', 'Team', 'Minutes_Played']
    + field_goal_columns
    + free_throw_columns
    + box_score_columns
    + ['Wins', 'Losses', 'NBA_Championships']
)
df2 = pd.DataFrame(list_of_teams, columns=df2_columns)

# df3: player shooting rows collected in list_of_players_shooting
df3_columns = player_info_columns + shooting_split_columns + ['Awards']
df3 = pd.DataFrame(list_of_players_shooting, columns=df3_columns)

In [4]:
# Preview the first five player per-game rows.
df1.head(n=5)

Unnamed: 0,Season,Team,Player,Age,Position,Games,Games_Started,Minutes_Played,Field_Goals,Field_Goal_Attempts,...,Offensive_Rebounds,Defensive_Rebounds,Total_Rebounds,Assists,Steals,Blocks,Turnovers,Personal_Fouls,Points,Awards
0,2014-15,Atlanta Hawks,Paul Millsap,29,PF,73,73,32.7,6.1,12.7,...,1.9,5.9,7.8,3.1,1.8,0.9,2.3,2.8,16.7,AS
1,2014-15,Atlanta Hawks,Kyle Korver,33,SG,75,75,32.2,3.9,8.0,...,0.2,3.9,4.1,2.6,0.7,0.6,1.4,1.9,12.1,AS
2,2014-15,Atlanta Hawks,DeMarre Carroll,28,SF,70,69,31.3,4.5,9.3,...,1.4,3.9,5.3,1.7,1.3,0.2,1.1,2.2,12.6,DPOY-15
3,2014-15,Atlanta Hawks,Al Horford,28,C,76,76,30.5,6.8,12.7,...,1.7,5.4,7.2,3.2,0.9,1.3,1.3,1.6,15.2,AS
4,2014-15,Atlanta Hawks,Jeff Teague,26,PG,73,72,30.5,5.6,12.2,...,0.4,2.1,2.5,7.0,1.7,0.4,2.8,1.9,15.9,AS


In [5]:
# Preview the first five team rows.
df2.head(n=5)

Unnamed: 0,Season,Team,Minutes_Played,Field_Goals,Field_Goal_Attempts,Field_Goal_Percentage,Three_Point_Field_Goals,Three_Point_Field_Goal_Attempts,Three_Point_Field_Goal_Percentage,Two_Point_Field_Goals,...,Total_Rebounds,Assists,Steals,Blocks,Turnovers,Personal_Fouls,Points,Wins,Losses,NBA_Championships
0,2014-15,Atlanta Hawks,240.6,38.1,81.7,0.466,10.0,26.2,0.38,28.1,...,40.6,25.7,9.1,4.6,14.2,17.8,102.5,60,22,0
1,2014-15,Boston Celtics,242.4,38.9,87.9,0.443,8.0,24.6,0.327,30.9,...,43.8,24.5,8.2,3.6,13.8,21.2,101.4,40,42,0
2,2014-15,Brooklyn Nets,243.0,37.4,83.0,0.451,6.6,19.9,0.331,30.8,...,42.4,20.9,7.0,4.1,13.8,19.3,98.0,38,44,0
3,2014-15,Charlotte Hornets,242.7,35.5,84.5,0.42,6.1,19.1,0.318,29.5,...,44.1,20.2,6.1,5.5,11.9,18.2,94.2,33,49,0
4,2014-15,Chicago Bulls,242.4,36.6,82.9,0.442,7.9,22.3,0.353,28.7,...,45.7,21.7,6.3,5.8,14.0,18.2,100.8,50,32,0


In [6]:
# Preview the first five player shooting rows.
df3.head(n=5)

Unnamed: 0,Season,Team,Player,Age,Position,Games,Games_Started,Minutes_Played,Field_Goal_Percentage,Average_distance_(ft.)_of_FGA,...,FG_percentage_on_FGAs_that_are_3_Pt_FGAs,Percentage_of_2_Pt_FGs_that_were_assisted,Percentage_of_3_Pt_FGs_that_were_assisted,Percentage_of_FGs_that_are_dunk_attempts,Number_of_made_dunk_attempts,Percentage_of_3_Pt_FGAs_from_the_corner,FG_percentage_of_3_Pt_FGAs_from_the_corner,Heaves_attempts_(beyond_half_court),Heaves_made_(beyond_half_court),Awards
0,2014-15,Atlanta Hawks,Kyle Korver,33,SG,75,75,2418,0.487,21.7,...,0.492,0.915,0.964,0.005,3,0.247,0.541,0,0,AS
1,2014-15,Atlanta Hawks,Paul Millsap,29,PF,73,73,2390,0.476,9.8,...,0.356,0.59,0.948,0.057,44,0.241,0.442,0,0,AS
2,2014-15,Atlanta Hawks,Al Horford,28,C,76,76,2318,0.538,10.2,...,0.306,0.789,0.909,0.088,81,0.806,0.379,0,0,AS
3,2014-15,Atlanta Hawks,Jeff Teague,26,PG,73,72,2228,0.46,10.0,...,0.343,0.193,0.69,0.024,20,0.116,0.333,0,0,AS
4,2014-15,Atlanta Hawks,DeMarre Carroll,28,SF,70,69,2189,0.487,13.8,...,0.395,0.727,0.992,0.018,12,0.438,0.444,0,0,DPOY-15


## Remove Missing Values

In [7]:
# Missing values per column in the player table, largest count first.
df1.isna().sum().sort_values(ascending=False)

Two_Point_Field_Goal_Attempts        4
Two_Point_Field_Goal_Percentage      4
Points                               4
Personal_Fouls                       4
Turnovers                            4
Blocks                               4
Steals                               4
Assists                              4
Total_Rebounds                       4
Defensive_Rebounds                   4
Offensive_Rebounds                   4
Free_Throw_Percentage                4
Free_Throw_Attempts                  4
Free_Throws                          4
Effective_Field_Goal_Percentage      4
Awards                               4
Two_Point_Field_Goals                4
Three_Point_Field_Goal_Percentage    4
Three_Point_Field_Goal_Attempts      4
Three_Point_Field_Goals              4
Field_Goal_Percentage                4
Field_Goal_Attempts                  4
Field_Goals                          4
Minutes_Played                       4
Games_Started                        4
Games                    

In [8]:
# Missing values per column in the team table, largest count first.
df2.isna().sum().sort_values(ascending=False)

Season                               0
Free_Throw_Percentage                0
Losses                               0
Wins                                 0
Points                               0
Personal_Fouls                       0
Turnovers                            0
Blocks                               0
Steals                               0
Assists                              0
Total_Rebounds                       0
Defensive_Rebounds                   0
Offensive_Rebounds                   0
Free_Throw_Attempts                  0
Team                                 0
Free_Throws                          0
Two_Point_Field_Goal_Percentage      0
Two_Point_Field_Goal_Attempts        0
Two_Point_Field_Goals                0
Three_Point_Field_Goal_Percentage    0
Three_Point_Field_Goal_Attempts      0
Three_Point_Field_Goals              0
Field_Goal_Percentage                0
Field_Goal_Attempts                  0
Field_Goals                          0
Minutes_Played           

In [9]:
# Missing values per column in the shooting table, largest count first.
df3.isna().sum().sort_values(ascending=False)

Percentage_of_FGAs_that_are_3_Pt_FGAs                               8
FG_percentage_on_2_Pt_FGAs                                          8
Heaves_made_(beyond_half_court)                                     8
Heaves_attempts_(beyond_half_court)                                 8
FG_percentage_of_3_Pt_FGAs_from_the_corner                          8
Percentage_of_3_Pt_FGAs_from_the_corner                             8
Number_of_made_dunk_attempts                                        8
Percentage_of_FGs_that_are_dunk_attempts                            8
Percentage_of_3_Pt_FGs_that_were_assisted                           8
Percentage_of_2_Pt_FGs_that_were_assisted                           8
FG_percentage_on_FGAs_that_are_3_Pt_FGAs                            8
FG_percentage_on_FGAs_that_are_more_than_16_feet_from_the_basket    8
FG_percentage_on_FGAs_that_are_10_to_16_feet_from_the_basket        8
FG_percentage_on_FGAs_that_are_3_to_10_feet_from_the_basket         8
FG_percentage_on_FGA

In [10]:
# Discard every row with at least one missing value from the two player tables
# (the null counts above report 4 per affected column in df1 and 8 in df3).
df1, df3 = (frame.dropna() for frame in (df1, df3))

## Change Columns

In [11]:
# Change awards to all-star: instead of keeping every award a player received
# in a season, keep only a 0/1 flag for whether the award text matches the
# "AS" pattern below.
#
# The guard keeps the cell safe to re-run: once 'Awards' has been converted,
# the column holds integers and applying .str to it again would raise.
if 'Awards' in df1.columns:
    # rename() without inplace returns a new frame, so the column assignment
    # below writes to df1 itself rather than to the result of dropna(), which
    # is what pandas warned about in this cell's captured output.
    df1 = df1.rename(columns={'Awards': 'All_Star'})
    # na=False maps non-text cells to False directly (blank awards were scraped
    # as the integer 0), so no fillna on an object column is needed.
    df1['All_Star'] = df1['All_Star'].str.match(r"[^\ ]*AS\W*", na=False).astype(int)
df1

  df1['All_Star'] = df1['All_Star'].str.match(r"[^\ ]*AS\W*").fillna(False).astype(int)


Unnamed: 0,Season,Team,Player,Age,Position,Games,Games_Started,Minutes_Played,Field_Goals,Field_Goal_Attempts,...,Offensive_Rebounds,Defensive_Rebounds,Total_Rebounds,Assists,Steals,Blocks,Turnovers,Personal_Fouls,Points,All_Star
0,2014-15,Atlanta Hawks,Paul Millsap,29,PF,73,73,32.7,6.1,12.7,...,1.9,5.9,7.8,3.1,1.8,0.9,2.3,2.8,16.7,1
1,2014-15,Atlanta Hawks,Kyle Korver,33,SG,75,75,32.2,3.9,8.0,...,0.2,3.9,4.1,2.6,0.7,0.6,1.4,1.9,12.1,1
2,2014-15,Atlanta Hawks,DeMarre Carroll,28,SF,70,69,31.3,4.5,9.3,...,1.4,3.9,5.3,1.7,1.3,0.2,1.1,2.2,12.6,0
3,2014-15,Atlanta Hawks,Al Horford,28,C,76,76,30.5,6.8,12.7,...,1.7,5.4,7.2,3.2,0.9,1.3,1.3,1.6,15.2,1
4,2014-15,Atlanta Hawks,Jeff Teague,26,PG,73,72,30.5,5.6,12.2,...,0.4,2.1,2.5,7.0,1.7,0.4,2.8,1.9,15.9,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6069,2023-24,Washington Wizards,Eugene Omoruyi,26,SF,43,0,9.1,1.9,3.9,...,0.8,1.2,2.0,0.8,0.6,0.1,0.5,1.4,4.8,0
6070,2023-24,Washington Wizards,Jules Bernard,24,SG,19,0,7.8,1.5,3.4,...,0.5,0.8,1.4,0.8,0.2,0.1,0.4,0.7,3.9,0
6071,2023-24,Washington Wizards,Ryan Rollins,21,PG,10,0,6.6,1.3,2.5,...,0.2,0.9,1.1,1.1,0.8,0.3,0.7,0.7,4.1,0
6072,2023-24,Washington Wizards,Hamidou Diallo,25,SG,2,0,2.5,0.5,1.0,...,1.0,0.0,1.0,0.5,1.0,0.0,0.5,0.5,1.0,0


## Add Columns

In [12]:
# Current home-arena coordinates as [latitude, longitude], keyed by the team
# display names used in df2['Team'].
# NOTE(review): the keys repeat the misspellings from the `teams` dictionary in
# the scraping cell ('Cleveland Caveliers', 'New Oreland Pelicans',
# 'Pheonix Suns', 'Portland Trailblazers', 'Sacremento Kings'); they must keep
# matching df2['Team'], so correct both places together.
arenas = {
    'Atlanta Hawks': [33.757222, -84.396389],
    'Boston Celtics': [42.366303, -71.062228],
    'Brooklyn Nets': [40.682661, -73.975225],
    'Charlotte Hornets': [35.225, -80.839167],
    'Chicago Bulls': [41.880556, -87.674167],
    'Cleveland Caveliers': [41.496389, -81.688056],
    'Dallas Mavericks': [32.790556, -96.810278],
    'Denver Nuggets': [39.748611, -105.0075],
    'Detroit Pistons': [42.341111, -83.055],
    'Golden State Warriors': [37.768056, -122.3875],
    'Houston Rockets': [29.750833, -95.362222],
    'Indiana Pacers': [39.763889, -86.155556],
    'Los Angeles Clippers': [34.043056, -118.267222],
    'Los Angeles Lakers': [34.043543, -118.265725],
    'Memphis Grizzlies': [35.138333, -90.050556],
    'Miami Heat': [25.781389, -80.188056],
    'Milwaukee Bucks': [43.045028, -87.918167],
    'Minnesota Timberwolves': [44.979444, -93.276111],
    'New Oreland Pelicans': [29.948889, -90.081944],
    'New York Knicks': [40.750556, -73.993611],
    'Oklahoma City Thunder': [35.463333, -97.515],
    'Orlando Magic': [28.539167, -81.383611],
    'Philadelphia 76ers': [39.901111, -75.171944],
    'Pheonix Suns': [33.445833, -112.071389],
    'Portland Trailblazers': [45.531667, -122.666667],
    'Sacremento Kings': [38.580361, -121.499611],
    'San Antonio Spurs': [29.426944, -98.4375],
    'Toronto Raptors': [43.643333, -79.379167],
    'Utah Jazz': [40.768333, -111.901111],
    'Washington Wizards': [38.898056, -77.020833]
}

# Teams that changed arenas inside the scraped window:
# team -> (first season start year at the current arena, old latitude, old longitude).
# Seasons starting before that year get the old coordinates, later ones the
# entry from `arenas`.
relocations = {
    'Sacremento Kings': (2016, 38.649167, -121.518056),
    'Detroit Pistons': (2017, 42.696944, -83.245556),
    'Milwaukee Bucks': (2018, 43.043611, -87.916944),
    'Golden State Warriors': (2019, 37.750278, -122.203056),
}

for team, location in arenas.items():
    is_team = df2['Team'] == team
    if team not in relocations:
        # same arena for every scraped season
        df2.loc[is_team, 'Latitude'] = location[0]
        df2.loc[is_team, 'Longitude'] = location[1]
        continue

    move_year, old_latitude, old_longitude = relocations[team]
    # 'Season' looks like '2014-15'; its first four characters are the start year
    season_start_year = df2['Season'].str[:4].astype(int)
    before_move = is_team & (season_start_year < move_year)
    after_move = is_team & (season_start_year >= move_year)
    df2.loc[before_move, 'Latitude'] = old_latitude
    df2.loc[before_move, 'Longitude'] = old_longitude
    df2.loc[after_move, 'Latitude'] = location[0]
    df2.loc[after_move, 'Longitude'] = location[1]
df2

Unnamed: 0,Season,Team,Minutes_Played,Field_Goals,Field_Goal_Attempts,Field_Goal_Percentage,Three_Point_Field_Goals,Three_Point_Field_Goal_Attempts,Three_Point_Field_Goal_Percentage,Two_Point_Field_Goals,...,Steals,Blocks,Turnovers,Personal_Fouls,Points,Wins,Losses,NBA_Championships,Latitude,Longitude
0,2014-15,Atlanta Hawks,240.6,38.1,81.7,.466,10.0,26.2,.380,28.1,...,9.1,4.6,14.2,17.8,102.5,60,22,0,33.757222,-84.396389
1,2014-15,Boston Celtics,242.4,38.9,87.9,.443,8.0,24.6,.327,30.9,...,8.2,3.6,13.8,21.2,101.4,40,42,0,42.366303,-71.062228
2,2014-15,Brooklyn Nets,243.0,37.4,83.0,.451,6.6,19.9,.331,30.8,...,7.0,4.1,13.8,19.3,98.0,38,44,0,40.682661,-73.975225
3,2014-15,Charlotte Hornets,242.7,35.5,84.5,.420,6.1,19.1,.318,29.5,...,6.1,5.5,11.9,18.2,94.2,33,49,0,35.225000,-80.839167
4,2014-15,Chicago Bulls,242.4,36.6,82.9,.442,7.9,22.3,.353,28.7,...,6.3,5.8,14.0,18.2,100.8,50,32,0,41.880556,-87.674167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,2023-24,Sacremento Kings,242.1,43.3,90.9,.477,14.4,39.3,.366,29.0,...,7.6,4.2,13.1,19.9,116.6,46,36,0,38.580361,-121.499611
296,2023-24,San Antonio Spurs,241.8,41.9,90.7,.462,12.6,36.4,.347,29.3,...,7.1,6.3,15.1,17.2,112.1,22,60,0,29.426944,-98.437500
297,2023-24,Toronto Raptors,241.5,42.3,89.7,.471,11.5,33.1,.347,30.8,...,7.7,4.7,14.0,18.4,112.4,25,57,0,43.643333,-79.379167
298,2023-24,Utah Jazz,241.5,42.0,89.9,.467,12.9,36.5,.354,29.1,...,6.5,5.6,15.7,18.6,115.7,31,51,0,40.768333,-111.901111


## Remove Columns

In [13]:
# Shooting-table columns that are not kept; what remains is the player
# identity columns plus the field-goal percentages by distance.
unused_shooting_columns = [
    'Average_distance_(ft.)_of_FGA',
    'Percentage_of_FGAs_that_are_2_Pt_FGAs',
    'Percentage_of_FGAs_that_are_0_to_3_feet_from_the_basket',
    'Percentage_of_FGAs_that_are_3_to_10_feet_from_the_basket',
    'Percentage_of_FGAs_that_are_10_to_16_feet_from_the_basket',
    'Percentage_of_FGAs_that_are_more_than_16_feet_from_the_basket',
    'Percentage_of_FGAs_that_are_3_Pt_FGAs',
    'FG_percentage_on_2_Pt_FGAs',
    'Percentage_of_2_Pt_FGs_that_were_assisted',
    'Percentage_of_3_Pt_FGs_that_were_assisted',
    'Percentage_of_FGs_that_are_dunk_attempts',
    'Number_of_made_dunk_attempts',
    'Percentage_of_3_Pt_FGAs_from_the_corner',
    'FG_percentage_of_3_Pt_FGAs_from_the_corner',
    'Heaves_attempts_(beyond_half_court)',
    'Heaves_made_(beyond_half_court)',
    'Awards',
]
df3 = df3.drop(columns=unused_shooting_columns)

In [14]:
# Show the columns left in the shooting table after the drop above.
df3.columns

Index(['Season', 'Team', 'Player', 'Age', 'Position', 'Games', 'Games_Started',
       'Minutes_Played', 'Field_Goal_Percentage',
       'FG_percentage_on_FGAs_that_are_0_to_3_feet_from_the_basket',
       'FG_percentage_on_FGAs_that_are_3_to_10_feet_from_the_basket',
       'FG_percentage_on_FGAs_that_are_10_to_16_feet_from_the_basket',
       'FG_percentage_on_FGAs_that_are_more_than_16_feet_from_the_basket',
       'FG_percentage_on_FGAs_that_are_3_Pt_FGAs'],
      dtype='object')

## Save as CSV Files

In [15]:
# Write each table to its own CSV file in the working directory, leaving out
# the DataFrame index.
for csv_name, frame in (
    ("players.csv", df1),
    ("teams.csv", df2),
    ("players_shooting_pct.csv", df3),
):
    frame.to_csv(csv_name, index=False)