In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# requests sends the HTTP GET request that downloads the team page
import requests
# BeautifulSoup parses the downloaded Basketball Reference HTML so tables can be scraped
from bs4 import BeautifulSoup

import warnings





In [2]:
# Season page for the 2022 San Antonio Spurs on Basketball Reference
SPURS_URL = 'https://www.basketball-reference.com/teams/SAS/2022.html'

# Send a GET request for the page. The timeout keeps a stalled connection from
# hanging the kernel, and raise_for_status() stops here on an HTTP error status
# instead of letting an error page be parsed below.
spurs_request = requests.get(SPURS_URL, timeout=30)
spurs_request.raise_for_status()

# Parse the downloaded HTML (spurs_request.content) with the lxml parser
spurs_soup = BeautifulSoup(spurs_request.content, 'lxml')

# .find() returns the first <table> whose id is 'per_game',
# or None when the parsed page contains no such table
spurs_pergame = spurs_soup.find(name = 'table', attrs = {'id' : 'per_game'})

In [3]:
def get_pergame(table=None):
    """Build a DataFrame of per-game stats from the scraped 'per_game' table.

    Parameters
    ----------
    table : optional
        Parsed table element exposing ``find_all('tr')``, whose rows expose
        ``find(...)``. When omitted, the notebook-level ``spurs_pergame``
        is used, which keeps the original zero-argument call working.

    Returns
    -------
    pandas.DataFrame
        One row per player, indexed by 'Name' and sorted by name ascending.
        Values are the cell text exactly as scraped (strings); a stat cell
        that is absent from a row is stored as None.

    Raises
    ------
    ValueError
        If no table is available (``table`` and ``spurs_pergame`` are None).
    """
    if table is None:
        table = spurs_pergame
    if table is None:
        raise ValueError("per-game table not found: spurs_pergame is None")

    # Output column name -> value of the cell's 'data-stat' attribute
    stat_columns = {
        'Age': 'age',
        'GP': 'g',
        'Points_PG': 'pts_per_g',
        'Field_Goal_percent': 'fg_pct',
        '3PT_percent': 'fg3_pct',
        '2PT_percent': 'fg2_pct',
        'EFG_percent': 'efg_pct',
        'FTA': 'fta_per_g',
        'FT%': 'ft_pct',
        'ORB': 'orb_per_g',
        'DRB': 'drb_per_g',
        'Total_Rebounds_PG': 'trb_per_g',
        'Assists_PG': 'ast_per_g',
        'Steals_PG': 'stl_per_g',
        'Blocks_PG': 'blk_per_g',
        'Turnovers_PG': 'tov_per_g',
        'Fouls_PG': 'pf_per_g',
        'Min_PG': 'mp_per_g',
    }

    # Collect one dict per player, then build the DataFrame in a single step
    spurs_stats = []
    for row in table.find_all('tr')[1:]:  # the first 'tr' is the table's header row
        link = row.find('a')
        if link is None:
            # No player link in this row, so it is not a player row; skip it
            # rather than fail on `.text` of None.
            continue

        player = {'Name': link.text.strip()}
        for column, stat in stat_columns.items():
            cell = row.find('td', {'data-stat' : stat})
            player[column] = cell.text if cell is not None else None
        spurs_stats.append(player)

    # Explicit columns keep 'Name' available for set_index even when no rows were found
    pergamestats = pd.DataFrame(spurs_stats, columns=['Name', *stat_columns])

    return pergamestats.set_index('Name').sort_index(ascending=True)

In [4]:
# Build the per-game stats DataFrame and display it as this cell's output
pergamestats = get_pergame()
pergamestats

Unnamed: 0_level_0,Age,GP,Points_PG,Field_Goal_percent,3PT_percent,2PT_percent,EFG_percent,FTA,FT%,ORB,DRB,Total_Rebounds_PG,Assists_PG,Steals_PG,Blocks_PG,Turnovers_PG,Fouls_PG,Min_PG
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Anthony Lamb,24,2,0.0,0.0,0.0,,0.0,0.0,,0.0,0.5,0.5,1.0,0.0,0.0,0.0,0.0,4.0
Bryn Forbes,28,40,9.1,0.432,0.417,0.448,0.539,1.2,0.898,0.1,1.5,1.6,1.0,0.4,0.1,0.8,1.1,16.9
Dejounte Murray,25,68,21.1,0.462,0.327,0.504,0.5,3.6,0.794,1.2,7.1,8.3,9.2,2.0,0.3,2.6,2.0,34.8
Derrick White,27,49,14.4,0.426,0.314,0.519,0.497,3.3,0.869,0.5,3.0,3.5,5.6,1.0,0.9,1.8,2.4,30.3
Devin Vassell,21,71,12.3,0.427,0.361,0.492,0.516,1.4,0.838,0.6,3.7,4.3,1.9,1.1,0.6,0.8,2.0,27.3
Devontae Cacok,25,15,3.1,0.677,,0.677,0.677,0.5,0.571,0.5,2.3,2.8,0.4,0.5,0.5,0.2,1.1,8.1
Doug McDermott,30,51,11.3,0.462,0.422,0.512,0.578,1.0,0.784,0.4,1.9,2.3,1.3,0.3,0.1,0.8,1.5,24.0
Drew Eubanks,24,49,4.7,0.528,0.125,0.55,0.531,1.6,0.747,1.3,2.7,4.0,1.0,0.3,0.6,0.9,1.2,12.1
Jakob Poeltl,26,68,13.5,0.618,1.0,0.618,0.619,2.8,0.495,3.9,5.5,9.3,2.8,0.7,1.7,1.6,3.1,29.0
Jaylen Morris,26,3,0.7,0.0,0.0,0.0,0.0,1.3,0.5,0.0,0.7,0.7,0.7,0.0,0.0,0.3,0.7,5.3


In [5]:
def get_adv(table=None):
    """Build a DataFrame of advanced stats from the scraped 'advanced' table.

    Parameters
    ----------
    table : optional
        Parsed table element exposing ``find_all('tr')``, whose rows expose
        ``find(...)``. When omitted, the table with id 'advanced' is looked
        up in the notebook-level ``spurs_soup``, as the original
        zero-argument call did.

    Returns
    -------
    pandas.DataFrame
        One row per player, indexed by 'Name' and sorted by name ascending.
        Values are the cell text exactly as scraped (strings); a stat cell
        that is absent from a row is stored as None.

    Raises
    ------
    ValueError
        If no table is passed and ``spurs_soup`` has no table with id 'advanced'.
    """
    if table is None:
        table = spurs_soup.find(name = 'table', attrs = {'id' : 'advanced'})
    if table is None:
        raise ValueError("advanced table not found: no <table id='advanced'> in spurs_soup")

    # Output column name -> value of the cell's 'data-stat' attribute
    stat_columns = {
        'Age': 'age',
        'PER': 'per',
        'TS_percent': 'ts_pct',
        '3PAr': 'fg3a_per_fga_pct',
        'FTr': 'fta_per_fga_pct',
        'ORB_percent': 'orb_pct',
        'DRB_percent': 'drb_pct',
        'TRB_percent': 'trb_pct',
        'AST_percent': 'ast_pct',
        'STL_percent': 'stl_pct',
        'BLK_percent': 'blk_pct',
        'TOV_percent': 'tov_pct',
        'USG_percent': 'usg_pct',
        'OWS': 'ows',
        'DWS': 'dws',
        'WS': 'ws',
        'OBPM': 'obpm',
        'DBPM': 'dbpm',
        'BPM': 'bpm',
        'VORP': 'vorp',
    }

    # Collect one dict per player, then build the DataFrame in a single step
    spurs_adv_stats = []
    for row in table.find_all('tr')[1:]:  # the first 'tr' is the table's header row
        link = row.find('a')
        if link is None:
            # No player link in this row, so it is not a player row; skip it
            # rather than fail on `.text` of None.
            continue

        adv_player = {'Name': link.text.strip()}
        for column, stat in stat_columns.items():
            cell = row.find('td', {'data-stat' : stat})
            adv_player[column] = cell.text if cell is not None else None
        spurs_adv_stats.append(adv_player)

    # Explicit columns keep 'Name' available for set_index even when no rows were found
    advstats = pd.DataFrame(spurs_adv_stats, columns=['Name', *stat_columns])
    return advstats.set_index('Name').sort_index(ascending=True)

In [6]:
# Build the advanced stats DataFrame and display it as this cell's output
advstats = get_adv()
advstats

Unnamed: 0_level_0,Age,PER,TS_percent,3PAr,FTr,ORB_percent,DRB_percent,TRB_percent,AST_percent,STL_percent,BLK_percent,TOV_percent,USG_percent,OWS,DWS,WS,OBPM,DBPM,BPM,VORP
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
Anthony Lamb,24,-0.5,0.0,1.0,0.0,0.0,13.2,6.6,27.9,0.0,0.0,0.0,10.6,0.0,0.0,0.0,-6.2,-1.8,-8.0,0.0
Bryn Forbes,28,12.4,0.572,0.51,0.166,0.6,9.2,4.9,8.6,1.2,0.4,9.4,21.9,0.3,0.4,0.7,-0.3,-1.3,-1.6,0.1
Dejounte Murray,25,22.3,0.533,0.237,0.199,3.5,21.6,12.5,40.6,2.8,0.8,11.8,27.3,3.9,3.4,7.3,3.7,1.7,5.4,4.4
Derrick White,27,15.7,0.551,0.453,0.281,1.8,10.5,6.1,25.2,1.6,2.4,12.0,20.7,2.0,1.3,3.3,1.2,0.6,1.8,1.4
Devin Vassell,21,13.3,0.54,0.497,0.13,2.4,14.3,8.3,9.7,1.9,1.8,6.7,18.9,1.3,2.0,3.3,-0.5,0.0,-0.5,0.7
Devontae Cacok,25,20.2,0.675,0.0,0.226,6.9,29.7,18.2,6.9,3.2,4.9,8.1,12.9,0.3,0.3,0.5,0.7,3.5,4.2,0.2
Doug McDermott,30,11.4,0.593,0.551,0.11,1.5,8.4,4.9,7.3,0.5,0.4,8.1,18.3,1.1,0.5,1.6,-0.4,-2.1,-2.5,-0.1
Drew Eubanks,24,16.0,0.588,0.05,0.497,11.2,23.4,17.3,11.1,1.0,4.2,19.2,17.0,0.6,0.8,1.4,-1.9,0.7,-1.2,0.1
Jakob Poeltl,26,20.8,0.613,0.002,0.287,13.9,20.0,16.9,14.0,1.1,5.0,12.7,18.3,4.4,2.5,6.9,0.9,0.7,1.6,1.7
Jaylen Morris,26,-11.4,0.129,0.333,0.667,0.0,13.2,6.6,14.0,0.0,0.0,11.4,23.1,-0.1,0.0,-0.1,-15.8,-5.7,-21.5,-0.1


In [7]:
def get_salaries(path='salaries - Sheet1.csv'):
    """Load player salaries from a CSV file into a DataFrame indexed by name.

    The salaries were not scraped: an earlier attempt to read a table with id
    'salaries2' from ``spurs_soup`` did not work, so the data was exported to
    a CSV by hand and is read with pandas instead.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the salaries CSV. The file must contain a 'Name' column.
        Defaults to the file name the notebook originally hard-coded.

    Returns
    -------
    pandas.DataFrame
        The CSV contents indexed by 'Name' and sorted by name ascending.
        Values are left exactly as pandas read them; no currency parsing
        is done here.
    """
    salaries = pd.read_csv(path)
    return salaries.set_index('Name').sort_index(ascending=True)

In [8]:
# Load the salary table from the CSV and display it as this cell's output
salaries = get_salaries()
salaries

Unnamed: 0_level_0,Salary
Name,Unnamed: 1_level_1
Anthony Lamb,"$85,578"
Bryn Forbes,"$4,500,000"
Dejounte Murray,"$15,428,880"
Derrick White,"$15,178,571"
Devin Vassell,"$4,235,160"
Devontae Cacok,"$364,533"
Doug McDermott,"$13,750,000"
Drew Eubanks,"$2,239,820"
Jakob Poeltl,"$8,750,000"
Jaylen Morris,"$95,930"


In [9]:
# Join the three tables column-wise; with axis=1 the rows are aligned on the
# shared 'Name' index. 'Age' is present in both pergamestats and advstats, so it
# is dropped from advstats here to avoid two identically named 'Age' columns
# (which would make spurs_df['Age'] return a DataFrame instead of a Series).
spurs_df = pd.concat([pergamestats, advstats.drop(columns='Age'), salaries], axis=1)
spurs_df

Unnamed: 0_level_0,Age,GP,Points_PG,Field_Goal_percent,3PT_percent,2PT_percent,EFG_percent,FTA,FT%,ORB,...,TOV_percent,USG_percent,OWS,DWS,WS,OBPM,DBPM,BPM,VORP,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Anthony Lamb,24,2,0.0,0.0,0.0,,0.0,0.0,,0.0,...,0.0,10.6,0.0,0.0,0.0,-6.2,-1.8,-8.0,0.0,"$85,578"
Bryn Forbes,28,40,9.1,0.432,0.417,0.448,0.539,1.2,0.898,0.1,...,9.4,21.9,0.3,0.4,0.7,-0.3,-1.3,-1.6,0.1,"$4,500,000"
Dejounte Murray,25,68,21.1,0.462,0.327,0.504,0.5,3.6,0.794,1.2,...,11.8,27.3,3.9,3.4,7.3,3.7,1.7,5.4,4.4,"$15,428,880"
Derrick White,27,49,14.4,0.426,0.314,0.519,0.497,3.3,0.869,0.5,...,12.0,20.7,2.0,1.3,3.3,1.2,0.6,1.8,1.4,"$15,178,571"
Devin Vassell,21,71,12.3,0.427,0.361,0.492,0.516,1.4,0.838,0.6,...,6.7,18.9,1.3,2.0,3.3,-0.5,0.0,-0.5,0.7,"$4,235,160"
Devontae Cacok,25,15,3.1,0.677,,0.677,0.677,0.5,0.571,0.5,...,8.1,12.9,0.3,0.3,0.5,0.7,3.5,4.2,0.2,"$364,533"
Doug McDermott,30,51,11.3,0.462,0.422,0.512,0.578,1.0,0.784,0.4,...,8.1,18.3,1.1,0.5,1.6,-0.4,-2.1,-2.5,-0.1,"$13,750,000"
Drew Eubanks,24,49,4.7,0.528,0.125,0.55,0.531,1.6,0.747,1.3,...,19.2,17.0,0.6,0.8,1.4,-1.9,0.7,-1.2,0.1,"$2,239,820"
Jakob Poeltl,26,68,13.5,0.618,1.0,0.618,0.619,2.8,0.495,3.9,...,12.7,18.3,4.4,2.5,6.9,0.9,0.7,1.6,1.7,"$8,750,000"
Jaylen Morris,26,3,0.7,0.0,0.0,0.0,0.0,1.3,0.5,0.0,...,11.4,23.1,-0.1,0.0,-0.1,-15.8,-5.7,-21.5,-0.1,"$95,930"


In [10]:
# All of the acquired data (per-game stats, advanced stats and salaries) is now
# joined together in spurs_df; display it again as this cell's output.
spurs_df

Unnamed: 0_level_0,Age,GP,Points_PG,Field_Goal_percent,3PT_percent,2PT_percent,EFG_percent,FTA,FT%,ORB,...,TOV_percent,USG_percent,OWS,DWS,WS,OBPM,DBPM,BPM,VORP,Salary
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Anthony Lamb,24,2,0.0,0.0,0.0,,0.0,0.0,,0.0,...,0.0,10.6,0.0,0.0,0.0,-6.2,-1.8,-8.0,0.0,"$85,578"
Bryn Forbes,28,40,9.1,0.432,0.417,0.448,0.539,1.2,0.898,0.1,...,9.4,21.9,0.3,0.4,0.7,-0.3,-1.3,-1.6,0.1,"$4,500,000"
Dejounte Murray,25,68,21.1,0.462,0.327,0.504,0.5,3.6,0.794,1.2,...,11.8,27.3,3.9,3.4,7.3,3.7,1.7,5.4,4.4,"$15,428,880"
Derrick White,27,49,14.4,0.426,0.314,0.519,0.497,3.3,0.869,0.5,...,12.0,20.7,2.0,1.3,3.3,1.2,0.6,1.8,1.4,"$15,178,571"
Devin Vassell,21,71,12.3,0.427,0.361,0.492,0.516,1.4,0.838,0.6,...,6.7,18.9,1.3,2.0,3.3,-0.5,0.0,-0.5,0.7,"$4,235,160"
Devontae Cacok,25,15,3.1,0.677,,0.677,0.677,0.5,0.571,0.5,...,8.1,12.9,0.3,0.3,0.5,0.7,3.5,4.2,0.2,"$364,533"
Doug McDermott,30,51,11.3,0.462,0.422,0.512,0.578,1.0,0.784,0.4,...,8.1,18.3,1.1,0.5,1.6,-0.4,-2.1,-2.5,-0.1,"$13,750,000"
Drew Eubanks,24,49,4.7,0.528,0.125,0.55,0.531,1.6,0.747,1.3,...,19.2,17.0,0.6,0.8,1.4,-1.9,0.7,-1.2,0.1,"$2,239,820"
Jakob Poeltl,26,68,13.5,0.618,1.0,0.618,0.619,2.8,0.495,3.9,...,12.7,18.3,4.4,2.5,6.9,0.9,0.7,1.6,1.7,"$8,750,000"
Jaylen Morris,26,3,0.7,0.0,0.0,0.0,0.0,1.3,0.5,0.0,...,11.4,23.1,-0.1,0.0,-0.1,-15.8,-5.7,-21.5,-0.1,"$95,930"


In [None]:
# Wrangle data into one function
def wrangle_spurs():
    