# Project Setup

## Imports

In [23]:
import base64
from io import BytesIO
from io import StringIO
from pathlib import Path

import matplotlib.pyplot as plt
import mpld3
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import requests
from bs4 import BeautifulSoup
from matplotlib import rcParams as rc


# Gathering the data

## Web-scrape function

In [28]:
def scrape_website(url, name):
    """Download ``url`` and save the response body as ``Team Data/{name}.html``.

    Parameters
    ----------
    url : str
        Address of the page to fetch with an HTTP GET.
    name : str
        File stem of the saved page; the file is written as UTF-8.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Without this check an
        error page (for example a rate-limit response) would be saved and
        later parsed as if it were real data.
    requests.Timeout
        If the server does not answer within 30 seconds.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Create the cache folder on first use so a fresh checkout works.
    out_dir = Path("Team Data")
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / f"{name}.html").write_text(response.text, encoding="utf-8")

## Roster

In [29]:
# Download the team page and cache it as Team Data/Roster.html
scrape_website("https://www.basketball-reference.com/teams/BOS/2024.html", "Roster")

# Parse from the cached copy on disk rather than from the live response
with open("Team Data/Roster.html", "r", encoding="utf-8") as f:
    page_roster = f.read()

# Locate the element whose HTML id is "roster"
soup = BeautifulSoup(page_roster, "html.parser")
roster_table = soup.find(id="roster")
if roster_table is None:
    # Fail with a clear message instead of letting read_html choke on "None"
    raise ValueError('No element with id="roster" found in Team Data/Roster.html')

# Passing a literal HTML string to read_html is deprecated; wrap it in StringIO
roster = pd.read_html(StringIO(str(roster_table)))[0]

roster


Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.



Unnamed: 0,No.,Player,Pos,Ht,Wt,Birth Date,Unnamed: 6,Exp,College
0,11,Payton Pritchard,PG,6-1,195,"January 28, 1998",us,3,Oregon
1,30,Sam Hauser,SF,6-8,215,"December 8, 1997",us,2,"Marquette, Virginia"
2,0,Jayson Tatum,PF,6-8,210,"March 3, 1998",us,6,Duke
3,9,Derrick White,SG,6-4,190,"July 2, 1994",us,6,"Colorado-Colorado Springs, Colorado"
4,7,Jaylen Brown,SF,6-6,223,"October 24, 1996",us,7,California
5,4,Jrue Holiday,PG,6-4,205,"June 12, 1990",us,14,UCLA
6,42,Al Horford,C,6-9,240,"June 3, 1986",do,16,Florida
7,40,Luke Kornet,C,7-2,250,"July 15, 1995",us,6,Vanderbilt
8,8,Kristaps Porziņģis,C,7-2,240,"August 2, 1995",lv,7,
9,12,Oshae Brissett,SF,6-7,210,"June 20, 1998",ca,4,Syracuse


## Team Averages

In [None]:
# Download the team page and cache it as Team Data/Averages.html
scrape_website("https://www.basketball-reference.com/teams/BOS/2024.html", "Averages")

# Parse from the cached copy on disk rather than from the live response
with open("Team Data/Averages.html", "r", encoding="utf-8") as f:
    page_averages = f.read()

# Locate the element whose HTML id is "per_game"
soup = BeautifulSoup(page_averages, "html.parser")
averages_table = soup.find(id="per_game")
if averages_table is None:
    # Fail with a clear message instead of letting read_html choke on "None"
    raise ValueError('No element with id="per_game" found in Team Data/Averages.html')

# Passing a literal HTML string to read_html is deprecated; wrap it in StringIO
averages = pd.read_html(StringIO(str(averages_table)))[0]

# Split on the first space only, so everything after the first name
# (e.g. "Tillman Sr.") stays together in 'Last Name'
averages[['First Name', 'Last Name']] = averages['Player'].str.split(' ', n=1, expand=True)


averages

  averages = pd.read_html(str(averages_table))[0]


Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,First Name,Last Name
0,1,Jayson Tatum,25,74,74,35.7,9.1,19.3,0.471,3.1,...,7.2,8.1,4.9,1.0,0.6,2.5,2.0,26.9,Jayson,Tatum
1,2,Jaylen Brown,27,70,70,33.5,9.0,17.9,0.499,2.1,...,4.3,5.5,3.6,1.2,0.5,2.4,2.6,23.0,Jaylen,Brown
2,3,Jrue Holiday,33,69,69,32.8,4.8,10.0,0.48,2.0,...,4.2,5.4,4.8,0.9,0.8,1.8,1.6,12.5,Jrue,Holiday
3,4,Derrick White,29,73,73,32.6,5.3,11.5,0.461,2.7,...,3.5,4.2,5.2,1.0,1.2,1.5,2.1,15.2,Derrick,White
4,5,Kristaps Porziņģis,28,57,57,29.6,6.8,13.2,0.516,1.9,...,5.5,7.2,2.0,0.7,1.9,1.6,2.7,20.1,Kristaps,Porziņģis
5,6,Al Horford,37,65,33,26.8,3.3,6.4,0.511,1.7,...,5.1,6.4,2.6,0.6,1.0,0.7,1.4,8.6,Al,Horford
6,7,Payton Pritchard,26,82,5,22.3,3.6,7.7,0.468,1.8,...,2.4,3.2,3.4,0.5,0.1,0.7,1.3,9.6,Payton,Pritchard
7,8,Sam Hauser,26,79,13,22.0,3.2,7.1,0.446,2.5,...,2.9,3.5,1.0,0.5,0.3,0.4,1.3,9.0,Sam,Hauser
8,9,Luke Kornet,28,63,7,15.6,2.3,3.2,0.7,0.0,...,2.3,4.1,1.1,0.4,1.0,0.3,1.2,5.3,Luke,Kornet
9,10,Xavier Tillman Sr.,25,20,2,13.7,1.7,3.3,0.515,0.4,...,2.0,2.7,1.0,0.5,0.5,0.3,0.8,4.0,Xavier,Tillman Sr.


## Team Totals

In [None]:
# Download the team page and cache it as Team Data/Totals.html
scrape_website("https://www.basketball-reference.com/teams/BOS/2024.html", "Totals")

# Parse from the cached copy on disk rather than from the live response
with open("Team Data/Totals.html", "r", encoding="utf-8") as f:
    page_totals = f.read()

# Locate the element whose HTML id is "totals"
soup = BeautifulSoup(page_totals, "html.parser")
totals_table = soup.find(id="totals")
if totals_table is None:
    # Fail with a clear message instead of letting read_html choke on "None"
    raise ValueError('No element with id="totals" found in Team Data/Totals.html')

# Passing a literal HTML string to read_html is deprecated; wrap it in StringIO
totals_temp = pd.read_html(StringIO(str(totals_table)))[0]

# Drop the final row of the scraped table
# NOTE(review): presumably the team-total summary row - confirm against the page
totals = totals_temp.iloc[:-1]
totals

  totals_temp = pd.read_html(str(totals_table))[0]


Unnamed: 0,Rk,Player,Age,G,GS,MP,FG,FGA,FG%,3P,...,FT%,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS
0,1.0,Jayson Tatum,25.0,74,74.0,2645,672,1426,0.471,229,...,0.833,67,534,601,364,75,43,188,145,1987
1,2.0,Derrick White,29.0,73,73.0,2381,387,839,0.461,196,...,0.901,51,259,310,377,74,87,112,152,1107
2,3.0,Jaylen Brown,27.0,70,70.0,2343,627,1256,0.499,145,...,0.703,84,303,387,249,83,37,166,185,1610
3,4.0,Jrue Holiday,33.0,69,69.0,2263,331,689,0.48,138,...,0.833,84,289,373,333,61,53,124,108,860
4,5.0,Payton Pritchard,26.0,82,5.0,1825,297,635,0.468,147,...,0.821,70,195,265,281,39,6,61,106,787
5,6.0,Sam Hauser,26.0,79,13.0,1741,249,558,0.446,197,...,0.895,45,231,276,82,40,25,32,99,712
6,7.0,Al Horford,37.0,65,33.0,1740,214,419,0.511,108,...,0.867,82,331,413,168,38,62,48,93,562
7,8.0,Kristaps Porziņģis,28.0,57,57.0,1690,388,752,0.516,110,...,0.858,97,312,409,115,42,111,89,156,1145
8,9.0,Luke Kornet,28.0,63,7.0,983,142,203,0.7,1,...,0.907,118,143,261,67,23,61,21,77,334
9,10.0,Oshae Brissett,25.0,55,1.0,630,68,153,0.444,15,...,0.602,61,99,160,44,19,8,20,54,201


# Visuals

## Points per Game

In [None]:
# Player, points per game, and age, sorted so the top scorer is first
# (Age is carried along but not plotted)
ppg = averages[['Player', 'PTS', 'Age']].sort_values(by='PTS', ascending=False)

# After the descending sort the first row is the team leader
leader = ppg.iloc[0]['Player']

# Bold black Arial, defined once instead of repeating the dict three times
title_font = {'size': 20, 'family': "Arial", 'color': "black", 'weight': "bold"}
tick_font = {'size': 12, 'family': "Arial", 'color': "black", 'weight': "bold"}

# Green bars labelled with their value; hover shows only the player name
fig = px.bar(ppg, x='Player', y='PTS', text='PTS')
fig.update_traces(marker_color='green', textposition='outside', hovertemplate='%{x}<extra></extra>')
fig.update_layout(
    title={'text': f"Points per Game: Leader is {leader}", 'font': title_font},
    xaxis={'tickfont': tick_font},
    yaxis={'tickfont': tick_font},
)

# Export as standalone HTML; create the output folder first so a fresh run doesn't fail
Path("graphs").mkdir(parents=True, exist_ok=True)
fig.write_html("graphs/ppg.html")

## Assists per Game

In [None]:
# Player, assists per game, and age, sorted so the top passer is first
# (Age is carried along but not plotted)
apg = averages[['Player', 'AST', 'Age']].sort_values(by='AST', ascending=False)

# After the descending sort the first row is the team leader
leader = apg.iloc[0]['Player']

# Bold black Arial, defined once instead of repeating the dict three times
title_font = {'size': 20, 'family': "Arial", 'color': "black", 'weight': "bold"}
tick_font = {'size': 12, 'family': "Arial", 'color': "black", 'weight': "bold"}

# Green bars labelled with their value; hover shows only the player name
fig = px.bar(apg, x='Player', y='AST', text='AST')
fig.update_traces(marker_color='green', textposition='outside', hovertemplate='%{x}<extra></extra>')
fig.update_layout(
    title={'text': f"Assists per Game: Leader is {leader}", 'font': title_font},
    xaxis={'tickfont': tick_font},
    yaxis={'tickfont': tick_font},
)

# Export as standalone HTML; create the output folder first so a fresh run doesn't fail
Path("graphs").mkdir(parents=True, exist_ok=True)
fig.write_html("graphs/apg.html")

## Rebounds per Game

In [None]:
# Player, total rebounds per game, and age, sorted so the top rebounder is first
# (Age is carried along but not plotted)
rpg = averages[['Player', 'TRB', 'Age']].sort_values(by='TRB', ascending=False)

# After the descending sort the first row is the team leader
leader = rpg.iloc[0]['Player']

# Bold black Arial, defined once instead of repeating the dict three times
title_font = {'size': 20, 'family': "Arial", 'color': "black", 'weight': "bold"}
tick_font = {'size': 12, 'family': "Arial", 'color': "black", 'weight': "bold"}

# Green bars labelled with their value; hover shows only the player name
fig = px.bar(rpg, x='Player', y='TRB', text='TRB')
fig.update_traces(marker_color='green', textposition='outside', hovertemplate='%{x}<extra></extra>')
fig.update_layout(
    title={'text': f"Rebounds per Game: Leader is {leader}", 'font': title_font},
    xaxis={'tickfont': tick_font},
    yaxis={'tickfont': tick_font},
)

# Export as standalone HTML; create the output folder first so a fresh run doesn't fail
Path("graphs").mkdir(parents=True, exist_ok=True)
fig.write_html("graphs/rpg.html")

## Steals per Game

In [None]:
# Player, steals per game, and age, sorted so the steals leader is first
# (Age is carried along but not plotted)
spg = averages[['Player', 'STL', 'Age']].sort_values(by='STL', ascending=False)

# After the descending sort the first row is the team leader
leader = spg.iloc[0]['Player']

# Bold black Arial, defined once instead of repeating the dict three times
title_font = {'size': 20, 'family': "Arial", 'color': "black", 'weight': "bold"}
tick_font = {'size': 12, 'family': "Arial", 'color': "black", 'weight': "bold"}

# Green bars labelled with their value; hover shows only the player name
fig = px.bar(spg, x='Player', y='STL', text='STL')
fig.update_traces(marker_color='green', textposition='outside', hovertemplate='%{x}<extra></extra>')
fig.update_layout(
    title={'text': f"Steals per Game: Leader is {leader}", 'font': title_font},
    xaxis={'tickfont': tick_font},
    yaxis={'tickfont': tick_font},
)

# Export as standalone HTML; create the output folder first so a fresh run doesn't fail
Path("graphs").mkdir(parents=True, exist_ok=True)
fig.write_html("graphs/spg.html")

## Blocks per Game

In [None]:
# Player, blocks per game, and age, sorted so the top shot-blocker is first
# (Age is carried along but not plotted)
bpg = averages[['Player', 'BLK', 'Age']].sort_values(by='BLK', ascending=False)

# After the descending sort the first row is the team leader
leader = bpg.iloc[0]['Player']

# Bold black Arial, defined once instead of repeating the dict three times
title_font = {'size': 20, 'family': "Arial", 'color': "black", 'weight': "bold"}
tick_font = {'size': 12, 'family': "Arial", 'color': "black", 'weight': "bold"}

# Green bars labelled with their value; hover shows only the player name
fig = px.bar(bpg, x='Player', y='BLK', text='BLK')
fig.update_traces(marker_color='green', textposition='outside', hovertemplate='%{x}<extra></extra>')
fig.update_layout(
    title={'text': f"Blocks per Game: Leader is {leader}", 'font': title_font},
    xaxis={'tickfont': tick_font},
    yaxis={'tickfont': tick_font},
)

# Export as standalone HTML; create the output folder first so a fresh run doesn't fail
Path("graphs").mkdir(parents=True, exist_ok=True)
fig.write_html("graphs/bpg.html")

# Player Classification Model Data

In [None]:
#code for model goes here

## Additional Data Scraping Algorithm

In [42]:
# Individual-player scraping helper
def player_scrape_website(url, player, f_name):
    """Download ``url`` and save it as ``Career Data/{player}/{f_name}.html``.

    Parameters
    ----------
    url : str
        Address of the page to fetch with an HTTP GET.
    player : str
        Name of the per-player sub-folder inside ``Career Data``.
    f_name : str
        File stem of the saved page; the file is written as UTF-8.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status. Without this check an
        error page (for example a rate-limit response) would be saved and
        later parsed as if it were real data.
    requests.Timeout
        If the server does not answer within 30 seconds.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # Each player gets a folder; create it on first use so the first call
    # for a new player does not fail with FileNotFoundError.
    player_dir = Path("Career Data") / player
    player_dir.mkdir(parents=True, exist_ok=True)
    (player_dir / f"{f_name}.html").write_text(response.text, encoding="utf-8")

## Payton Pritchard

In [43]:
# Cache the 2021 splits page (player id "pritcpa01") as
# Career Data/Payton Prichard/2021.html; the folder name is kept exactly as-is
player_scrape_website(
    url="https://www.basketball-reference.com/players/p/pritcpa01/splits/2021",
    player="Payton Prichard",
    f_name="2021",
)

## Sam Hauser

## Jayson Tatum

## Derrick White

## Jaylen Brown

## Jrue Holiday

## Al Horford

## Luke Kornet

## Kristaps Porziņģis

## Oshae Brissett

## Svi Mykhailiuk

## Neemias Queta

## Xavier Tillman Sr.

## Jaden Springer

## Jordan Walsh