# Import libraries and load packages

In [1]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns

# Scrape data

In [2]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# URL of the ACB standings page (season 2023, competition 1, round 23)
url = "https://www.acb.com/resultados-clasificacion/ver/temporada_id/2023/competicion_id/1/jornada_numero/23"

# Send a GET request to the URL.
# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() surfaces HTTP errors instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the HTML content
soup = BeautifulSoup(response.content, "html.parser")

# Find the table with class 'clasificacion'
table = soup.find("table", class_="clasificacion")
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # rather than an AttributeError on table.find_all below.
    raise ValueError("No table with class 'clasificacion' found at " + url)

# empty lists to store data (one list per output column)
positions = []
teams = []
games = []
wins = []
loses = []
points_for = []
points_against = []
points_dif = []

# Loop through table rows skipping the header row (first row)
for row in table.find_all("tr")[1:]:
    cells = row.find_all("td")
    # The highest index read below is 9, so a row needs at least 10 <td> cells;
    # skip anything shorter instead of raising IndexError.
    if len(cells) < 10:
        continue

    # Extract data from each row (cell text is kept as stripped strings)
    position = cells[0].text.strip()
    team = cells[2].text.strip()
    game = cells[3].text.strip()
    win = cells[4].text.strip()
    lose = cells[5].text.strip()
    pf = cells[7].text.strip()
    pa = cells[8].text.strip()
    dif = cells[9].text.strip()

    # Append data to lists
    positions.append(position)
    teams.append(team)
    games.append(game)
    wins.append(win)
    loses.append(lose)
    points_for.append(pf)
    points_against.append(pa)
    points_dif.append(dif)

# Create a DataFrame from the collected columns
data = {
    'Position': positions,
    'Team': teams,
    'Games': games,
    'Wins': wins,
    'Loses': loses,
    'Points_For': points_for,
    'Points_Against': points_against,
    'Points_Difference': points_dif
}

LigaEndesa = pd.DataFrame(data)

# Display the DataFrame
LigaEndesa

Unnamed: 0,Position,Team,Games,Wins,Loses,Points_For,Points_Against,Points_Difference
0,1,RMBReal MadridReal Madrid,23,20,3,2.051,1.792,259
1,2,UNIUnicajaUnicaja,23,19,4,2.025,1.775,250
2,3,BARBarçaBarça,23,16,7,2.009,1.822,187
3,4,DGCDreamland Gran CanariaDreamland Gran Canaria,23,15,8,1.93,1.867,63
4,5,UCMUCAM MurciaUCAM Murcia,23,14,9,1.915,1.835,80
5,6,LNTLenovo TenerifeLenovo Tenerife,23,14,9,1.945,1.874,71
6,7,VBCValencia BasketValencia Basket,23,14,9,1.871,1.839,32
7,8,JOVJoventut BadalonaJoventut Badalona,23,14,9,1.881,1.953,-72
8,9,BKNBaskoniaBaskonia,23,12,11,1.964,1.989,-25
9,10,BAXBAXI ManresaBAXI Manresa,23,12,11,1.941,1.946,-5


In [3]:
# Map the scraped team labels (abbreviation + name repeated twice) to clean display names
team_name_map = {
    'RMBReal MadridReal Madrid': 'Real Madrid',
    'UNIUnicajaUnicaja': 'Unicaja',
    'BARBarçaBarça': 'FC Barcelona',
    'DGCDreamland Gran CanariaDreamland Gran Canaria': 'Gran Canaria',
    'UCMUCAM MurciaUCAM Murcia': 'UCAM Murcia',
    'VBCValencia BasketValencia Basket': 'Valencia Basket',
    'LNTLenovo TenerifeLenovo Tenerife': 'Lenovo Tenerife',
    'BAXBAXI ManresaBAXI Manresa': 'BAXI Manresa',
    'BKNBaskoniaBaskonia': 'Baskonia',
    'CAZCasademont ZaragozaCasademont Zaragoza': 'Casademont Zaragoza',
    'JOVJoventut BadalonaJoventut Badalona': 'Joventut Badalona',
    'SBBSurne Bilbao BasketSurne Bilbao Basket': 'Surne Bilbao Basket',
    'GIRBàsquet GironaBàsquet Girona': 'Bàsquet Girona',
    'MBAMoraBanc AndorraMoraBanc Andorra': 'MoraBanc Andorra',
    'MOBMonbus ObradoiroMonbus Obradoiro': 'Monbus Obradoiro',
    'COVCoviran GranadaCoviran Granada': 'Coviran Granada',
    'BRERío BreogánRío Breogán': 'Río Breogán',
    'ZPAZunder PalenciaZunder Palencia': 'Zunder Palencia',
}

# Exact-match replacement: labels that are not keys of the map are left unchanged
LigaEndesa['Team'] = LigaEndesa['Team'].replace(team_name_map)
LigaEndesa


Unnamed: 0,Position,Team,Games,Wins,Loses,Points_For,Points_Against,Points_Difference
0,1,Real Madrid,23,20,3,2.051,1.792,259
1,2,Unicaja,23,19,4,2.025,1.775,250
2,3,FC Barcelona,23,16,7,2.009,1.822,187
3,4,Gran Canaria,23,15,8,1.93,1.867,63
4,5,UCAM Murcia,23,14,9,1.915,1.835,80
5,6,Lenovo Tenerife,23,14,9,1.945,1.874,71
6,7,Valencia Basket,23,14,9,1.871,1.839,32
7,8,Joventut Badalona,23,14,9,1.881,1.953,-72
8,9,Baskonia,23,12,11,1.964,1.989,-25
9,10,BAXI Manresa,23,12,11,1.941,1.946,-5


In [4]:
# Remove the Position column from the DataFrame
LigaEndesa = LigaEndesa.drop(columns=['Position'])

In [5]:
# Convert the scraped text columns to numeric dtypes.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
numeric_columns = ['Games', 'Wins', 'Loses', 'Points_For', 'Points_Against', 'Points_Difference']
for column in numeric_columns:
    LigaEndesa[column] = pd.to_numeric(LigaEndesa[column], errors='coerce')


In [6]:
# Points_For and Points_Against are floats at this point (e.g. 2.051 for 2051 points),
# so multiply by a thousand and convert them to int64 whole points.
for column in ['Points_For', 'Points_Against']:
    # Only scale float columns: once a column is int64 it already holds whole points,
    # so re-running this cell must not multiply it by 1000 a second time.
    if pd.api.types.is_float_dtype(LigaEndesa[column]):
        # Round before casting: astype('int64') truncates toward zero, and a float
        # product such as 1.001 * 1000 == 1000.9999999999999 would become 1000.
        LigaEndesa[column] = (LigaEndesa[column] * 1000).round().astype('int64')

In [7]:
# Display the DataFrame to check the result of the preceding cleaning cells
LigaEndesa

Unnamed: 0,Team,Games,Wins,Loses,Points_For,Points_Against,Points_Difference
0,Real Madrid,23,20,3,2051,1792,259
1,Unicaja,23,19,4,2025,1775,250
2,FC Barcelona,23,16,7,2009,1822,187
3,Gran Canaria,23,15,8,1930,1867,63
4,UCAM Murcia,23,14,9,1915,1835,80
5,Lenovo Tenerife,23,14,9,1945,1874,71
6,Valencia Basket,23,14,9,1871,1839,32
7,Joventut Badalona,23,14,9,1881,1953,-72
8,Baskonia,23,12,11,1964,1989,-25
9,BAXI Manresa,23,12,11,1941,1946,-5
