In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from pandas import Series
import sqlite3
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [2]:
url_data ="https://admin.euroleague.net/main/statistics?mode=Leaders&entity=Players&seasonmode=Single&seasoncode=E{}&cat=Assistances&agg=PerGame"
years = list(range(2001, 2022))

In [3]:
for year in years:
    with open("assists_leaders_htmls/{}.html".format(year), "w+" , encoding="utf-8") as file :
        url = url_data.format(year)
        data = requests.get(url)
        file.write(data.text)
        

In [4]:
dfs = []
for year in years:
    with open("assists_leaders_htmls/{}.html".format(year)) as f:
        page = f.read()
    
    soup = BeautifulSoup(page, 'html.parser')
    soup.find('tr', class_="StatsGridResults table responsive fixed-cols-2 table-left-cols-2 table-expand table-striped table-hover table-noborder table-centered table-condensed")
    player_table = soup.find_all(id="ctl00_ctl00_ctl00_ctl00_maincontainer_maincontent_contentpane_ctl01_ctl01_gvResults")[0]
    assists = pd.read_html(str(player_table))[0]
    assists["Year"] = year
    dfs.append(assists)

In [5]:
assists = pd.concat(dfs)
assists

Unnamed: 0,Rank,Player,Team,GamesG,AssistsAS,Ave.,Per 40 min.40m,Year
0,1,"BENNETT, ELMERBENNETT, E.",BKNTau CeramicaTau Ceramica,15,79,5.27,6.96,2001
1,2,"HAWKINS, MICHAELHAWKINS, M.",WKSIdea SlaskIdea Slask,14,70,5.00,6.26,2001
2,3,"CELESTAND, JOHNCELESTAND, J.","2 FORSkipper BolognaSkipper Bologna, ASVAsvel ...",13,62,4.77,7.90,2001
3,4,"PHELPS, DERRICKPHELPS, D.",BERAlba BerlinAlba Berlin,13,53,4.08,5.02,2001
4,5,"PAPALOUKAS, THEODOROSPAPALOUKAS, T.",OLYOlympiacosOlympiacos,19,76,4.00,5.94,2001
...,...,...,...,...,...,...,...,...
37,38,"HACKETT, DANIELHACKETT, D.",CSKCSKA MoscowCSKA Moscow,22,57,2.59,4.81,2021
38,39,"SIMON, KRUNOSLAVSIMON, K.",EFSAnadolu Efes IstanbulAnadolu Efes Istanbul,27,69,2.56,4.83,2021
39,40,"PAPAPETROU, IOANNISPAPAPETROU, I.",PAOPanathinaikos OPAP AthensPanathinaikos OPAP...,23,58,2.52,3.47,2021
40,41,"WALDEN, COREYWALDEN, C.",BAYFC Bayern MunichFC Bayern Munich,25,63,2.52,4.50,2021


In [6]:
engine = create_engine('sqlite:///assists_leaders.db')
Session = sessionmaker(bind=engine)
session = Session()

In [7]:
assists.to_sql(
    name = "assists_leaders",
    con = engine,
    index = True,
    if_exists = "replace"
)

666

In [8]:
assists_select = session.execute('select * from "assists_leaders"')
assists_select.all()

[(0, 1, 'BENNETT, ELMERBENNETT, E.', 'BKNTau CeramicaTau Ceramica', 15, 79, 5.27, 6.96, 2001),
 (1, 2, 'HAWKINS, MICHAELHAWKINS, M.', 'WKSIdea SlaskIdea Slask', 14, 70, 5.0, 6.26, 2001),
 (2, 3, 'CELESTAND, JOHNCELESTAND, J.', '2 FORSkipper BolognaSkipper Bologna, ASVAsvel BasketAsvel Basket', 13, 62, 4.77, 7.9, 2001),
 (3, 4, 'PHELPS, DERRICKPHELPS, D.', 'BERAlba BerlinAlba Berlin', 13, 53, 4.08, 5.02, 2001),
 (4, 5, 'PAPALOUKAS, THEODOROSPAPALOUKAS, T.', 'OLYOlympiacosOlympiacos', 19, 76, 4.0, 5.94, 2001),
 (5, 6, 'McDONALD, ARRIELMCDONALD, A.', 'MACMaccabi EliteMaccabi Elite', 20, 76, 3.8, 4.78, 2001),
 (6, 7, 'EDNEY, TYUSEDNEY, T.', 'TREBenetton BasketBenetton Basket', 19, 72, 3.79, 5.0, 2001),
 (7, 8, 'LAKOVIC, JAKALAKOVIC, J.', 'MESKRKA Novo MestoKRKA Novo Mesto', 14, 53, 3.79, 4.66, 2001),
 (8, 9, 'BOOKER, MELVINBOOKER, M.', 'PESScavolini PesaroScavolini Pesaro', 20, 75, 3.75, 4.44, 2001),
 (9, 10, 'LUKOVSKI, DRAGANLUKOVSKI, D.', 'PAUPau-OrthezPau-Orthez', 14, 50, 3.57, 5.7, 200

In [9]:
assists.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 666 entries, 0 to 41
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Rank            666 non-null    int64  
 1   Player          666 non-null    object 
 2   Team            666 non-null    object 
 3   GamesG          666 non-null    int64  
 4   AssistsAS       666 non-null    int64  
 5   Ave.            666 non-null    float64
 6   Per 40 min.40m  666 non-null    float64
 7   Year            666 non-null    int64  
dtypes: float64(2), int64(4), object(2)
memory usage: 46.8+ KB


In [10]:
assists.columns

Index(['Rank', 'Player', 'Team', 'GamesG', 'AssistsAS', 'Ave.',
       'Per 40 min.40m', 'Year'],
      dtype='object')

In [11]:
dummies = pd.get_dummies(assists['Player'])
dummies = pd.get_dummies(assists['Team'])
dummies.head()

Unnamed: 0,"2 BAYFC Bayern MunichFC Bayern Munich, CZTCrvena Zvezda Telekom BelgradeCrvena Zvezda Telekom Belgrade","2 BROBrose Baskets BambergBrose Baskets Bamberg, PAOPanathinaikos AthensPanathinaikos Athens","2 EAMEA7 Emporio Armani MilanEA7 Emporio Armani Milan, CLALaboral Kutxa VitoriaLaboral Kutxa Vitoria","2 FORSkipper BolognaSkipper Bologna, ASVAsvel BasketAsvel Basket","2 LBOLaboral Kutxa VitoriaLaboral Kutxa Vitoria, EFSAnadolu Efes IstanbulAnadolu Efes Istanbul","2 LIELietuvos Rytas VilniusLietuvos Rytas Vilnius, FBUFenerbahce Ulker IstanbulFenerbahce Ulker Istanbul","2 LIELietuvos RytasLietuvos Rytas, EFSEfes PilsenEfes Pilsen","2 MPSMontepaschi SienaMontepaschi Siena, EA7EA7 Emporio Armani MilanEA7 Emporio Armani Milan","2 PARPartizan mt:s BelgradePartizan mt:s Belgrade, OLYOlympiacos PiraeusOlympiacos Piraeus","2 PROAsseco Prokom GdyniaAsseco Prokom Gdynia, EFSAnadolu Efes IstanbulAnadolu Efes Istanbul",...,UNKUnics KazanUnics Kazan,UOLUnion Olimpija LjubljanaUnion Olimpija Ljubljana,VBCPamesa ValenciaPamesa Valencia,VBCPower Electronics ValenciaPower Electronics Valencia,VBCValencia BasketValencia Basket,VIRKinder Virtus BolognaKinder Virtus Bologna,WKSIdea SlaskIdea Slask,ZALZalgiris KaunasZalgiris Kaunas,ZALZalgirisZalgiris,ZENZenit St PetersburgZenit St Petersburg
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
2,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
assists.drop(['Player', 'Team'], axis=1, inplace=True)

In [13]:
assists.head()

Unnamed: 0,Rank,GamesG,AssistsAS,Ave.,Per 40 min.40m,Year
0,1,15,79,5.27,6.96,2001
1,2,14,70,5.0,6.26,2001
2,3,13,62,4.77,7.9,2001
3,4,13,53,4.08,5.02,2001
4,5,19,76,4.0,5.94,2001


In [24]:
X = assists[['Rank','GamesG','AssistsAS','Year']]

In [25]:
y = assists[['Ave.']]

In [26]:
X_train, X_test, y_train, y_test = \
train_test_split(X, y, test_size=0.33, random_state=42)

In [27]:
regresion = LinearRegression().fit(X_train, y_train)

In [28]:
predict = regresion.predict(X_test)

In [29]:
predict

array([[2.91075115],
       [3.99768008],
       [4.40120722],
       [2.90341149],
       [2.38234247],
       [3.38634308],
       [3.8908989 ],
       [5.51625889],
       [4.63122224],
       [4.26968074],
       [3.02207068],
       [3.13533356],
       [4.05516491],
       [3.75922249],
       [2.72375139],
       [4.18105589],
       [2.97959683],
       [3.64441657],
       [2.75461956],
       [5.20840602],
       [2.98232041],
       [2.75029647],
       [2.89429056],
       [2.76361688],
       [2.50391817],
       [4.05652778],
       [3.16431257],
       [2.77058353],
       [3.39116386],
       [3.03917949],
       [3.54959124],
       [4.74713432],
       [3.15574364],
       [3.62209162],
       [3.55143234],
       [3.3192944 ],
       [3.45131364],
       [6.80791265],
       [3.45446956],
       [2.73402208],
       [2.691027  ],
       [5.13869239],
       [2.73645326],
       [3.43540782],
       [3.08486007],
       [2.9991009 ],
       [3.02999477],
       [2.812

In [30]:
try_predict = pd.Series(data=list(predict), name='prediction')

In [31]:
try_predict

0      [2.9107511480223813]
1      [3.9976800753326174]
2        [4.40120722305803]
3       [2.903411485075516]
4      [2.3823424724229447]
               ...         
215     [3.194260787362488]
216      [3.13819467893347]
217    [2.7846739682488035]
218    [2.8999278596205293]
219    [2.5856519225496335]
Name: prediction, Length: 220, dtype: object

In [36]:
result = pd.concat([y_test.reset_index(), try_predict], axis=1)[['Ave.', 'prediction']]

In [37]:
result

Unnamed: 0,Ave.,prediction
0,3.18,[2.9107511480223813]
1,3.92,[3.9976800753326174]
2,4.19,[4.40120722305803]
3,2.50,[2.903411485075516]
4,2.61,[2.3823424724229447]
...,...,...
215,3.32,[3.194260787362488]
216,3.14,[3.13819467893347]
217,2.86,[2.7846739682488035]
218,2.65,[2.8999278596205293]


In [38]:
regresion.score(X_test, y_test)

0.9675938963870043

In [38]:
assists = pd.concat(dfs)
assists

Unnamed: 0,Rank,Player,Team,GamesG,AssistsAS,Ave.,Per 40 min.40m,Year
0,1,"BENNETT, ELMERBENNETT, E.",BKNTau CeramicaTau Ceramica,15,79,5.27,6.96,2001
1,2,"HAWKINS, MICHAELHAWKINS, M.",WKSIdea SlaskIdea Slask,14,70,5.00,6.26,2001
2,3,"CELESTAND, JOHNCELESTAND, J.","2 FORSkipper BolognaSkipper Bologna, ASVAsvel ...",13,62,4.77,7.90,2001
3,4,"PHELPS, DERRICKPHELPS, D.",BERAlba BerlinAlba Berlin,13,53,4.08,5.02,2001
4,5,"PAPALOUKAS, THEODOROSPAPALOUKAS, T.",OLYOlympiacosOlympiacos,19,76,4.00,5.94,2001
...,...,...,...,...,...,...,...,...
37,38,"HACKETT, DANIELHACKETT, D.",CSKCSKA MoscowCSKA Moscow,22,57,2.59,4.81,2021
38,39,"SIMON, KRUNOSLAVSIMON, K.",EFSAnadolu Efes IstanbulAnadolu Efes Istanbul,27,69,2.56,4.83,2021
39,40,"PAPAPETROU, IOANNISPAPAPETROU, I.",PAOPanathinaikos OPAP AthensPanathinaikos OPAP...,23,58,2.52,3.47,2021
40,41,"WALDEN, COREYWALDEN, C.",BAYFC Bayern MunichFC Bayern Munich,25,63,2.52,4.50,2021
