In [1]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw `Matches` and `Predictions` tables from the SQLite database.
# `with sqlite3.connect(...)` only wraps a transaction (commit/rollback) and
# leaves the connection open afterwards, so close it explicitly once both
# reads are done, even if one of the queries raises.
con = sqlite3.connect("../laliga.sqlite")
try:
    df = pd.read_sql_query("SELECT * FROM Matches", con)
    dr = pd.read_sql_query("SELECT * FROM Predictions", con)
finally:
    con.close()

# Keep only the matches that have a score; rows with a missing `score` are dropped.
df = df.dropna(subset=['score'])

In [3]:
# Split each "home:away" score string once and derive the numeric goal columns.
score_parts = df["score"].str.split(":")
df["score_home_team"] = score_parts.str[0].astype(float)
df["score_away_team"] = score_parts.str[1].astype(float)
df["goal_difference"] = df["score_home_team"] - df["score_away_team"]

# Encode the outcome: '1' when the home side scored more, '2' when the away
# side scored more, and 'X' in every other case.
home_goals = df["score_home_team"]
away_goals = df["score_away_team"]
df["match_result"] = np.select(
    [home_goals > away_goals, home_goals < away_goals],
    ['1', '2'],
    default='X',
)

In [28]:
def func_home_wins(data):
    """Return how many entries of ``data`` equal '1' (the home-win code)."""
    is_home_win = data.eq('1')
    return is_home_win.sum()

def func_away_wins(data):
    """Return how many entries of ``data`` equal '2' (the away-win code)."""
    is_away_win = data.eq('2')
    return is_away_win.sum()

def func_tie(data):
    """Return how many entries of ``data`` equal 'X' (the tie code)."""
    is_tie = data.eq('X')
    return is_tie.sum()

# Totals per (division, season, club) over the club's home fixtures: goals
# for/against plus win/loss/tie counts as seen from the home side.
df_class_home = (
    df.groupby(['division', 'season', 'home_team'])
    .agg(
        GF=('score_home_team', 'sum'),
        GA=('score_away_team', 'sum'),
        W=('match_result', func_home_wins),
        L=('match_result', func_away_wins),
        T=('match_result', func_tie),
    )
    .reset_index()
    .rename(columns={'home_team': 'team'})
)

# Same totals over the club's away fixtures; goals and win/loss codes are
# mirrored so that GF/GA/W/L are again expressed from the club's own side.
df_class_away = (
    df.groupby(['division', 'season', 'away_team'])
    .agg(
        GF=('score_away_team', 'sum'),
        GA=('score_home_team', 'sum'),
        W=('match_result', func_away_wins),
        L=('match_result', func_home_wins),
        T=('match_result', func_tie),
    )
    .reset_index()
    .rename(columns={'away_team': 'team'})
)

# Stack the away and home tables and add them up per (season, team, division).
# An outer merge without `on` joins on every shared column, stat columns
# included, so a club whose home and away rows were identical would collapse
# into a single row and lose half of its season. Concatenation always keeps
# both rows.
df_classification = pd.concat([df_class_away, df_class_home], ignore_index=True)
df_classification = df_classification.groupby(['season', 'team', 'division']).sum().reset_index()

# Goal difference, and points at 3 per win plus 1 per tie.
df_classification['GD'] = df_classification['GF'] - df_classification['GA']
df_classification['Pts'] = (df_classification['W']) * 3 + df_classification['T']

# First year of the "YYYY-YYYY" season label, as an integer sort key.
df_classification['year_of_start'] = df_classification['season'].str.split("-").str[0].astype(int)

# Order the table newest season first, then by division, then by points, goal
# difference and goals scored (all three descending). A single multi-key sort
# replaces the chain of successive sorts, where only the last one determined
# the final order. No tie-breaker is applied beyond GF.
df_classification_ordered = (
    df_classification
    .sort_values(
        by=['year_of_start', 'division', 'Pts', 'GD', 'GF'],
        ascending=[False, True, False, False, False],
    )
    .reset_index(drop=True)
)

# Position within each (season start year, division) group, starting at 1.
df_classification_ordered['rank'] = df_classification_ordered.groupby(['year_of_start', 'division']).cumcount() + 1

# Largest rank number found in each season.
# NOTE(review): despite the `div1` name this groups by season only, so the
# maximum is taken across all divisions — confirm whether a division filter
# was intended.
lowest_rank_div1 = (
    df_classification_ordered
    .groupby('season')['rank']
    .max()
    .reset_index()
)

# Outer-join on the two columns the frames share (season and rank); these are
# the same keys pandas infers when `on` is omitted.
df_classification_ordered_next = df_classification_ordered.merge(
    lowest_rank_div1,
    how='outer',
    on=['season', 'rank'],
)

# Replace each row's rank with the rank of the preceding row of the same
# (division, team) group; the first row of every group becomes NaN.
# NOTE(review): rows are ordered newest season first at this point, so the
# preceding row belongs to a more recent season — confirm whether the
# previous season's rank was intended instead.
df_classification_ordered['rank'] = (
    df_classification_ordered
    .groupby(['division', 'team'])['rank']
    .shift(periods=1)
)

df_with_rank = df_classification_ordered.loc[:, ['season', 'division', 'team', 'rank']]
print(df_classification_ordered_next)


         season             team  division    GF    GA  W   L  T    GD  Pts  \
0     2021-2022      Real Madrid         1   8.0   4.0  2   0  1   4.0    7   
1     2021-2022     Ponferradina         2   4.0   1.0  3   0  0   3.0    9   
2     2021-2022       Sevilla FC         1   5.0   1.0  2   0  1   4.0    7   
3     2021-2022  Real Valladolid         2   5.0   1.0  2   0  1   4.0    7   
4     2021-2022         Valencia         1   5.0   1.0  2   0  1   4.0    7   
...         ...              ...       ...   ...   ... ..  .. ..   ...  ...   
2781  1928-1929  Athletic Madrid         1  43.0  41.0  8   8  2   2.0   26   
2782  1928-1929         Espanyol         1  32.0  38.0  7   7  4  -6.0   25   
2783  1928-1929        Catalunya         1  45.0  49.0  6   8  4  -4.0   22   
2784  1928-1929       Real Unión         1  40.0  42.0  5  11  2  -2.0   17   
2785  1928-1929           Racing         1  25.0  50.0  3  12  3 -25.0   12   

      year_of_start  rank  
0              2021    