In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge
from sklearn.cluster import KMeans

In [6]:
# Load the match data and derive the regression target:
# 'diferencia_goles' = 'goles_local' minus 'goles_visitante'.
df_matches = pd.read_csv('../club_data.csv')
df_matches['diferencia_goles'] = df_matches['goles_local'].sub(df_matches['goles_visitante'])

# Two predictor columns and the derived target.
# NOTE(review): `X_macthes` looks like a typo for `X_matches`; the name is
# kept as-is because other cells may refer to it.
X_macthes = df_matches.loc[:, ['posesion_local (%)', 'tiros_arco_local']]
y_matches = df_matches.loc[:, 'diferencia_goles']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_macthes,
    y_matches,
    test_size=0.2,
    random_state=42,
)

# Load the player data and keep the four numeric columns that are fed to
# the clustering pipeline.
df_players = pd.read_csv('../no_supervised/jugadores_cebollitas.csv')
X_players = df_players.loc[
    :, ['goles', 'asistencias', 'pases_completados (%)', 'tiros_al_arco']
]

In [7]:
# Supervised model: standardize the match features, then fit a Ridge
# regression (alpha=1.0) on the training split.
pipeline_supervised = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('ridge', Ridge(alpha=1.0)),
    ]
)
# Left as the last expression so the fitted pipeline is displayed by the cell.
pipeline_supervised.fit(X_train, y_train)

0,1,2
,steps,"[('scaler', ...), ('ridge', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [10]:
# Unsupervised model: standardize the player features, then group the
# players into three clusters with a seeded KMeans.
pipeline_not_supervised = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('kmeans', KMeans(n_clusters=3, random_state=42)),
    ]
)
# Fit on the player features and store each player's cluster label
# alongside the original player rows.
cluster_labels = pipeline_not_supervised.fit_predict(X_players)
df_players['cluster'] = cluster_labels



In [11]:
# Score a single hand-written match row with the fitted regression pipeline.
# The column names must match the ones the pipeline was trained on.
new_match = pd.DataFrame(
    [{'posesion_local (%)': 58, 'tiros_arco_local': 9}]
)
predicted_difference = pipeline_supervised.predict(new_match)

# First ten players together with their assigned cluster label.
profiles_players = df_players.head(10)[['nombre', 'cluster']]

print("Predicted goal difference for the new match:", predicted_difference.round(2))
print("Player profiles with clusters:\n", profiles_players)


Predicted goal difference for the new match: [0.25]
Player profiles with clusters:
                 nombre  cluster
0         Tara Alvarez        1
1        Carol Mcclain        2
2        Robert Martin        1
3    Mr. Robert Turner        2
4  Christopher Kennedy        1
5  Dr. Justin Anderson        0
6      Sandra Shepherd        1
7          Mark Bowers        0
8         Kenneth Cook        1
9    Christopher Parks        2


In [12]:
import ipywidgets as widgets

# Sliders that feed the interactive prediction below.
# No explicit `value` is given, so each slider starts at its minimum.
# `description_width: 'initial'` lets the label take its natural width;
# with the default fixed width these long descriptions are cut off.
posesion_widget = widgets.IntSlider(
    min=40,
    max=70,
    description='Posesión Local (%)',
    style={'description_width': 'initial'},
)

tiros_widget = widgets.IntSlider(
    min=1,
    max=15,
    description='Tiros al Arco Local',
    style={'description_width': 'initial'},
)

def predict_goal_difference(posesion, tiros):
    """Print the goal difference predicted by `pipeline_supervised` for one match.

    Args:
        posesion: Value placed in the 'posesion_local (%)' column.
        tiros: Value placed in the 'tiros_arco_local' column.

    Returns:
        None. The prediction, rounded to two decimals, is printed.
    """
    # Single-row frame whose column names match the training features.
    features = {
        'posesion_local (%)': [posesion],
        'tiros_arco_local': [tiros],
    }
    single_match = pd.DataFrame(features)

    # predict() returns one value per input row; only one row was passed in.
    predictions = pipeline_supervised.predict(single_match)
    estimate = predictions[0]
    print(f"Predicted goal difference: {estimate.round(2)}")

# Wire the two sliders to the prediction function. The trailing semicolon
# stops the cell from echoing interact()'s return value (the function repr)
# underneath the widget.
widgets.interact(predict_goal_difference, posesion=posesion_widget, tiros=tiros_widget);

interactive(children=(IntSlider(value=40, description='Posesión Local (%)', max=70, min=40), IntSlider(value=1…

<function __main__.predict_goal_difference(posesion, tiros)>