In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error

In [None]:
# Read the football players dataset into a DataFrame.
# NOTE(review): the absolute '/content/' path looks environment-specific (e.g. Colab) — confirm before running elsewhere.
csv_path = '/content/Latest_Football_Players_2024_Data.csv'
df = pd.read_csv(csv_path)

# paCE : Construct Stage



*   Determine which model is appropriate
*   Construct the model
*   Confirm model assumptions
*   Evaluate model results to determine how well your model fits the data

In [None]:
# Separate the prediction target from the remaining columns, which become the features.
target_column = 'Seasons Ratings'
y = df[target_column]
X = df.drop(columns=[target_column])

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Echo the training features and target as this cell's output.
X_train, y_train

(                  Teams    Seasons          Players  Matches  Goals  Assists
 432             Sevilla  2021/2022     Jules Kounde       44      3        1
 277             Chelsea  2021/2022      Kai Havertz       47     14        5
 721            Man City  2023/2024    Manuel Akanji       48      4        0
 991     Atletico Madrid  2016/2017    Thomas Partey       24      1        4
 678     Atletico Madrid  2021/2022  Rodrigo De Paul       48      4        1
 ...                 ...        ...              ...      ...    ...      ...
 1044          Wolfsburg  2017/2018   Victor Osimhen       13      0        0
 1095           Juventus  2021/2022    Adrien Rabiot       45      0        2
 1130           Brighton  2017/2018       Lewis Dunk       39      1        1
 860   Linares Deportivo  2022/2023     Fermin Lopez       40     12        0
 1126           Brighton  2021/2022       Lewis Dunk       31      1        0
 
 [972 rows x 6 columns],
 432     7.3
 277     7.1
 721     7.

In [None]:
# Text columns that are one-hot encoded.
categorical_features = ['Players', 'Teams']

# Fill missing values with the literal 'missing', then one-hot encode.
# handle_unknown='ignore' keeps transform from raising on categories not seen during fit.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

In [None]:
# Numeric columns that are imputed and standardized.
numerical_features = ['Matches', 'Goals', 'Assists']

# Replace missing values with the column median, then standardize each column.
numerical_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
numerical_transformer = Pipeline(steps=numerical_steps)

In [None]:
# Route each column group through its own preprocessing pipeline.
# NOTE(review): no `remainder` is given, so columns not listed in either group
# (e.g. 'Seasons') are presumably not forwarded to the model — confirm this is intended.
column_pipelines = [
    ('num', numerical_transformer, numerical_features),
    ('cat', categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_pipelines)

In [None]:
# End-to-end estimator: preprocessing followed by a seeded 100-tree random forest.
forest = RandomForestRegressor(n_estimators=100, random_state=42)
model = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('regressor', forest),
    ]
)

In [None]:
# Fit the preprocessing steps and the random forest on the training split.
model.fit(X=X_train, y=y_train)

In [None]:
# Predict ratings for the held-out split with the fitted pipeline.
y_pred = model.predict(X_test)

# Mean squared error between the true and the predicted ratings.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

Mean Squared Error: 0.09012114341416196


In [None]:
from sklearn.model_selection import GridSearchCV

# Hyperparameter grid for the 'regressor' step of the pipeline.
# max_features uses 1.0 (consider all features) instead of 'auto': current
# scikit-learn releases reject 'auto' for RandomForestRegressor during parameter
# validation, so every candidate using it failed to fit and was scored NaN
# (12 candidates x 3 folds = the 36 failed fits reported by the earlier run).
# For regressors 'auto' meant "all features", which is exactly what 1.0 selects.
param_grid = {
    'regressor__n_estimators': [100, 200, 300],
    'regressor__max_features': [1.0, 'sqrt', 'log2'],
    'regressor__max_depth': [None, 10, 20, 30]
}

# 3-fold cross-validated exhaustive search, scored by negative MSE, run on all CPU cores.
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='neg_mean_squared_error', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters
print(f'Best parameters: {grid_search.best_params_}')

36 fits failed out of a total of 108.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1152, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/pipeline.py", line 427, in fit
    self._final_estimator.fit(Xt, y, **fit_params_last_step)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/base.py", line 1145, in wrapper
    estimator._validate_params()
  File "/us

Best parameters: {'regressor__max_depth': None, 'regressor__max_features': 'sqrt', 'regressor__n_estimators': 200}


In [None]:
import joblib

# Serialize the fitted pipeline to disk; the call's return value (the list of
# written file names) is shown as the cell output.
# NOTE(review): this saves `model` (the baseline pipeline), not the tuned
# `grid_search.best_estimator_` — confirm that is intended.
joblib.dump(value=model, filename='player_rating_model.pkl')

['player_rating_model.pkl']

In [None]:
# Read the persisted pipeline back from disk, replacing the in-memory `model`.
model = joblib.load(filename='player_rating_model.pkl')

In [None]:
import pandas as pd
import joblib
from sklearn.metrics import mean_squared_error

# Restore the trained pipeline from its pickle file.
# NOTE(review): the preceding cell already performs this same load; presumably this
# cell is meant as a standalone entry point for inference — confirm.
model = joblib.load(filename='player_rating_model.pkl')

In [None]:
# Two hand-written player seasons to score, one record per player.
# NOTE(review): 'messi' and 'AL NASSR' may not match the spelling/casing of any
# category seen during training; if so the encoder treats them as unknown — verify.
player_rows = [
    {
        'Players': 'messi',
        'Matches': 31,
        'Seasons': '2022/2023',
        'Goals': 28,
        'Assists': 25,
        'Teams': 'PSG',
    },
    {
        'Players': 'Ronaldo',
        'Matches': 35,
        'Seasons': '2019/2020',
        'Goals': 31,
        'Assists': 20,
        'Teams': 'AL NASSR',
    },
]
# Explicit column list pins the same column order as the training-style frame above.
new_data = pd.DataFrame(
    player_rows,
    columns=['Players', 'Matches', 'Seasons', 'Goals', 'Assists', 'Teams'],
)

In [None]:
# Run the loaded pipeline (preprocessing + regressor) on the hand-written rows.
new_predictions = model.predict(X=new_data)

In [None]:
# Show the predicted ratings for the rows in new_data.
rating_label = 'Predicted Ratings:'
print(rating_label, new_predictions)

Predicted Ratings: [7.927 7.884]
