# Production Model

This notebook trains and saves the final production models (a player model and a club model) and contains the insights derived from them.

-------

### Production Player Model

In [55]:
import pandas as pd
import joblib
import pickle
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

In [65]:
# Display the dtype of every column in the player dataset.
# NOTE(review): in this notebook `player_data` is first assigned by the
# pd.read_csv call in a later cell, so on a fresh kernel (Restart & Run All)
# this cell would raise NameError — it should be moved below the load cell.
player_data.dtypes

Player                   object
Age                     float64
Nation                   object
Pos                      object
Team                     object
Performance-Gls         float64
Performance-Ast         float64
Performance-G+A         float64
Performance-G-PK        float64
Performance-PK          float64
Performance-CrdY        float64
Performance-CrdR        float64
Playing Time-Min        float64
Playing Time-MP         float64
Playing Time-90s        float64
Progression-PrgC        float64
Progression-PrgP        float64
Progression-PrgR        float64
Expected-xG             float64
Expected-npxG           float64
Standard-Sh             float64
Standard-SoT            float64
PPA                     float64
CrsPA                   float64
KP                      float64
Total-Cmp%              float64
Team Success-PPM        float64
Team Success-+/-90      float64
Tackles-Tkl             float64
Performance-Int         float64
Blocks-Sh               float64
Clr     

In [75]:
# Read the filtered per-player statistics into a DataFrame
player_csv_path = "../data/player_stats_by_team/combined/filtered_player_stats.csv"
player_data = pd.read_csv(player_csv_path)

# Features are every column except the target and the
# Nation / Pos / Team / Player columns
player_target_col = "Balanced_Performance"
player_excluded_cols = [player_target_col, "Nation", "Pos", "Team", "Player"]
X_player = player_data.drop(columns=player_excluded_cols)
y_player = player_data[player_target_col]

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs
(
    X_train_player,
    X_test_player,
    y_train_player,
    y_test_player,
) = train_test_split(X_player, y_player, test_size=0.2, random_state=42)

# Report the size of each split
print(f"X_train_player shape: {X_train_player.shape}")
print(f"X_test_player shape: {X_test_player.shape}")

X_train_player shape: (211, 28)
X_test_player shape: (53, 28)


In [76]:
# Fit a linear regression on the player training split
# (fit() returns the fitted estimator, so construction and fitting are chained)
linear_model = LinearRegression().fit(X_train_player, y_train_player)

# Serialize the fitted model with pickle into the streamlit directory
with open('../streamlit/linear_regression_player_model.pkl', 'wb') as pickle_out:
    pickle.dump(linear_model, pickle_out)
print("Linear Regression model saved")

Linear Regression model saved


### Production Club Model

In [73]:
# Read the filtered per-club statistics into a DataFrame
club_csv_path = "../data/premier_league_by_table/combined/filtered_club_stats.csv"
club_data = pd.read_csv(club_csv_path)

# The target is the "Pts/MP" column; features are everything except the
# target and the "Squad" column
club_target_col = "Pts/MP"
X_club = club_data.drop(columns=[club_target_col, "Squad"])
y_club = club_data[club_target_col]

print(f"Original X_club shape: {X_club.shape}")

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs
(
    X_train_club,
    X_test_club,
    y_train_club,
    y_test_club,
) = train_test_split(X_club, y_club, test_size=0.2, random_state=42)

# Report the size of each split
print(f"X_train_club shape: {X_train_club.shape}")
print(f"X_test_club shape: {X_test_club.shape}")

Original X_club shape: (20, 55)
X_train_club shape: (16, 55)
X_test_club shape: (4, 55)


In [74]:
# Configure and fit the XGBoost regressor on the club training split
xgb_params = {"n_estimators": 200, "max_depth": 10, "random_state": 42}
xgboost_model = XGBRegressor(**xgb_params)
xgboost_model.fit(X_train_club, y_train_club)

# Column names of the training features, in training order
feature_names = X_train_club.columns.tolist()

# Serialize the fitted model with pickle into the streamlit directory
with open("../streamlit/xgboost_club_model.pkl", "wb") as model_out:
    pickle.dump(xgboost_model, model_out)
print("XGBoost model saved")

# Serialize the feature-name list alongside the model
with open("../streamlit/club_model_feature_names.pkl", "wb") as names_out:
    pickle.dump(feature_names, names_out)
print("Feature names saved")

XGBoost model saved
Feature names saved
