In [7]:
from google.colab import files
uploaded = files.upload()  # Upload `combined_data.csv` and `filtered_data.csv`


Saving combined_data.csv to combined_data.csv
Saving filtered_data.csv to filtered_data.csv


In [9]:
import pandas as pd

# Load the CSV files
combined_data = pd.read_csv("combined_data.csv")
filtered_data = pd.read_csv("filtered_data.csv")

# Verify the data
print("Combined Data Shape:", combined_data.shape)
print("Filtered Data Shape:", filtered_data.shape)


Combined Data Shape: (3568, 26)
Filtered Data Shape: (3568, 26)


In [10]:
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.model_selection import train_test_split
import joblib

# Step 1: Define features and target
features = ["HomeGoalAvg", "AwayGoalAvg", "HomeWinRate", "AwayWinRate"]
target = "FTR"

# Ensure the target column is numeric
target_map = {"H": 0, "D": 1, "A": 2}
filtered_data[target] = filtered_data[target].map(target_map)

# Step 2: Split data into features (X) and target (y)
X = filtered_data[features]
y = filtered_data[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 3: Train models
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)

# Combine into an ensemble
ensemble_model = VotingClassifier(
    estimators=[("rf", rf_model), ("gb", gb_model)],
    voting="soft"
)
ensemble_model.fit(X_train, y_train)

# Step 4: Save the trained model
joblib.dump(ensemble_model, "ensemble_model.pkl")
print("Model training complete. Saved as 'ensemble_model.pkl'.")

# Optional: Download the model
from google.colab import files
files.download("ensemble_model.pkl")


Model training complete. Saved as 'ensemble_model.pkl'.


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>