In [5]:
# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 2: Load the data
# Reads the match table from a CSV in the working directory. The columns used
# below are home_team, away_team, result and the four form/rank features.
df = pd.read_csv("epl_match_outcomes_synthetic.csv")

# Step 3: Encode categorical columns
# Fit the team encoder on the union of home and away teams. Fitting on
# home_team alone makes transform() raise ValueError for any club that only
# ever appears as the away side.
le_team = LabelEncoder()
le_team.fit(pd.concat([df['home_team'], df['away_team']], ignore_index=True))
df['home_team_encoded'] = le_team.transform(df['home_team'])
df['away_team_encoded'] = le_team.transform(df['away_team'])

# Separate encoder for the outcome labels so predictions can be mapped back
# to their original strings with inverse_transform().
le_result = LabelEncoder()
df['result_encoded'] = le_result.fit_transform(df['result'])

# Step 4: Select features and target
features = [
    'home_team_encoded', 'away_team_encoded',
    'home_team_form', 'away_team_form',
    'home_team_rank', 'away_team_rank',
]
target = 'result_encoded'

X = df[features]
y = df[target]

# Step 5: Split the data (80% train / 20% test, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 6: Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Step 7: Evaluate the model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))

# Pass the full set of encoded labels explicitly: without `labels`, the report
# infers the classes from y_test/y_pred and raises if that count differs from
# len(target_names), e.g. when one outcome is absent from the test split.
all_labels = list(range(len(le_result.classes_)))
print("\nClassification Report:\n", classification_report(y_test, y_pred, labels=all_labels, target_names=le_result.classes_))


Accuracy: 0.33

Classification Report:
               precision    recall  f1-score   support

    Away Win       0.54      0.20      0.29        35
        Draw       0.19      0.15      0.17        27
    Home Win       0.33      0.58      0.42        38

    accuracy                           0.33       100
   macro avg       0.35      0.31      0.29       100
weighted avg       0.37      0.33      0.31       100



In [6]:
# Example: score one hypothetical fixture (Arsenal at home against Chelsea).
# Both club names go through the fitted team encoder in a single call; the
# form and rank values are hand-picked illustration inputs.
home_code, away_code = le_team.transform(['Arsenal', 'Chelsea'])

fixture_columns = {
    'home_team_encoded': [home_code],
    'away_team_encoded': [away_code],
    'home_team_form': [2.5],
    'away_team_form': [1.8],
    'home_team_rank': [3],
    'away_team_rank': [7],
}
sample_match = pd.DataFrame(fixture_columns)

# Predict the encoded outcome, then map it back to its original label.
predicted_result = model.predict(sample_match)
predicted_label = le_result.inverse_transform(predicted_result)[0]
print("Predicted Result:", predicted_label)


Predicted Result: Home Win


In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC

# Persist the fitted model and both label encoders to disk with joblib; the
# Streamlit cell further down reloads them using these same file names.
# NOTE(review): LogisticRegression, GradientBoostingClassifier and SVC are
# imported above, but no model comparison is actually performed in this cell.
import joblib
joblib.dump(model, 'match_outcome_predictor.pkl')
joblib.dump(le_team, 'team_encoder.pkl')
joblib.dump(le_result, 'result_encoder.pkl')


['result_encoder.pkl']

In [8]:
import streamlit as st
import joblib
import pandas as pd

# Streamlit front end for the saved match-outcome model.
# NOTE: this script only renders when launched with `streamlit run <file>.py`;
# executing it inside a notebook kernel just emits Streamlit's
# "run this with `streamlit run`" warning.
st.title("Premier League Match Outcome Predictor")

# joblib.load unpickles arbitrary Python objects, so only load artifacts that
# were produced by a trusted training run.
model = joblib.load("match_outcome_predictor.pkl")
le_team = joblib.load("team_encoder.pkl")
le_result = joblib.load("result_encoder.pkl")

teams = le_team.classes_

home_team = st.selectbox("Select Home Team", teams)
# Default the away side to the second team so the initial selection is not a
# club playing itself (falls back to index 0 if only one team is known).
away_team = st.selectbox("Select Away Team", teams, index=min(1, len(teams) - 1))

home_form = st.slider("Home Team Form", 0.0, 5.0, 2.5)
away_form = st.slider("Away Team Form", 0.0, 5.0, 2.5)

home_rank = st.slider("Home Team Rank", 1, 20, 10)
away_rank = st.slider("Away Team Rank", 1, 20, 10)

if st.button("Predict Result"):
    if home_team == away_team:
        # A fixture needs two different clubs; refuse to score this input.
        st.error("Home and away teams must be different.")
    else:
        # Column names and order mirror the feature list used at training time.
        input_df = pd.DataFrame({
            'home_team_encoded': [le_team.transform([home_team])[0]],
            'away_team_encoded': [le_team.transform([away_team])[0]],
            'home_team_form': [home_form],
            'away_team_form': [away_form],
            'home_team_rank': [home_rank],
            'away_team_rank': [away_rank],
        })

        prediction = model.predict(input_df)
        result = le_result.inverse_transform(prediction)[0]
        st.success(f"Predicted Result: {result}")


2025-06-17 10:49:03.429 
  command:

    streamlit run C:\Users\Administrator\anaconda3\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
