In [None]:
# football_predictor.py

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import os


def predict_match_outcome(
    filename='/content/drive/MyDrive/csv files/2018-2019.csv',
    target_column='FTR',
    feature_columns=None,
    test_size=0.2,
    random_state=42,
):
    """
    Train a Decision Tree on match data from a CSV file and print its test metrics.

    The function loads the CSV, keeps only rows where every required column is
    present, splits the rows into train/test sets, fits a
    ``DecisionTreeClassifier`` and prints the accuracy and a classification
    report for the held-out rows. Every failure is reported with a printed
    message followed by an early return; nothing is raised to the caller.

    Args:
        filename: Path to the CSV file to load.
        target_column: Name of the column to predict. For the default 'FTR'
            column the labels seen in practice are 'H', 'D' and 'A'.
        feature_columns: Iterable of column names used as model inputs.
            Defaults to ``['HTHG', 'HTAG', 'AST', 'AF']`` when ``None``.
        test_size: Fraction of rows held out for evaluation.
        random_state: Seed used for BOTH the train/test split and the tree,
            so repeated runs print the same metrics.

    Returns:
        None. All results are printed to stdout.
    """
    print("Starting football match outcome prediction...")

    # --- Configuration ---
    # NOTE(review): the default features look like in-match statistics
    # (presumably half-time goals, away shots on target, away fouls), which are
    # only known once a match is under way -- confirm this is intended.
    if feature_columns is None:
        feature_columns = ['HTHG', 'HTAG', 'AST', 'AF']
    else:
        feature_columns = list(feature_columns)

    # --- Step 1: Load the Data ---
    if not os.path.exists(filename):
        print(f"Error: The file '{filename}' was not found.")
        # The path may be absolute (e.g. a mounted drive), so do not claim the
        # file must sit next to the script.
        print("Please check that the path is correct and the file is accessible.")
        return

    try:
        df = pd.read_csv(filename)
        print("Data loaded successfully.")
    except Exception as e:
        # Deliberately broad: this is the script's top-level boundary and any
        # read/parse failure should be reported rather than crash the run.
        print(f"An error occurred while reading the CSV file: {e}")
        return

    # --- Step 2: Prepare the Data ---
    # Check that every required column exists in the dataframe.
    required_columns = feature_columns + [target_column]
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        print("Error: Missing one or more required columns in the CSV file.")
        print(f"Expected columns: {required_columns}")
        print(f"Available columns: {list(df.columns)}")
        print(f"Missing columns: {missing_columns}")
        return

    # Drop rows with missing values in any required column so the model can
    # be trained.
    df = df.dropna(subset=required_columns)
    if df.empty:
        print("Error: No valid data rows left after removing missing values.")
        return

    # Define features (X) and target (y).
    X = df[feature_columns]
    y = df[target_column]

    # --- Step 3: Split the Data ---
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=random_state
        )
    except ValueError as e:
        # Raised when there are too few rows to form non-empty train and test
        # sets; report it like the other data problems instead of crashing.
        print(f"Error: Could not split the data into train and test sets: {e}")
        return

    # --- Step 4: Create and Train the Model ---
    # Seed the tree as well: without it, ties between equally good splits are
    # broken randomly and the printed metrics can change from run to run.
    model = DecisionTreeClassifier(random_state=random_state)

    print("Training the Decision Tree model...")
    model.fit(X_train, y_train)
    print("Model training complete.")

    # --- Step 5: Make Predictions and Evaluate the Model ---
    y_pred = model.predict(X_test)

    accuracy = metrics.accuracy_score(y_test, y_pred)
    print(f"\nModel Accuracy: {accuracy:.2f}")

    # Per-class precision/recall/F1 for a more detailed view than accuracy.
    print("\nClassification Report:")
    print(metrics.classification_report(y_test, y_pred))

# Run the prediction only when this file is executed directly (as a script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    predict_match_outcome()

Starting football match outcome prediction...
Data loaded successfully.
Training the Decision Tree model...
Model training complete.

Model Accuracy: 0.62

Classification Report:
              precision    recall  f1-score   support

           A       0.65      0.74      0.69        27
           D       0.08      0.08      0.08        13
           H       0.79      0.72      0.75        36

    accuracy                           0.62        76
   macro avg       0.51      0.51      0.51        76
weighted avg       0.62      0.62      0.62        76

