## Dependencies

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import xgboost as xgb

## Load dataset

In [2]:
df = pd.read_csv('match_score_dataset_basketball.csv')

## Convert categorical columns

In [3]:
df['Team 1'] = df['Team 1'].astype('category').cat.codes
df['Team 2'] = df['Team 2'].astype('category').cat.codes
df['Winner'] = df['Winner'].astype('category').cat.codes

## Define features (X) and target (y)

In [4]:
X = df[['Team 1', 'Team 2', 'Score 1', 'Score 2']]  # Feature columns
y = df['Winner']  # Target column

## Train-Test Split

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Random Forest Model


In [6]:
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

## Evaluate Random Forest

In [7]:
print("Random Forest Results")
print(f"Accuracy: {accuracy_score(y_test, y_pred_rf) * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred_rf))

Random Forest Results
Accuracy: 60.56%
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.60      1.00      0.75         3
           2       0.71      0.80      0.75        15
           3       0.80      0.80      0.80        10
           4       1.00      0.50      0.67         2
           5       0.00      0.00      0.00         1
           6       0.75      0.60      0.67         5
           7       0.00      0.00      0.00         4
           8       0.00      0.00      0.00         4
           9       0.25      1.00      0.40         1
          10       1.00      0.50      0.67         2
          12       0.50      0.50      0.50         2
          13       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         2
          16       0.00      0.00      0.00         1
          19       0.40      0.67      0.50         3
          21       1.00    

## Logistic Regression Model


In [8]:
lr_model = LogisticRegression(max_iter=1000, random_state=42)
lr_model.fit(X_train, y_train)
y_pred_lr = lr_model.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## Evaluate Logistic Regression

In [9]:
print("\nLogistic Regression Results")
print(f"Accuracy: {accuracy_score(y_test, y_pred_lr) * 100:.2f}%")
print("Classification Report:")
print(classification_report(y_test, y_pred_lr))


Logistic Regression Results
Accuracy: 20.42%
Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00         3
           2       0.27      0.53      0.36        15
           3       0.17      0.20      0.18        10
           4       0.00      0.00      0.00         2
           5       0.00      0.00      0.00         1
           6       0.00      0.00      0.00         5
           7       0.00      0.00      0.00         4
           8       0.00      0.00      0.00         4
           9       0.00      0.00      0.00         1
          10       0.00      0.00      0.00         2
          12       0.00      0.00      0.00         2
          13       0.00      0.00      0.00         1
          14       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         2
          16       0.00      0.00      0.00         1
          19       0.00      0.00      0.00         3
          21       0.12    

## Confusion Matrix

In [16]:
print("\nConfusion Matrix for Random Forest:")
print(confusion_matrix(y_test, y_pred_rf))

print("\nConfusion Matrix for Logistic Regression:")
print(confusion_matrix(y_test, y_pred_lr))


Confusion Matrix for Random Forest:
[[ 0  1  0 ...  0  0  0]
 [ 0  3  0 ...  0  0  0]
 [ 0  0 12 ...  0  0  0]
 ...
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  0]
 [ 0  0  0 ...  0  0  5]]

Confusion Matrix for Logistic Regression:
[[0 0 1 ... 0 0 0]
 [0 0 1 ... 0 0 0]
 [0 0 8 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]
