In [5]:
import json
from os import makedirs
from time import time
import pandas as pd
from joblib import dump
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC


def train_classifier(clf, X_train, y_train):
    """Fit ``clf`` on the training data and print how long the fit took.

    Parameters
    ----------
    clf : object
        Estimator exposing ``fit(X, y)``; it is fitted in place.
    X_train, y_train
        Passed unchanged to ``clf.fit``.

    Returns
    -------
    None
    """
    began = time()
    clf.fit(X_train, y_train)
    elapsed = time() - began
    # NOTE(review): ``{:2f}`` sets a minimum field width of 2, not a precision,
    # so six decimals are printed; ``{:.2f}`` may have been intended.
    print("Model trained in {:2f} seconds".format(elapsed))


def predict_labels(clf, features, target, average='micro'):
    """Predict with ``clf`` and score the predictions against ``target``.

    Prints the time spent inside ``clf.predict``.

    Parameters
    ----------
    clf : object
        Fitted estimator exposing ``predict(features)``.
    features
        Passed unchanged to ``clf.predict``.
    target
        True labels. Must support element-wise ``==`` against the
        predictions (presumably a pandas Series or numpy array — confirm
        against callers).
    average : str, optional
        Averaging mode forwarded to ``f1_score``. Defaults to ``'micro'``,
        which is the value that was previously hard-coded.

    Returns
    -------
    tuple
        ``(f1, accuracy)`` where ``accuracy`` is the fraction of predictions
        equal to the corresponding target label.

    Notes
    -----
    For single-label multiclass targets, micro-averaged F1 is numerically the
    same as accuracy, so with the default the two returned values coincide.
    Pass ``average='macro'`` or ``average='weighted'`` to get an F1 that
    carries information beyond accuracy.
    """
    start = time()
    y_pred = clf.predict(features)
    end = time()
    # NOTE(review): ``{:2f}`` is a minimum width, not a precision; ``{:.2f}``
    # may have been intended. Left unchanged to keep the printed output stable.
    print("Made Predictions in {:2f} seconds".format(end - start))

    # Element-wise comparison yields one boolean per sample; summing counts hits.
    acc = sum(target == y_pred) / float(len(y_pred))

    return f1_score(target, y_pred, average=average), acc


def model(clf, X_train, y_train, X_test, y_test):
    """Train ``clf`` and print F1 / accuracy for the training and test splits.

    The classifier is fitted on ``(X_train, y_train)`` via
    ``train_classifier`` and then scored with ``predict_labels`` first on the
    training data and then on ``(X_test, y_test)``. Nothing is returned; all
    results are printed.
    """
    train_classifier(clf, X_train, y_train)

    # Both splits get the same report layout; only heading and data differ.
    splits = (
        ("Training Info:", X_train, y_train),
        ("Test Metrics:", X_test, y_test),
    )
    for heading, features, target in splits:
        f1, acc = predict_labels(clf, features, target)
        print(heading)
        print("-" * 20)
        print("F1 Score:{}".format(f1))
        print("Accuracy:{}".format(acc))


In [6]:
# Load the preprocessed match data.
data = pd.read_csv('pre.csv')

# Feature columns fed to the classifier; the two *_encoded columns are
# created below from the team-name columns.
input_col = ['home_encoded', 'away_encoded', 'HS',
             'AS', 'HST', 'AST', 'WRH', 'WRA', 'PH', 'PA']

# Encode home-team names as integers and keep the name -> code mapping so it
# can be exported alongside the model.
encoder = LabelEncoder()
home_encoded = encoder.fit_transform(data['HomeTeam'])
home_encoded_mapping = dict(
    zip(encoder.classes_, encoder.transform(encoder.classes_).tolist()))
data['home_encoded'] = home_encoded

# Away teams get their own, independently fitted encoder.
# NOTE(review): because the two encoders are fitted separately, a team can
# receive different codes in the home and away mappings if the two columns do
# not contain exactly the same set of names — confirm this is intended.
encoder = LabelEncoder()
away_encoded = encoder.fit_transform(data['AwayTeam'])
away_encoded_mapping = dict(
    zip(encoder.classes_, encoder.transform(encoder.classes_).tolist()))
data['away_encoded'] = away_encoded

# Show any rows containing missing values, then drop them.
print(data[data.isna().any(axis=1)])
data = data.dropna(axis=0)



# Training & Testing

X = data[input_col]
Y = data['FTR']

# Seed the split so the reported metrics are reproducible across re-runs;
# the seed matches the one already used for the SVC below.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.3, random_state=100)
# NOTE(review): svc_classifier is constructed but never trained or exported
# in this cell.
svc_classifier = SVC(random_state=100, kernel='rbf')
# NOTE(review): recent scikit-learn releases deprecate the ``multi_class``
# argument — confirm against the installed version.
lr_classifier = LogisticRegression(multi_class='ovr', max_iter=500,class_weight='balanced')
print()
print("Logistic Regression")
print("-" * 20)
model(lr_classifier, X_train, Y_train, X_test, Y_test)

# Exporting the Model
print()
print()

exportedModelsPath = 'exportedModels'
makedirs(exportedModelsPath, exist_ok=True)
dump(lr_classifier, f'{exportedModelsPath}/lr_classifier.model')

# The team-name -> code mappings are needed to encode inputs at inference time.
exportMetaData = dict()
exportMetaData['home_teams'] = home_encoded_mapping
exportMetaData['away_teams'] = away_encoded_mapping

# Use a context manager so the file is flushed and closed even if
# serialization fails; the bare open() previously leaked the handle.
with open(f'{exportedModelsPath}/metadata.json', 'w') as exportMetaDataFile:
    json.dump(exportMetaData, exportMetaDataFile)

print(f'Model(s) exported successfully to {exportedModelsPath}/')

Empty DataFrame
Columns: [Date, HomeTeam, AwayTeam, HS, AS, HST, AST, WRH, WRA, PH, PA, FTR, home_encoded, away_encoded]
Index: []

Logistic Regression
--------------------
Model trained in 0.012998 seconds
Made Predictions in 0.001002 seconds
Training Info:
--------------------
F1 Score:0.7015037593984963
Accuracy:0.7015037593984963
Made Predictions in 0.001001 seconds
Test Metrics:
--------------------
F1 Score:0.7035087719298245
Accuracy:0.7035087719298245


Model(s) exported successfully to exportedModels/


