In [10]:
import pandas as pd
import joblib
from keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

# Restore the three pre-trained classifiers from disk.
# The files carry an '.h5' extension but are read with joblib.load, i.e. they
# are joblib (pickle-based) dumps rather than HDF5 files. joblib.load can run
# arbitrary code while unpickling, so only load files from a trusted source.
# The Keras network ('NeuralNetwork_model.h5', load_model(..., compile=False))
# is intentionally not loaded in this notebook.
dt, SVM, Logreg = map(
    joblib.load,
    ('DecisionTree_model.h5', 'SVM_model.h5', 'logreg_model.h5'),
)

In [11]:
# Match statistics that feed the ratings, interleaved home/away: even positions
# hold the home-side column, odd positions the matching away-side column.
stat_columns = ['FTHG', 'FTAG', 'HS', 'AS', 'HST', 'AST', 'HF', 'AF',
                'HC', 'AC', 'HY', 'AY', 'HR', 'AR']
home_stat_idx = [0, 2, 4, 6, 8, 10, 12]   # FTHG, HS, HST, HF, HC, HY, HR
away_stat_idx = [1, 3, 5, 7, 9, 11, 13]   # FTAG, AS, AST, AF, AC, AY, AR

# Load the validation matches, order them by home team and drop incomplete rows.
# 'FTR' is required as well: without it a row with a missing result would be
# silently labelled 0 ("not a home win") further down.
# Chaining (instead of inplace=True) keeps this cell safe to re-run.
df = (
    pd.read_csv('mergedvalidation.csv')
    .sort_values(by=['HomeTeam'], ascending=True)
    .dropna(subset=stat_columns + ['FTR'])
)

# Standardize every statistic column (zero mean, unit variance).
# NOTE(review): the scaler is re-fitted on the validation file itself. The
# loaded models presumably saw features scaled with training-set statistics;
# if that scaler was persisted, load and .transform() with it instead -- confirm.
scaler = StandardScaler()
normalized_data = scaler.fit_transform(df[stat_columns])

# One weight per statistic, in the order of home_stat_idx / away_stat_idx.
weights = [0.25, 0.15, 0.2, 0.1, 0.1, 0.1, 0.1]
assert len(weights) == len(home_stat_idx) == len(away_stat_idx)

# A team's rating is the weighted sum of its standardized statistics.
home_team_rating = normalized_data[:, home_stat_idx].dot(weights)
away_team_rating = normalized_data[:, away_stat_idx].dot(weights)
X = pd.DataFrame({'HomeTeamRating': home_team_rating, 'AwayTeamRating': away_team_rating})

# Binary target: 1 when the full-time result is a home win ('H'), else 0.
# X is built from a bare NumPy array and therefore has a fresh 0..n-1 index,
# whereas df keeps its shuffled post-sort labels. Build y on X's index so the
# two stay aligned by label as well as by position.
y = pd.Series(
    (df['FTR'] == 'H').to_numpy().astype('int64'),
    index=X.index,
    name='FTR',
)
assert len(X) == len(y)

# Hold out 20% of the rows (fixed seed) as the evaluation subset.
# NOTE(review): the classifiers are loaded pre-trained and X_train / y_train
# are not used in the visible cells, so metrics cover only this 20% slice.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
def _binary_metrics(y_true, y_hat):
    """Score binary predictions against the true labels.

    Returns a tuple ``(accuracy, precision, recall, f1)``; precision, recall
    and F1 use scikit-learn's ``average='binary'`` mode.
    """
    accuracy = accuracy_score(y_true, y_hat)
    precision, recall, f1, _ = precision_recall_fscore_support(y_true, y_hat, average='binary')
    return accuracy, precision, recall, f1


def _print_metrics(model_label, metrics):
    """Print one ``<metric> (<model>): <value>`` line per metric."""
    metric_names = ("Accuracy", "Precision", "Recall", "F1-score")
    for metric_name, value in zip(metric_names, metrics):
        print(f"{metric_name} ({model_label}): {value}")


# Predict on the held-out validation rows with each loaded model and score it.
# The numbered result names are kept so that later cells can still use them.

# Decision tree
y_pred = dt.predict(X_test)
accuracy1, precision1, recall1, f1_score1 = _binary_metrics(y_test, y_pred)

# SVM
y_pred = SVM.predict(X_test)
accuracy2, precision2, recall2, f1_score2 = _binary_metrics(y_test, y_pred)

# Logistic regression (y_pred is left holding these predictions, as before)
y_pred = Logreg.predict(X_test)
accuracy3, precision3, recall3, f1_score3 = _binary_metrics(y_test, y_pred)

# Report: one metric per line, without the stray leading space that passing
# "\n" as a separate print() argument produced, and with "Decision" spelled
# correctly in every label.
_print_metrics("Decision Tree", (accuracy1, precision1, recall1, f1_score1))
_print_metrics("SVM", (accuracy2, precision2, recall2, f1_score2))
_print_metrics("Logistic Reg", (accuracy3, precision3, recall3, f1_score3))


Accuracy (Decision Tree): 0.6197368421052631 
 Precision (Decision Tree): 0.6411960132890365 
 Recall (Descision Tree): 0.516042780748663 
 F1-score (Descision Tree): 0.5718518518518518
Accuracy (SVM): 0.6421052631578947 
 Precision (SVM): 0.656441717791411 
 Recall (SVM): 0.5721925133689839 
 F1-score (SVM): 0.6114285714285714
Accuracy (Logistic Reg): 0.6789473684210526 
 Precision (Logistic Reg): 0.7044025157232704 
 Recall (Logistic Reg): 0.5989304812834224 
 F1-score (Logistic Reg): 0.6473988439306357




In [12]:
# Inspect the engineered feature matrix. A bare last expression lets the
# notebook render the DataFrame with its rich (truncated) display instead of
# the plain-text dump produced by print().
X

      HomeTeamRating  AwayTeamRating
0          -0.265871        0.414885
1           1.452766        0.422443
2           0.316712       -0.663916
3          -0.025126       -0.084850
4           0.460326       -0.781294
...              ...             ...
3795        0.281330       -0.110667
3796       -0.559876       -0.012675
3797       -0.820714        1.302073
3798       -0.558851       -0.917643
3799       -0.020531        0.642986

[3800 rows x 2 columns]
