In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Load and preprocess the data
# One CSV per season. The individual names df1..df8 are still bound in case
# other code refers to them.
SEASON_FILES = [
    '2014-15.csv',
    '2015-16.csv',
    '2016-17.csv',
    '2017-18.csv',
    '2018-19.csv',
    '2019-20.csv',
    '2021.csv',
    '2022.csv',
]
df1, df2, df3, df4, df5, df6, df7, df8 = (pd.read_csv(path) for path in SEASON_FILES)

# ignore_index=True gives every row a unique label; without it each file's
# labels restart and repeat across the concatenated frame.
df = pd.concat([df1, df2, df3, df4, df5, df6, df7, df8], ignore_index=True)
df.sort_values(by=['HomeTeam'], ascending=True, inplace=True)

# Per-side statistics, listed in the order the rating weights are applied.
HOME_STATS = ['FTHG', 'HS', 'HST', 'HF', 'HC', 'HY', 'HR']
AWAY_STATS = ['FTAG', 'AS', 'AST', 'AF', 'AC', 'AY', 'AR']
# Interleave home/away so the scaled matrix keeps the column order
# FTHG, FTAG, HS, AS, ... (home stats in even columns, away stats in odd ones).
STAT_COLUMNS = [col for pair in zip(HOME_STATS, AWAY_STATS) for col in pair]

# Rows missing any of the statistics cannot be rated, so drop them.
df.dropna(subset=STAT_COLUMNS, inplace=True)

# NOTE(review): the scaler is fitted on every row before any train/test split,
# so held-out rows influence the scaling statistics -- confirm this is intended.
scaler = StandardScaler()
normalized_data = scaler.fit_transform(df[STAT_COLUMNS])

# One weight per statistic, in HOME_STATS / AWAY_STATS order (nominally sum to 1).
weights = [0.25, 0.15, 0.2, 0.1, 0.1, 0.1, 0.1]
home_columns = [STAT_COLUMNS.index(col) for col in HOME_STATS]
away_columns = [STAT_COLUMNS.index(col) for col in AWAY_STATS]
home_team_rating = normalized_data[:, home_columns].dot(weights)
away_team_rating = normalized_data[:, away_columns].dot(weights)

# NOTE(review): FTHG/FTAG feed the ratings while the label comes from FTR. If
# FTR is derived from those goal columns (presumably full-time goals/result --
# TODO confirm), the features leak the label and the scores are optimistic.
# Share df's index so X and y line up by label as well as by position.
X = pd.DataFrame(
    {'HomeTeamRating': home_team_rating, 'AwayTeamRating': away_team_rating},
    index=df.index,
)
# Binary target: 1 where FTR is 'H', 0 for every other value (including missing).
y = (df['FTR'] == 'H').astype('int64')

# Split the data into training, validation, and testing sets
# Stage 1 holds out 20% of all rows for testing. Stage 2 takes 25% of the
# remaining rows for validation, which works out to roughly 60/20/20.
SPLIT_SEED = 1000
X_rest, X_test, y_rest, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SPLIT_SEED,
)
X_train, X_val, y_train, y_val = train_test_split(
    X_rest,
    y_rest,
    test_size=0.25,
    random_state=SPLIT_SEED,
)

# Create the neural network model
# Two ReLU hidden layers (16 then 8 units) over the 2 input features,
# followed by a single sigmoid output unit.
model = Sequential([
    Dense(16, input_dim=2, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train the model
# The validation split is passed in so its loss/accuracy is reported per epoch.
model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
)

# Make predictions and evaluate the model performance on validation set
# Raw model outputs are rounded to hard 0/1 labels before scoring.
val_outputs = model.predict(X_val)
y_pred_val = np.round(val_outputs)

accuracy_val = accuracy_score(y_val, y_pred_val)
# The fourth returned value is not used here, so it is discarded.
val_scores = precision_recall_fscore_support(y_val, y_pred_val, average='binary')
precision_val, recall_val, f1_score_val, _ = val_scores

print("Validation set performance:")
for label, value in (
    ("Accuracy:", accuracy_val),
    ("Precision:", precision_val),
    ("Recall:", recall_val),
    ("F1-score:", f1_score_val),
):
    print(label, value)

# Make predictions and evaluate the model performance on testing set
# Raw model outputs are rounded to hard 0/1 labels before scoring.
y_pred_test = np.round(model.predict(X_test))
accuracy_test = accuracy_score(y_test, y_pred_test)
# The fourth returned value is not used here, so it is discarded.
precision_test, recall_test, f1_score_test, _ = precision_recall_fscore_support(y_test, y_pred_test, average='binary')

print("Testing set performance:")
print("Accuracy:", accuracy_test)
print("Precision:", precision_test)
print("Recall:", recall_test)
# The F1-score was computed but never reported; print it like the validation report does.
print("F1-score:", f1_score_test)









Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Validation set performance:
Accuracy: 0.7648148148148148
Precision: 0.7715736040609137
Recall: 0.6495726495726496
F1-score: 0.7053364269141532
Testing set performance:
Accuracy: 0.7277777777777777
Precision: 0.735
Recall: 0.6099585062240664
