In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import os

# File Paths
base_dir = r"C:\Users\Nitro\Downloads"
results_file = os.path.join(base_dir, "results.csv")
name_changes_file = os.path.join(base_dir, "former_names.csv")

# Load Data
results = pd.read_csv(results_file)
name_changes = pd.read_csv(name_changes_file)

# Validate Columns
# Checked before any column is touched, so a malformed file fails with this
# explicit message rather than a KeyError from the steps below.
required_columns = ['date', 'home_team', 'away_team', 'home_score', 'away_score', 'tournament', 'city', 'country', 'neutral']
missing_cols = [col for col in required_columns if col not in results.columns]
if missing_cols:
    raise ValueError(f"Missing columns in results.csv: {missing_cols}")

# Standardize Team Names
name_changes = name_changes.rename(columns={"former": "old_name", "current": "new_name"})
# A dictionary lookup (instead of a left merge) cannot duplicate match rows
# when the same former name is listed more than once; the last listing wins.
known_renames = name_changes.dropna(subset=['old_name', 'new_name'])
name_lookup = dict(zip(known_renames['old_name'], known_renames['new_name']))
for col in ['home_team', 'away_team']:
    # Teams without a recorded rename keep their original name.
    results[col] = results[col].map(name_lookup).fillna(results[col])

# Matches without a recorded score cannot be labelled; left in place they
# would fall through to the "away win" class in the labelling step below.
results = results.dropna(subset=['home_score', 'away_score']).reset_index(drop=True)

# Feature Engineering
results['goal_diff'] = results['home_score'] - results['away_score']
results['year'] = pd.to_datetime(results['date']).dt.year
# Class labels: 0 = home win, 1 = draw, 2 = away win.
results['match_result'] = results['goal_diff'].apply(lambda x: 0 if x > 0 else (1 if x == 0 else 2))
# Rolling Win/Draw/Loss Rates
def rolling_avg(team_col, result_col, value, *, window=5, frame=None):
    """Return each team's recent rate of ``value`` outcomes before each match.

    For every row, the rate is the share of the team's previous ``window``
    matches (grouped by ``team_col``, in row order) whose ``result_col``
    equals ``value``. The current row's own outcome is excluded so the
    feature never contains the label it is used to predict.

    Args:
        team_col: Column to group by (e.g. ``'home_team'``).
        result_col: Column holding the outcome code.
        value: Outcome code to count.
        window: Number of previous matches to average over.
        frame: DataFrame to read from; defaults to the module-level
            ``results`` table.

    Returns:
        A Series aligned to the frame's index. A team's first match, which
        has no history, gets a rate of 0.0.
    """
    data = results if frame is None else frame

    def _prior_rate(outcomes):
        # Cast to float before shifting: shifting a boolean Series would
        # introduce NaN and turn it into an object column.
        hits = outcomes.eq(value).astype(float)
        # shift(1) drops the current match from its own window.
        return hits.shift(1).rolling(window, min_periods=1).mean().fillna(0.0)

    return data.groupby(team_col)[result_col].transform(_prior_rate)

# Rolling form columns. match_result codes are 0 = home win, 1 = draw,
# 2 = away win, so from the away side a "win" is code 2 and a "loss" is code 0.
rate_specs = (
    ('home_win_rate', 'home_team', 0),
    ('home_draw_rate', 'home_team', 1),
    ('home_loss_rate', 'home_team', 2),
    ('away_win_rate', 'away_team', 2),
    ('away_draw_rate', 'away_team', 1),
    ('away_loss_rate', 'away_team', 0),
)
for rate_col, side_col, outcome_code in rate_specs:
    results[rate_col] = rolling_avg(side_col, 'match_result', outcome_code)

# Home Advantage
# 1 when the match is played at the home side's venue, 0 on neutral ground.
results['home_advantage'] = results['neutral'].map(lambda is_neutral: int(not is_neutral))

# One-Hot Encoding
# NOTE(review): the model inputs built further down only read the team ids and
# the scaled numeric features - confirm these indicator columns are needed.
results = pd.get_dummies(results, columns=['tournament', 'city', 'country'])

# Team Encoding
# Every team seen on either side gets one integer id, in order of first appearance.
teams = pd.unique(pd.concat([results['home_team'], results['away_team']]))
team_map = dict(zip(teams, range(len(teams))))
for side in ('home', 'away'):
    results[f'{side}_team_id'] = results[f'{side}_team'].map(team_map)
num_teams = len(team_map)

# Feature Scaling
scaled_features = ['year', 'home_win_rate', 'home_draw_rate', 'home_loss_rate',
                   'away_win_rate', 'away_draw_rate', 'away_loss_rate', 'home_advantage']

# Train-Test Split
# Row positions are split first so the scaler can be fitted on training rows
# only; fitting it on the whole table would let validation rows influence the
# scaling parameters. Splitting positions with the same test_size/random_state
# selects the same rows as splitting the frame itself.
train_rows, val_rows = train_test_split(np.arange(len(results)), test_size=0.2, random_state=42)

scaler = MinMaxScaler()
scaler.fit(results[scaled_features].iloc[train_rows])
# The whole table is still transformed in place so later code that reads these
# columns from `results` keeps seeing scaled values.
results[scaled_features] = scaler.transform(results[scaled_features])

train_data = results.iloc[train_rows]
val_data = results.iloc[val_rows]

X_train = train_data[['home_team_id', 'away_team_id'] + scaled_features]
y_train = keras.utils.to_categorical(train_data['match_result'], num_classes=3)

X_val = val_data[['home_team_id', 'away_team_id'] + scaled_features]
y_val = keras.utils.to_categorical(val_data['match_result'], num_classes=3)

# Neural Network Model
# Three named inputs: one integer team id per side plus the scaled feature vector.
home_input = keras.layers.Input(name='home_team', shape=(1,))
away_input = keras.layers.Input(name='away_team', shape=(1,))
numeric_input = keras.layers.Input(name='numeric_features', shape=(len(scaled_features),))

# Embedding width is the floor of sqrt(num_teams).
embedding_size = int(np.sqrt(num_teams))

# Home and away each get their own embedding table (weights are not shared).
home_embed, away_embed = [
    keras.layers.Embedding(input_dim=num_teams, output_dim=embedding_size)(team_input)
    for team_input in (home_input, away_input)
]
home_flat, away_flat = [
    keras.layers.Flatten()(embedded) for embedded in (home_embed, away_embed)
]

concat = keras.layers.Concatenate()([home_flat, away_flat, numeric_input])

# Two L2-regularised swish layers, each followed by dropout, then a plain
# 32-unit swish layer.
dense = concat
for units in (128, 64):
    dense = keras.layers.Dense(
        units,
        activation='swish',
        kernel_regularizer=keras.regularizers.l2(0.0001),
    )(dense)
    dense = keras.layers.Dropout(0.2)(dense)
dense = keras.layers.Dense(32, activation='swish')(dense)

# Three-way softmax over the match_result classes.
output = keras.layers.Dense(3, activation="softmax", name="match_result")(dense)

# Compile Model
model = keras.Model(inputs=[home_input, away_input, numeric_input], outputs=output)
optimizer = keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Train Model
def _model_inputs(features):
    """Arrange a feature frame into the dict keyed by the model's input names."""
    return {
        'home_team': features['home_team_id'],
        'away_team': features['away_team_id'],
        'numeric_features': features[scaled_features],
    }


train_inputs = _model_inputs(X_train)
val_inputs = _model_inputs(X_val)

model.fit(
    train_inputs,
    y_train,
    validation_data=(val_inputs, y_val),
    epochs=44,
    batch_size=32,
    verbose=1,
)

# Evaluate Model
# evaluate() returns [loss, accuracy] given the metrics passed to compile().
val_results = model.evaluate(val_inputs, y_val, verbose=1)

print(f"Final Accuracy: {val_results[1]:.4f}")

# Prediction Function
def predict_match(home_team_name, away_team_name, match_year, neutral=False):
    """Print the model's home-win / draw / away-win probabilities for a fixture.

    The numeric features are the historical means of the home side's home
    rate columns and the away side's away rate columns in ``results``.

    Args:
        home_team_name: Name of the home side, as stored in ``team_map``.
        away_team_name: Name of the away side, as stored in ``team_map``.
        match_year: Calendar year of the fixture (unscaled).
        neutral: True when the match is on neutral ground, which sets the
            home-advantage feature to 0 instead of 1.

    Returns:
        None. Results are printed; an unknown team prints an error message
        and returns early.
    """
    # The home side is checked first, so it is the one reported if both are unknown.
    for team_name in (home_team_name, away_team_name):
        if team_name not in team_map:
            print(f"Error: Unknown team - {team_name}")
            return

    home_team = team_map[home_team_name]
    away_team = team_map[away_team_name]

    home_rate_cols = ['home_win_rate', 'home_draw_rate', 'home_loss_rate']
    away_rate_cols = ['away_win_rate', 'away_draw_rate', 'away_loss_rate']

    # Filter once per side instead of once per column.
    home_history = results.loc[results['home_team'] == home_team_name, home_rate_cols]
    away_history = results.loc[results['away_team'] == away_team_name, away_rate_cols]

    # A team that has never played on that side has no rows, so its means are
    # NaN; use 0.0 so the model is not fed NaN inputs.
    rates = pd.concat([home_history.mean(), away_history.mean()]).fillna(0.0)

    home_advantage = 0 if neutral else 1
    raw_features = np.array([[match_year, *rates.tolist(), home_advantage]])

    # NOTE(review): the rate columns in `results` were already passed through
    # `scaler` at module level, so they are scaled a second time here. That is
    # only harmless while their fitted min/max are 0 and 1 - confirm.
    input_data = {'home_team': np.array([home_team]), 'away_team': np.array([away_team]),
                  'numeric_features': scaler.transform(raw_features)}

    prediction = model.predict(input_data, verbose=0)
    win_prob, draw_prob, loss_prob = prediction[0]

    print(f"Home Win Probability: {win_prob*100:.2f}%")
    print(f"Draw Probability: {draw_prob*100:.2f}%")
    print(f"Away Win Probability: {loss_prob*100:.2f}%")

# Example Prediction
predict_match("Germany", "France", 2025)

Epoch 1/44
1200/1200 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - accuracy: 0.7497 - loss: 0.5180 - val_accuracy: 0.7726 - val_loss: 0.4714
Epoch 2/44
1200/1200 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.7816 - loss: 0.4601 - val_accuracy: 0.7741 - val_loss: 0.4662
Epoch 3/44
1200/1200 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.7798 - loss: 0.4615 - val_accuracy: 0.7756 - val_loss: 0.4645
Epoch 4/44
1200/1200 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.7793 - loss: 0.4545 - val_accuracy: 0.7736 - val_loss: 0.4648
Epoch 5/44
1200/1200 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.7843 - loss: 0.4505 - val_accuracy: 0.7749 - val_loss: 0.4615
Epoch 6/44
1200/1200 ━━━━━━━━━━━━━━━━━━━━ 3s 2ms/step - accuracy: 0.7899 - loss: 0.4431 - val_accuracy: 0.7769 - val_loss: 0.4603
Epoch 7/44
[1m1

