In [956]:
# Import the necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from IPython.display import display, HTML, clear_output
import ipywidgets as widgets

In [957]:
# Initialization and data loading.
# Seed NumPy's global RNG first: every simulated match below draws from it.
np.random.seed(42)

# `teams` holds the per-team statistics, `groups` the group-stage draw.
teams, groups = (
    pd.read_csv(csv_path) for csv_path in ("FIFA_REAL_DATA.csv", "groups.csv")
)

In [958]:
# 1. Feature engineering
# Raw columns that either feed the model directly or are used to derive the
# per-match tactical metrics below.
all_features = [
    'Elo_Ratings', 'Fifa_Rating_Points', 'TOTAL_VALUE_MILLIONS',
    'Home_Advantage', 'World Cup Titles', 'WC_PARTICIPATIONS',
    'Coach_Experience_Score', 'Key_Player_Total_Goals_2025',
    'Goals_For', 'Goals_Against', 'AVG_Points_LAST_10_MATCH', 'AVG_AGE', 'Total_Matches'
]

# Replace missing values with the median of their own column.
teams[all_features] = teams[all_features].fillna(teams[all_features].median())

# Tactical strength metrics: goals conceded / scored per match played.
# A team with zero recorded matches would turn the ratio into inf (or NaN for
# 0/0), and StandardScaler refuses infinite input. Treat zero matches as
# "unknown" and fall back to the median rate of the remaining teams. When no
# team has zero matches the result is identical to a plain division.
matches_played = teams['Total_Matches'].replace(0, np.nan)
goals_against_rate = teams['Goals_Against'] / matches_played
goals_for_rate = teams['Goals_For'] / matches_played
teams['Defense_Strength'] = goals_against_rate.fillna(goals_against_rate.median())
teams['Offense_Strength'] = goals_for_rate.fillna(goals_for_rate.median())

# Final feature set for the model (10 features). The order defines the column
# order of every feature vector handed to the model.
final_feature_set = [
    'Elo_Ratings',                # index 0
    'Home_Advantage',             # index 1
    'World Cup Titles',           # index 2
    'TOTAL_VALUE_MILLIONS',       # index 3
    'Coach_Experience_Score',     # index 4
    'AVG_Points_LAST_10_MATCH',   # index 5
    'Offense_Strength',           # index 6
    'Defense_Strength',           # index 7 (goals conceded per match: higher is worse)
    'Key_Player_Total_Goals_2025',# index 8
    'AVG_AGE'                     # index 9 (weighted negatively in training)
]

# Standardize the model features (mean 0, standard deviation 1). `teams`
# keeps the raw values; `teams_scaled` is a copy with the scaled columns.
scaler = StandardScaler()
teams_scaled = teams.copy()
teams_scaled[final_feature_set] = scaler.fit_transform(teams[final_feature_set])

print("‚úÖ Step 1: Feature Engineering Complete. Data is scaled and tactical metrics are ready.")

‚úÖ Step 1: Feature Engineering Complete. Data is scaled and tactical metrics are ready.


In [959]:
# 2. Model Training
# Hand-tuned weight of each scaled feature difference in the synthetic match
# score. The weights are keyed by feature name so they stay attached to the
# right column if `final_feature_set` is ever reordered; a feature without a
# weight raises KeyError instead of silently shifting the weights.
FEATURE_WEIGHTS = {
    'Elo_Ratings': 0.45,
    'Home_Advantage': 0.02,
    'World Cup Titles': 0.12,
    'TOTAL_VALUE_MILLIONS': 0.15,
    'Coach_Experience_Score': 0.05,
    'AVG_Points_LAST_10_MATCH': 0.10,      # current form, last 10 matches
    'Offense_Strength': 0.05,
    'Defense_Strength': -0.02,             # goals conceded per match
    'Key_Player_Total_Goals_2025': 0.08,
    'AVG_AGE': -0.06,
}

# A pairing is labelled a win/loss only when |score| exceeds this value;
# anything in between is labelled a draw.
DECISION_THRESHOLD = 0.20

weight_vector = [FEATURE_WEIGHTS[feature] for feature in final_feature_set]

X, y = [], []

# Plain NumPy matrix (one row of scaled features per team) for fast iteration.
features_matrix = teams_scaled[final_feature_set].values

n_teams = len(features_matrix)
for i in range(n_teams):
    for j in range(i + 1, n_teams):
        diff = features_matrix[i] - features_matrix[j]

        # Weighted sum of the feature differences (team i minus team j),
        # accumulated left to right in feature order.
        score = sum(delta * weight for delta, weight in zip(diff, weight_vector))

        # Labeling: Win (2), Draw (1), Loss (0) from team i's point of view.
        if score > DECISION_THRESHOLD:
            label = 2
        elif score < -DECISION_THRESHOLD:
            label = 0
        else:
            label = 1

        # Add the pairing from both sides so the training set is symmetric:
        # the mirrored sample swaps win and loss and keeps a draw a draw.
        X.append(diff)
        y.append(label)
        X.append(-diff)
        y.append(2 - label)

# Train the Random Forest Classifier with 3000 estimators for high stability
model = RandomForestClassifier(n_estimators=3000, random_state=42, max_depth=12)
model.fit(X, y)

print("‚úÖ Step 2: Model Trained Succesfully")

‚úÖ Step 2: Model Trained Succesfully


In [960]:
# CELL 4: UI Styling & Global Flags Logic
# Country name -> flagcdn.com code (ISO 3166-1 alpha-2, or a subdivision code
# such as 'gb-eng'). Built once instead of on every call.
_FLAG_CODES = {
    'Argentina': 'ar', 'Brazil': 'br', 'France': 'fr', 'Germany': 'de', 'Spain': 'es',
    'England': 'gb-eng', 'Portugal': 'pt', 'Netherlands': 'nl', 'Italy': 'it', 'Belgium': 'be',
    'Morocco': 'ma', 'United States': 'us', 'Mexico': 'mx', 'Canada': 'ca', 'Japan': 'jp',
    'Korea Republic': 'kr', 'Saudi Arabia': 'sa', 'Ivory Coast': 'ci', 'Sweden': 'se',
    'Tunisia': 'tn', 'Haiti': 'ht', 'Turkey': 'tr', 'South Africa': 'za', 'Jamaica': 'jm',
    'Iraq': 'iq', 'Jordan': 'jo', 'Egypt': 'eg', 'Algeria': 'dz', 'Norway': 'no',
    'Austria': 'at', 'Australia': 'au', 'Croatia': 'hr', 'Switzerland': 'ch', 'Uruguay': 'uy',
    'Colombia': 'co', 'Senegal': 'sn', 'Denmark': 'dk', 'Poland': 'pl', 'Ecuador': 'ec',
    # Kept for backward compatibility: the original map used this misspelling.
    'United Stated': 'us',
    # Common alternative spellings, and names whose first two letters are NOT
    # their country code (the fallback below would pick another flag).
    'USA': 'us', 'South Korea': 'kr', 'Paraguay': 'py', 'New Zealand': 'nz',
    'Scotland': 'gb-sct',
}


def get_flag(team):
    """Return an HTML ``<img>`` tag showing the flag of ``team``.

    Args:
        team: Country name as a string.

    Returns:
        An ``<img>`` tag pointing at ``https://flagcdn.com/w40/<code>.png``.
        The inline style always contains ``width:25px``; callers enlarge the
        flag by replacing that substring, so it must not change.

    Names missing from the lookup table fall back to the first two letters of
    the name, lower-cased. That is only a best-effort guess and can select the
    wrong flag, so add an explicit entry for any new team.
    """
    code = _FLAG_CODES.get(team, team[:2].lower())
    return f'<img src="https://flagcdn.com/w40/{code}.png" style="width:25px; height:auto; margin-right:10px; border-radius:2px; vertical-align:middle;">'

# Shared CSS for the HTML reports. The group-stage and knockout cells prepend
# this string to their markup (`style + "<div ...>"`), so the name `style`
# must stay bound to this str for those cells to run.
style = """
<style>
    @import url('https://fonts.googleapis.com/css2?family=Oswald:wght@500&display=swap');
    .wc-body { background-color: #0a192f; color: #ccd6f6; font-family: 'Segoe UI', sans-serif; padding: 25px; text-align: center; }
    .main-title { color: #ffffff; font-family: 'Oswald', sans-serif; font-size: 3em; text-shadow: 0 0 15px #64ffda; margin-bottom: 30px; border-bottom: 3px solid #112240; padding-bottom: 15px; text-transform: uppercase; }
    .stage-label { background: #112240; color: #64ffda; padding: 12px 35px; margin: 40px auto 20px auto; border-radius: 50px; width: fit-content; font-weight: bold; border: 2px solid #64ffda; font-size: 1.5em; text-transform: uppercase; }
    .grid-sys { display: grid; grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); gap: 18px; text-align: left; }
    .group-box { background: #112240; border-radius: 12px; padding: 18px; border: 1px solid #233554; }
    .team-line { display: flex; justify-content: space-between; align-items: center; padding: 9px 0; border-bottom: 1px solid #1d2d44; font-size: 1.1em; }
    .match-card { background: #112240; margin: 12px auto; padding: 18px; border-radius: 12px; border: 1px solid #233554; display: flex; justify-content: space-between; align-items: center; width: 85%; font-size: 1.2em; }
    .win-badge { background: #64ffda; color: #0a192f; padding: 5px 15px; border-radius: 5px; font-weight: bold; font-size: 0.8em; }
</style>
"""

def predict_match(team_a, team_b, knockout=False):
    """Simulate one match between two teams and return the outcome.

    Reads the notebook globals ``teams_scaled``, ``final_feature_set`` and
    ``model``, and draws from NumPy's global RNG.

    Args:
        team_a: Name of the first team, as found in ``teams_scaled['TEAM_NAME']``.
        team_b: Name of the second team.
        knockout: When True a sampled draw is settled by a tie-break, so a
            team name is always returned.

    Returns:
        The winning team's name, or the literal string ``"DRAW"`` when
        ``knockout`` is False and a draw was sampled.

    Raises:
        ValueError: If either team is not present in ``teams_scaled``.
    """
    def _team_row(team_name):
        # First row matching the name; fail with a readable message instead
        # of an out-of-bounds positional index error.
        rows = teams_scaled[teams_scaled['TEAM_NAME'] == team_name]
        if rows.empty:
            raise ValueError(f"Team {team_name!r} not found in teams_scaled['TEAM_NAME']")
        return rows.iloc[0]

    tA, tB = _team_row(team_a), _team_row(team_b)

    # Feature difference (team_a minus team_b) as a single-row float matrix.
    diff = (tA[final_feature_set] - tB[final_feature_set]).values.astype(float).reshape(1, -1)
    probs = model.predict_proba(diff)[0]

    # predict_proba columns follow model.classes_, which is only [0, 1, 2]
    # when all three labels were present during training. Sampling from
    # classes_ keeps each probability paired with its own label.
    res = np.random.choice(model.classes_, p=probs)

    if knockout and res == 1:
        # Tie-break: the higher-Elo side wins 65% of the time; level ratings
        # are a coin flip rather than a 35/65 split against team_a.
        elo_a, elo_b = tA['Elo_Ratings'], tB['Elo_Ratings']
        if elo_a > elo_b:
            p_a = 0.65
        elif elo_a < elo_b:
            p_a = 0.35
        else:
            p_a = 0.5
        # np.random.choice returns a NumPy string scalar; hand back a plain str.
        return str(np.random.choice([team_a, team_b], p=[p_a, 1 - p_a]))

    # Label 2 = team_a wins, 0 = team_b wins, otherwise a draw.
    return team_a if res == 2 else (team_b if res == 0 else "DRAW")

print("‚úÖ UI Styles and Global Config Loaded.")

‚úÖ UI Styles and Global Config Loaded.


In [961]:
# CELL 5: Group Stage Simulation
# Page wrapper, title, and the grid that holds one standings box per group.
html_out = (
    style
    + "<div class='wc-body'><h1 class='main-title'>FIFA World Cup 2026 Simulation</h1>"
    + "<div class='stage-label'>Group Standings</div><div class='grid-sys'>"
)

# Only teams present in both tables survive the (inner) merge.
group_data = groups.merge(teams_scaled, on='TEAM_NAME')
results_log = []
for g_name, df in group_data.groupby('Group'):
    squad = df['TEAM_NAME'].tolist()
    pts = dict.fromkeys(squad, 0)

    # Single round-robin: every pair meets once; 3 points for a win, 1 each for a draw.
    for pos, first in enumerate(squad):
        for second in squad[pos + 1:]:
            outcome = predict_match(first, second)
            if outcome == "DRAW":
                pts[first] += 1
                pts[second] += 1
            else:
                pts[outcome] += 3

    # Highest points first; teams level on points keep their original order.
    standings = list(pts.items())
    standings.sort(key=lambda entry: entry[1], reverse=True)

    team_rows = "".join(
        f"<div class='team-line'><span>{get_flag(t)} {t}</span><span style='color:#64ffda; font-weight:bold;'>{p} PTS</span></div>"
        for t, p in standings
    )
    results_log.extend([g_name, t, p] for t, p in standings)
    html_out += f"<div class='group-box'><div style='text-align:center; color:#8892b0; margin-bottom:15px; font-weight:bold;'>GROUP {g_name}</div>{team_rows}</div>"
html_out += "</div></div>"

# Qualifiers for the Round of 32: the top two of every group, followed by the
# eight third-placed teams with the most points.
f_df = pd.DataFrame(results_log, columns=['G', 'T', 'P']).sort_values(['G', 'P'], ascending=[True, False])
standings_by_group = f_df.groupby('G')
top_two = standings_by_group.head(2)['T'].tolist()
best_thirds = standings_by_group.nth(2).sort_values('P', ascending=False).head(8)['T'].tolist()
q32 = top_two + best_thirds

display(HTML(html_out))

In [962]:
# CELL 6: Knockout Rounds Execution
def run_knockout(t_list, title):
    """Play one knockout round and return its winners and HTML summary.

    The teams are paired at random (using NumPy's global RNG) and every pair
    is decided with ``predict_match(..., knockout=True)``.

    Args:
        t_list: Sequence of team names taking part in the round. It is not
            modified; the random draw is made on a copy.
        title: Heading shown above the match cards.

    Returns:
        ``(winners, out)``: the winner of each match in the order played, and
        the HTML for the heading plus one match card per pairing.

    Raises:
        ValueError: If the number of teams is odd, since one team would be
            left without an opponent. Raised before any match is played.
    """
    if len(t_list) % 2 != 0:
        raise ValueError(
            f"{title}: need an even number of teams to pair up, got {len(t_list)}"
        )

    out = f"<div class='stage-label'>{title}</div>"
    winners = []

    # Shuffle a copy so the caller's list keeps its order; the permutation
    # drawn is the same one an in-place shuffle would produce.
    bracket = list(t_list)
    np.random.shuffle(bracket)

    for i in range(0, len(bracket), 2):
        t1, t2 = bracket[i], bracket[i + 1]
        w = predict_match(t1, t2, knockout=True)
        out += f"<div class='match-card'><span>{get_flag(t1)} {t1} VS {t2} {get_flag(t2)}</span><span class='win-badge'>WINNER: {w}</span></div>"
        winners.append(w)
    return winners, out

# Play the rounds in order: the winners of each round are the entrants of the
# next one. `qf` holds the quarter-final winners, `sf` the semi-final winners.
r32, h32 = run_knockout(q32, "Round of 32")
r16, h16 = run_knockout(r32, "Round of 16")
qf, hqf = run_knockout(r16, "Quarter-Finals")
sf, hsf = run_knockout(qf, "Semi-Finals")

# Stitch the four round summaries together inside the shared page wrapper.
knockout_markup = "".join([h32, h16, hqf, hsf])
display(HTML(style + "<div class='wc-body'>" + knockout_markup + "</div>"))

In [963]:
# CELL 7: Grand Final & Official Podium
# `sf` holds the two semi-final winners (the finalists) and `qf` the four
# quarter-final winners (the semi-finalists).
champion = predict_match(sf[0], sf[1], knockout=True)
runner_up = sf[1] if champion == sf[0] else sf[0]

# Third-place play-off between the two semi-final losers, i.e. the
# semi-finalists that did not reach the final. Selecting them by membership
# (rather than by position in `qf`) guarantees neither finalist can also be
# awarded third place.
sf_losers = [team for team in qf if team not in sf]
if len(sf_losers) != 2:
    raise ValueError(
        f"Expected exactly two semi-final losers for the third-place match, got {sf_losers}"
    )
third_place = predict_match(sf_losers[0], sf_losers[1], knockout=True)

# The flags are enlarged by rewriting the 'width:25px' that get_flag() emits.
podium_html = f"""
<div class='wc-body'>
<style>
    .podium-section {{ display: flex; justify-content: center; align-items: flex-end; gap: 20px; margin-top: 50px; padding: 50px; background: rgba(17, 34, 64, 0.8); border-radius: 40px; border: 1px solid #64ffda; }}
    .rank-box {{ width: 230px; text-align: center; padding: 25px; border-radius: 20px 20px 10px 10px; color: #000; font-weight: bold; }}
    .first-p {{ background: linear-gradient(135deg, #ffd700, #b8860b); height: 350px; box-shadow: 0 0 35px rgba(255, 215, 0, 0.6); border: 3px solid #fff; }}
    .second-p {{ background: linear-gradient(135deg, #c0c0c0, #708090); height: 260px; border: 2px solid #fff; }}
    .third-p {{ background: linear-gradient(135deg, #cd7f32, #8b4513); height: 200px; border: 2px solid #fff; }}
    .trophy {{ font-size: 50px; display: block; margin-bottom: 10px; }}
</style>

<div class='stage-label'>Official Podium</div>
<div class='podium-section'>
    <div class='rank-box second-p'>
        <div>ü•à Runner-up</div>
        <h3 style='margin:15px 0;'>{runner_up}</h3>
        <div>{get_flag(runner_up).replace('width:25px','width:70px')}</div>
    </div>

    <div class='rank-box first-p'>
        <span class='trophy'>üèÜ</span>
        <div>ü•á WORLD CHAMPION</div>
        <h2 style='font-size:2.2em; margin:15px 0;'>{champion}</h2>
        <div>{get_flag(champion).replace('width:25px','width:110px')}</div>
    </div>

    <div class='rank-box third-p'>
        <div>ü•â 3rd Place</div>
        <h3 style='margin:15px 0;'>{third_place}</h3>
        <div>{get_flag(third_place).replace('width:25px','width:70px')}</div>
    </div>
</div>
</div>"""

display(HTML(podium_html))

In [964]:
# 3. TOURNAMENT TRAJECTORY FUNCTION
def simulate_tournament_trajectory(target_team_name):
    """Estimate how far a team gets against one benchmark opponent per stage.

    For each stage the team is compared with a benchmark team taken from the
    Elo ranking of ``teams_scaled`` (a progressively higher-ranked one each
    stage). The model's probability of a win or a draw is recorded, and the
    team advances while that probability is at least 50%.

    Reads the notebook globals ``teams_scaled``, ``final_feature_set`` and
    ``model``. No random numbers are drawn.

    Args:
        target_team_name: Team name; surrounding whitespace is ignored.

    Returns:
        ``(highest_reach, stage_details)`` where ``highest_reach`` is the name
        of the last stage the team played and ``stage_details`` is a list of
        ``(stage_name, advance_probability_percent)`` tuples for the stages
        played. If the team is unknown, returns ``(None, error_message)``.
        Only the top-Elo team can be labelled champion, because the Final
        benchmark is that team itself.
    """
    target_team_name = target_team_name.strip()

    # Look the team up by name directly in the scaled table, so the result
    # does not depend on the frame having a default 0..n-1 index.
    team_rows = teams_scaled[teams_scaled['TEAM_NAME'] == target_team_name]
    if team_rows.empty:
        return None, f"Error: '{target_team_name}' not found."
    team_vector = team_rows.iloc[0][final_feature_set].values.astype(float)

    # Ranking teams by Elo to define difficult opponents
    sorted_data = teams_scaled.sort_values(by='Elo_Ratings', ascending=False)
    n_teams = len(sorted_data)

    def _benchmark(rank):
        # Feature vector of the team at the given position in the Elo ranking.
        return sorted_data.iloc[rank][final_feature_set].values.astype(float)

    # Benchmark opponents per stage
    tournament_stages = [
        ("Group Stage", _benchmark(int(n_teams * 0.5))),
        ("Round of 32", _benchmark(int(n_teams * 0.3))),
        ("Round of 16", _benchmark(int(n_teams * 0.15))),
        ("Quarter-Finals", _benchmark(int(n_teams * 0.08))),
        ("Semi-Finals", _benchmark(min(3, n_teams - 1))),  # 4th-best Elo
        ("Final", _benchmark(0)),                          # best Elo
    ]

    # predict_proba columns follow model.classes_; map labels to columns
    # instead of assuming all three labels (0, 1, 2) are present.
    column_of = {label: pos for pos, label in enumerate(model.classes_)}

    highest_reach = "Group Stage"
    stage_details = []
    cleared_final = False

    for stage_name, opponent_vector in tournament_stages:
        # The team is playing this stage, so this is how far it has got.
        highest_reach = stage_name

        feature_delta = (team_vector - opponent_vector).reshape(1, -1)
        probs = model.predict_proba(feature_delta)[0]

        # Probability of winning or drawing (labels 2 and 1), in percent.
        p_draw = probs[column_of[1]] if 1 in column_of else 0.0
        p_win = probs[column_of[2]] if 2 in column_of else 0.0
        adv_probability = (p_draw + p_win) * 100
        stage_details.append((stage_name, adv_probability))

        # Advance on the same figure that is reported, so the stage list and
        # the predicted finish can never disagree.
        if adv_probability < 50:
            break
    else:
        cleared_final = True

    # Title Label
    if cleared_final and target_team_name == sorted_data.iloc[0]['TEAM_NAME']:
        highest_reach = "üèÜ World Cup Champion"

    return highest_reach, stage_details

# 4. BLUE THEME USER INTERFACE (UI)
# Deliberately NOT named `style`: that name holds the CSS string which the
# group-stage and knockout cells concatenate into their HTML. Rebinding it to
# a dict would make those cells fail with a TypeError when re-run.
widget_style = {'description_width': 'initial'}

# Free-text box with auto-completion; `ensure_option=True` only accepts a
# value that is one of the listed team names.
input_box = widgets.Combobox(
    placeholder='Type a country (e.g., Spain)...',
    options=list(teams['TEAM_NAME'].sort_values().values),
    description='‚öΩ Team:',
    ensure_option=True, style=widget_style
)

button = widgets.Button(description='Predict Path', button_style='info', icon='bolt')

# Everything the click handler prints or displays is rendered in this area.
output = widgets.Output()

def on_button_clicked(b):
    """Render the predicted tournament path for the team in ``input_box``.

    Clears the ``output`` area, runs ``simulate_tournament_trajectory`` for
    the selected team and displays an HTML table with one row per stage
    played. Does nothing when the input box is empty.

    Args:
        b: The clicked button, supplied by ipywidgets; not used.
    """
    with output:
        clear_output()
        team = input_box.value
        if not team: return

        res, details = simulate_tournament_trajectory(team)

        # On an unknown team the simulator returns (None, error_message);
        # show the message instead of trying to iterate it as stage rows.
        if res is None:
            print(details)
            return

        html_content = f"""
        <div style="border: 2px solid #1976d2; padding: 20px; border-radius: 12px; background-color: #f0f7ff; font-family: 'Segoe UI', sans-serif;">
            <h2 style="color: #0d47a1; margin-top: 0; border-bottom: 2px solid #1976d2; padding-bottom: 10px;">üìä AI ANALYSIS: {team.upper()}</h2>
            <p style="font-size: 20px; margin: 15px 0;"><b>Predicted Finish:</b> <span style="color: #1565c0; font-weight: bold;">{res}</span></p>
            <table style="width: 100%; border-collapse: collapse;">
                <tr style="background-color: #1976d2; color: white;">
                    <th style="padding: 12px; text-align: left;">Tournament Stage</th>
                    <th style="padding: 12px; text-align: center;">Success Probability</th>
                    <th style="padding: 12px; text-align: center;">Result</th>
                </tr>
        """
        for stage, prob in details:
            # A stage counts as passed at 50% or more: green row, else red.
            passed = prob >= 50
            color = "#2e7d32" if passed else "#d32f2f"
            marker = "‚úî PASS" if passed else "‚úò FAIL"
            html_content += f"""
                <tr style="border-bottom: 1px solid #bbdefb; background-color: white;">
                    <td style="padding: 12px;">{stage}</td>
                    <td style="padding: 12px; text-align: center; font-weight: bold; color: {color};">{prob:.1f}%</td>
                    <td style="padding: 12px; text-align: center; font-weight: bold; color: {color};">{marker}</td>
                </tr>
            """
        html_content += "</table></div>"
        display(HTML(html_content))

# Wire the handler to the button and show the input row above the output area.
button.on_click(on_button_clicked)
print("FIFA 2026 AI PREDICTOR READY")
display(widgets.VBox([widgets.HBox([input_box, button]), output]))

FIFA 2026 AI PREDICTOR READY


VBox(children=(HBox(children=(Combobox(value='', description='‚öΩ Team:', ensure_option=True, options=('Algeria'‚Ä¶