In [1]:
import pandas as pd
import numpy as np
import altair as alt 
import eco_style
import pycountry
# Switch Altair to the theme registered under the name 'light'.
# NOTE(review): 'light' is presumably registered by the `eco_style` import above — confirm.
alt.themes.enable('light')

ThemeRegistry.enable('light')

In [3]:
# Map the raw stage-frequency column names to human-readable stage labels.
series = {
    'Freq.l16': "Last 16",
    'Freq.qf': "Quarter Finals",
    'Freq.sf': "Semi Finals",
    'Freq.f': "Final",
    'Freq': "Win",
}

# Read the first simulation summary, discard the first column by position,
# apply the readable stage names and replace missing 'Win' values with 0.
df = (
    pd.read_csv("euros-sim-summary-2024-06-12 (1).csv")
    .iloc[:, 1:]
    .rename(columns=series)
    .assign(Win=lambda frame: frame['Win'].fillna(0))
)

# Wide (one row per team) snapshot, reused by later cells.
unmelted_df = df.copy()

# Long format: one row per (team, stage) with the stage name in 'series'.
df = pd.melt(df, id_vars=["team1", "elostrength1"], var_name='series')

# Three-letter codes used as point labels in the chart.
fifa_codes = {
    "france": "FRA",
    "spain": "ESP",
    "netherlands": "NED",
    "belgium": "BEL",
    "england": "ENG",
    "portugal": "POR",
    "italy": "ITA",
    "germany": "GER",
    "croatia": "CRO",
    "denmark": "DEN",
    "switzerland": "SUI",
    "ukraine": "UKR",
    "austria": "AUT",
    "serbia": "SRB",
    "hungary": "HUN",
    "poland": "POL",
    "turkey": "TUR",
    "czech rep": "CZE",
    "scotland": "SCO",
    "slovenia": "SVN",
    "romania": "ROU",
    "slovakia": "SVK",
    "georgia": "GEO",
    "albania": "ALB",
}

df = df.assign(fifa_code=df['team1'].map(fifa_codes))

# Re-attach each team's 'Win' and 'Last 16' values to every long-format row,
# so a single row carries both ends of the range drawn per team.
df = pd.merge(
    df,
    unmelted_df.loc[:, ['team1', 'elostrength1', 'Win', 'Last 16']],
    on=['team1', 'elostrength1'],
)

original_df = df.copy()

df


Unnamed: 0,team1,elostrength1,series,value,fifa_code,Win,Last 16
0,france,1411.494438,Win,19.74,FRA,19.74,93.38
1,france,1411.494438,Last 16,93.38,FRA,19.74,93.38
2,france,1411.494438,Quarter Finals,75.23,FRA,19.74,93.38
3,france,1411.494438,Semi Finals,49.86,FRA,19.74,93.38
4,france,1411.494438,Final,31.60,FRA,19.74,93.38
...,...,...,...,...,...,...,...
115,albania,1036.987683,Win,0.01,ALB,0.01,6.60
116,albania,1036.987683,Last 16,6.60,ALB,0.01,6.60
117,albania,1036.987683,Quarter Finals,0.81,ALB,0.01,6.60
118,albania,1036.987683,Semi Finals,0.12,ALB,0.01,6.60


In [4]:

# Encoding channels shared by every layer of the pre-tournament chart.
x_channel = alt.X(
    'elostrength1:Q',
    axis=alt.Axis(titleY=25),
    scale=alt.Scale(zero=False),
    title='Elo rating before tournament',
)
y_channel = alt.Y(
    'value:Q',
    axis=alt.Axis(titleY=-5),
    title="Probability",
)
series_colour = alt.Color('series:N', legend=None)

base = alt.Chart(df).encode(
    x=x_channel,
    y=y_channel,
    color=series_colour,
    tooltip=['team1', 'series', 'value'],
)

# One dot per (team, stage).
points = base.mark_circle()

# Layers below that need exactly one row per team reuse the 'Last 16' rows.
last16_only = base.transform_filter("datum.series == 'Last 16'")

# Team-code labels; a few codes are nudged individually via the dx/dy expressions.
labels = last16_only.mark_text(
    align='left',
    baseline='middle',
    dx=alt.expr("datum.fifa_code == 'POR' ? -10 : 5"),
    dy=alt.expr("datum.fifa_code == 'POR' ? -10 : datum.fifa_code == 'ROU'? -5 : datum.fifa_code == 'SVK' ? 5 : datum.fifa_code == 'FRA' ? -5 : 0"),
).encode(
    text='fifa_code:N',
    color=alt.value('#676A86'),
    opacity=alt.value(0.8),
)

# Dotted vertical rule per team, spanning its 'Win' value to its 'Last 16' value.
rules = last16_only.mark_rule(
    strokeDash=[1, 5],
).encode(
    y=alt.Y('Win:Q'),
    y2='Last 16',
    size=alt.value(1.25),
    color=alt.value("black"),
    opacity=alt.value(1),
)

# Stage names written next to the FRA points; these stand in for the disabled legend.
end_labels = base.transform_filter("datum.fifa_code == 'FRA'").mark_text(
    align='left',
    baseline='middle',
    dx=5,
    fontSize=12,
    dy=alt.expr("datum.series == 'Last 16' ? 5 : 0"),
).encode(
    text='series:N',
)

chart = (points + labels + rules + end_labels).properties(
    width=600,
    height=300,
)

# Write the chart spec to disk, then display the chart as the cell output.
chart.save("euros_pred.json")
chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [91]:
# Show the rows whose FIFA code is GEO.
df[df['fifa_code'] == 'GEO']

Unnamed: 0,team1,elostrength1,series,value,fifa_code,Win,Last 16
110,georgia,1059.168816,Win,,GEO,,29.16
111,georgia,1059.168816,Last 16,29.16,GEO,,29.16
112,georgia,1059.168816,Quarter Finals,2.98,GEO,,29.16
113,georgia,1059.168816,Semi Finals,0.46,GEO,,29.16
114,georgia,1059.168816,Final,0.04,GEO,,29.16


# Update: start vs. current Last 16 probability (2024-06-19 simulation)

In [30]:

def _country_flag(iso2):
    """Return the emoji flag for a two-letter ISO 3166-1 alpha-2 country code.

    Each ASCII letter is shifted into the Regional Indicator Symbol range
    (U+1F1E6 corresponds to 'A'); the resulting pair renders as a flag.
    """
    return "".join(chr(0x1F1E6 + ord(letter) - ord("A")) for letter in iso2.upper())


def _subdivision_flag(region):
    """Return the emoji tag-sequence flag for a subdivision code such as 'gbeng'.

    The sequence is WAVING BLACK FLAG (U+1F3F4), one TAG character
    (U+E0000 + ASCII code point) per letter, then CANCEL TAG (U+E007F).
    """
    tags = "".join(chr(0xE0000 + ord(letter)) for letter in region.lower())
    return "\U0001F3F4" + tags + "\U000E007F"


# Team name (as spelled in the simulation CSVs) -> flag emoji.
# The flags are generated from ASCII region codes instead of being pasted as
# literals: the previous literals were mojibake (UTF-8 bytes decoded with a
# legacy Mac encoding), so the chart would have shown junk instead of flags.
flag_emoji = {
  "france": _country_flag("FR"),
  "spain": _country_flag("ES"),
  "netherlands": _country_flag("NL"),
  "belgium": _country_flag("BE"),
  "england": _subdivision_flag("gbeng"),
  "portugal": _country_flag("PT"),
  "italy": _country_flag("IT"),
  "germany": _country_flag("DE"),
  "croatia": _country_flag("HR"),
  "denmark": _country_flag("DK"),
  "switzerland": _country_flag("CH"),
  "ukraine": _country_flag("UA"),
  "austria": _country_flag("AT"),
  "serbia": _country_flag("RS"),
  "hungary": _country_flag("HU"),
  "poland": _country_flag("PL"),
  "turkey": _country_flag("TR"),
  "czech rep": _country_flag("CZ"),
  "scotland": _subdivision_flag("gbsct"),
  "slovenia": _country_flag("SI"),
  "romania": _country_flag("RO"),
  "slovakia": _country_flag("SK"),
  "georgia": _country_flag("GE"),
  "albania": _country_flag("AL")
}

# Same raw-column -> stage-label mapping as used for the first simulation file.
series = {
    'Freq.l16': "Last 16",
    'Freq.qf': "Quarter Finals",
    'Freq.sf': "Semi Finals",
    'Freq.f': "Final",
    'Freq': "Win",
}

# Updated simulation reduced to team name and 'Last 16' value.
# 'Win' is filled here but is dropped by the column selection that follows.
updated_df = (
    pd.read_csv("euros-sim-summary-2024-06-19.csv")
    .rename(columns=series)
    .assign(Win=lambda frame: frame['Win'].fillna(0))
    .loc[:, ['team1', 'Last 16']]
    .copy()
)

# Pair each team's 'Last 16' value from the first run (_before) with the
# updated one (_after); teams are matched on their name only.
df = unmelted_df.loc[:, ['team1', 'elostrength1', 'Last 16']].merge(
    updated_df,
    on='team1',
    suffixes=('_before', '_after'),
)

df = df.assign(flag=df['team1'].map(flag_emoji))

# Scatter of each team's 'Last 16' value: first simulation on x, updated simulation on y.
base = alt.Chart(df).encode(
    x=alt.X('Last 16_before:Q',
            axis=alt.Axis(titleY=25),
            scale=alt.Scale(zero=False),
             title='Start probability'),
    y=alt.Y('Last 16_after:Q', 
            axis=alt.Axis(titleY=-5),
            title="Probability now"),
    color=alt.Color('team1:N',
                    legend=None
    )
)

# Plain dot layer; defined for experimentation but not part of the final chart below.
circles = base.mark_circle()

# One flag glyph per team, drawn just to the right of the data position.
flag = base.mark_text(
    align='left',
    baseline='middle',
    dx=5,
    dy=0,
    fontSize=20
).encode(
    text='flag:N',
)

# "No change" reference line y = x. Both coordinates must share the same upper
# bound: ending the line at (max_before, max_after) tilts it away from y = x
# whenever the two maxima differ, which misplaces teams relative to the line.
diagonal_max = df[['Last 16_before', 'Last 16_after']].max().max()
diagonal_ends = pd.DataFrame({'x': [0, diagonal_max], 'y': [0, diagonal_max]})
diagonal = alt.Chart(diagonal_ends).mark_line(
    strokeDash=[1,5],
    color='#676A86').encode(
    x='x:Q',
    y='y:Q'
)

# Team-name text layer; also not part of the final chart below.
labels = base.mark_text(
    align='left',
    baseline='middle',
    dx=5,
    dy=0,
).encode(
    text='team1:N',
    color=alt.value('#676A86'),
    opacity=alt.value(0.8)
)

# Final figure: flags over the y = x reference line.
chart = (flag  + diagonal )

chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [22]:
# Distinct team names present in the current frame.
df['team1'].unique()

array(['france', 'spain', 'netherlands', 'belgium', 'england', 'portugal',
       'italy', 'germany', 'croatia', 'denmark', 'switzerland', 'ukraine',
       'austria', 'serbia', 'hungary', 'poland', 'turkey', 'czech rep',
       'scotland', 'slovenia', 'romania', 'slovakia', 'georgia',
       'albania'], dtype=object)

In [10]:
# Display the trimmed copy of the updated simulation frame.
# NOTE(review): the saved output lists a 'Win' column, whereas the code that
# builds `updated_df` keeps ['team1', 'Last 16'] — the output looks stale; re-run to confirm.
updated_df

Unnamed: 0,team1,Win
0,france,21.05
1,spain,16.64
2,netherlands,13.12
3,belgium,5.92
4,england,10.89
5,portugal,9.19
6,italy,8.23
7,germany,7.86
8,croatia,3.0
9,denmark,0.99
