In [173]:
import pandas as pd 
import numpy as np
import altair as alt
import eco_style
alt.themes.enable('light')


ThemeRegistry.enable('light')

In [184]:
df = pd.read_csv("past-euros-finals.csv")
df = df[['date', 'team1', 'team2', 'goals1', 'goals2', 'outcome']]


# Reshaping the DataFrame
reshaped_data = []
for index, row in df.iterrows():
    if pd.notna(row['team1']):
        reshaped_data.append({
            'date': row['date'],
            'team': row['team1'],
            'goals': row['goals1'],
        })
    if pd.notna(row['team2']):
        reshaped_data.append({
            'date': row['date'],
            'team': row['team2'],
            'goals': row['goals2'],
        })

df = pd.DataFrame(reshaped_data)

teams = {
    "spain": {"id": "ESP", "flag": "🇪🇸"},
    "england": {"id": "ENG", "flag": "🏴󠁧󠁢󠁥󠁮󠁧󠁿"},
    "belgium": {"id": "BEL", "flag": "🇧🇪"},
    "germany": {"id": "GER", "flag": "🇩🇪"},
    "denmark": {"id": "DEN", "flag": "🇩🇰"},
    "czechoslovakia": {"id": "TCH", "flag": "🇨🇿"},
    "czech rep": {"id": "CZE", "flag": "🇨🇿"},
    "portugal": {"id": "POR", "flag": "🇵🇹"},
    "greece": {"id": "GRE", "flag": "🇬🇷"},
    "italy": {"id": "ITA", "flag": "🇮🇹"},
    "france": {"id": "FRA", "flag": "🇫🇷"},
    "russia": {"id": "RUS", "flag": "🇷🇺"},
    "netherlands": {"id": "NED", "flag": "🇳🇱"},
    "yugoslavia": {"id": "YUG", "flag": "🇷🇸"}
}

df['flag'] = df['team'].apply(lambda x: teams[x]['flag'])
df['id'] = df['team'].apply(lambda x: teams[x]['id'])
df['id_flag'] = df['flag']+df['id']

winners = {
    "1960-07-10": "russia",
    "1964-06-21": "spain",
    "1968-06-10": "italy",
    "1972-06-18": "germany",
    "1976-06-20": "czechoslovakia",
    "1980-06-22": "germany",
    "1984-06-27": "france",
    "1988-06-25": "netherlands",
    "1992-06-26": "denmark",
    "1996-06-30": "germany",
    "2000-07-02": "france",
    "2004-07-04": "greece",
    "2008-06-29": "spain",
    "2012-07-01": "spain",
    "2016-07-10": "portugal",
    "2021-07-11": "italy",
    "2024-07-14": "spain"
}

df['winner'] = df['date'].apply(lambda x: winners[x] if x in winners else None)
df['won'] = df['team'] == df['winner']

df['goals_halved'] = df['goals'] / 2

df['year'] = df['date'].apply(lambda x: x.split('-')[0])

# df = df[['year', 'id_flag', 'goals', 'won']]

total_goals = df.groupby('year').sum().reset_index()
df['total_goals'] = df['year'].apply(
    lambda x: total_goals[total_goals['year'] == x]['goals'].values[0])
df['lose_pos'] = np.where(
    df.goals > 0, df.total_goals - 0.5 * df.goals, df.total_goals)

# df = df.query("year != '2024'")

df['won'] = df['won'].apply(lambda x: 'Winner' if x else 'Loser')

full_match_df = df.copy()

base = alt.Chart(df).encode(
    y=alt.Y('year:N', title='', axis=alt.Axis(ticks=False,
            domain=False), sort=alt.SortOrder('descending')),
    x=alt.X('sum(goals):Q', title='Goals', axis=alt.Axis(format='d', tickCount=5, offset=5, title=''

                                                         )),
    color=alt.Color('won:N', scale=alt.Scale(domain=['Winner', 'Loser'], range=['rgb(240, 240, 240)', '#ffffff']),
                    legend=alt.Legend(title=None,
                                      values=['Winner'],
                                      labelColor='#676A86',
                                      symbolStrokeWidth=0.5,
                                      orient='top'), sort='descending'),
    order=alt.Order('won:N', sort='descending')
)

bars = base.mark_bar(stroke='black', strokeWidth=0.25)
bars


winner_labels = base.transform_filter(
    "datum.won == 'Winner'"
).mark_text(
    align='center',
    baseline='middle',
    fontSize=10,
    dx=0
).encode(
    text='id_flag:N',
    color=alt.value('black'),
    x=alt.X('goals_halved:Q')
)

loser_labels = base.transform_filter(
    "datum.won == 'Loser'"
).mark_text(
    # if goals > 0 then align center, else align left
    align=alt.expr("datum.goals > 0 ? 'center' : 'left'"),
    baseline='middle',
    fontSize=10,
    # if goals > 0 then dx 3, else dx -3
    dx=alt.expr("datum.goals > 0 ? 0 : 5")
).encode(
    text='id_flag:N',
    color=alt.value('black'),
    x=alt.X('lose_pos:Q'),
    opacity=alt.condition(alt.datum.goals > 0, alt.value(0.6), alt.value(0.6))
)

chart = bars + winner_labels + loser_labels

chart = chart.properties(
    width=400,
    height=500
)


chart.save("finals_goals.png", scale_factor=2.0)
chart.save("finals_goals.json")

chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [185]:
df

Unnamed: 0,date,team,goals,flag,id,id_flag,winner,won,goals_halved,year,total_goals,lose_pos
0,2024-07-14,spain,,🇪🇸,ESP,🇪🇸ESP,spain,Winner,,2024,3.0,3.0
1,2024-07-14,england,,🏴󠁧󠁢󠁥󠁮󠁧󠁿,ENG,🏴󠁧󠁢󠁥󠁮󠁧󠁿ENG,spain,Loser,,2024,3.0,3.0
2,1980-06-22,belgium,1.0,🇧🇪,BEL,🇧🇪BEL,germany,Loser,0.5,1980,3.0,2.5
3,1980-06-22,germany,2.0,🇩🇪,GER,🇩🇪GER,germany,Winner,1.0,1980,3.0,2.0
4,1992-06-26,denmark,2.0,🇩🇰,DEN,🇩🇰DEN,denmark,Winner,1.0,1992,2.0,1.0
5,1992-06-26,germany,0.0,🇩🇪,GER,🇩🇪GER,denmark,Loser,0.0,1992,2.0,2.0
6,1976-06-20,czechoslovakia,2.0,🇨🇿,TCH,🇨🇿TCH,czechoslovakia,Winner,1.0,1976,4.0,3.0
7,1976-06-20,germany,2.0,🇩🇪,GER,🇩🇪GER,czechoslovakia,Loser,1.0,1976,4.0,3.0
8,1996-06-30,czech rep,1.0,🇨🇿,CZE,🇨🇿CZE,germany,Loser,0.5,1996,3.0,2.5
9,1996-06-30,germany,2.0,🇩🇪,GER,🇩🇪GER,germany,Winner,1.0,1996,3.0,2.0


In [186]:
chart.properties(
    title=alt.TitleParams(
        "Goals scored in Euro finals",
        subtitle=["Winners and losers, Source: UEFA", ""],
        anchor='start',
        dx=30,
        color='#676A86',
        subtitleFontSize=12
    )
).save("finals_goals_with_flag.json")

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [187]:
df

Unnamed: 0,date,team,goals,flag,id,id_flag,winner,won,goals_halved,year,total_goals,lose_pos
0,2024-07-14,spain,,🇪🇸,ESP,🇪🇸ESP,spain,Winner,,2024,3.0,3.0
1,2024-07-14,england,,🏴󠁧󠁢󠁥󠁮󠁧󠁿,ENG,🏴󠁧󠁢󠁥󠁮󠁧󠁿ENG,spain,Loser,,2024,3.0,3.0
2,1980-06-22,belgium,1.0,🇧🇪,BEL,🇧🇪BEL,germany,Loser,0.5,1980,3.0,2.5
3,1980-06-22,germany,2.0,🇩🇪,GER,🇩🇪GER,germany,Winner,1.0,1980,3.0,2.0
4,1992-06-26,denmark,2.0,🇩🇰,DEN,🇩🇰DEN,denmark,Winner,1.0,1992,2.0,1.0
5,1992-06-26,germany,0.0,🇩🇪,GER,🇩🇪GER,denmark,Loser,0.0,1992,2.0,2.0
6,1976-06-20,czechoslovakia,2.0,🇨🇿,TCH,🇨🇿TCH,czechoslovakia,Winner,1.0,1976,4.0,3.0
7,1976-06-20,germany,2.0,🇩🇪,GER,🇩🇪GER,czechoslovakia,Loser,1.0,1976,4.0,3.0
8,1996-06-30,czech rep,1.0,🇨🇿,CZE,🇨🇿CZE,germany,Loser,0.5,1996,3.0,2.5
9,1996-06-30,germany,2.0,🇩🇪,GER,🇩🇪GER,germany,Winner,1.0,1996,3.0,2.0


# Goal by Goal

In [188]:
match_df

Unnamed: 0,date,team,goals,flag,id,id_flag,winner,won,goals_halved,year,total_goals,lose_pos
2,1980-06-22,belgium,1.0,🇧🇪,BEL,🇧🇪BEL,germany,Loser,0.5,1980,3.0,2.5
3,1980-06-22,germany,2.0,🇩🇪,GER,🇩🇪GER,germany,Winner,1.0,1980,3.0,2.0
4,1992-06-26,denmark,2.0,🇩🇰,DEN,🇩🇰DEN,denmark,Winner,1.0,1992,2.0,1.0
5,1992-06-26,germany,0.0,🇩🇪,GER,🇩🇪GER,denmark,Loser,0.0,1992,2.0,2.0
6,1976-06-20,czechoslovakia,2.0,🇨🇿,TCH,🇨🇿TCH,czechoslovakia,Winner,1.0,1976,4.0,3.0
7,1976-06-20,germany,2.0,🇩🇪,GER,🇩🇪GER,czechoslovakia,Loser,1.0,1976,4.0,3.0
8,1996-06-30,czech rep,1.0,🇨🇿,CZE,🇨🇿CZE,germany,Loser,0.5,1996,3.0,2.5
9,1996-06-30,germany,2.0,🇩🇪,GER,🇩🇪GER,germany,Winner,1.0,1996,3.0,2.0
10,2004-07-04,portugal,0.0,🇵🇹,POR,🇵🇹POR,greece,Loser,0.0,2004,1.0,1.0
11,2004-07-04,greece,1.0,🇬🇷,GRE,🇬🇷GRE,greece,Winner,0.5,2004,1.0,0.5


In [230]:
df = pd.read_csv("finals_goals.csv")
#Year	Team	Scorer	Minute	FIFA Code
#date	team	goals	flag	id	id_flag	winner
df = df.rename(columns={"Year": "year", "Team": "team", "Scorer": "scorer", "Minute": "minute", "FIFA Code": "id"})
match_df = full_match_df.copy()
match_df['year'] = match_df['date'].str.split('-').str[0].astype(int)

df = df.merge(match_df[['year',  'id', 'id_flag', 'won']], on=['year', 'id'], how='left')
flags = {
    f['id']: f['flag'] for i, f in full_match_df.iterrows()
}

df['flag'] = df['id'].apply(lambda x: flags[x])

base = alt.Chart(df).encode(
    y=alt.Y('year:N', title='', axis=alt.Axis(ticks=False, 
                                              grid=True,
                                              domain=False), sort=alt.SortOrder('descending')),
    x=alt.X('minute:Q', title='Minute', axis=alt.Axis(format='d', tickCount=5,offset=5, title='')),
    color=alt.value('#d0d0d0'),
    stroke=alt.Stroke('won:N', scale=alt.Scale(domain=['Winner', 'Loser'], range=['#36B7B4', '#E6224B']), legend=alt.Legend(title=None,
                                                                                                                                        values=['Winner', 'Loser'],
                                                                                                                                        labelColor='#676A86',
                                                                                                                                        symbolStrokeWidth=0.5,
                                                                                                                                        orient='top'), sort='descending'),
)


circles = base.mark_circle(size=250, stroke='black', strokeWidth=1).encode(
    opacity=alt.condition(alt.datum.year == '2024', alt.value(1), alt.value(0.7)),
    tooltip=[
        {'type': 'nominal', 'field': 'team', 'title': 'Team'},
        {'type': 'nominal', 'field': 'scorer', 'title': 'Scorer'},
        {'type': 'quantitative', 'field': 'minute', 'title': 'Minute'},
        {'type': 'nominal', 'field': 'id_flag', 'title': 'Team'},
    ]
)




flags = base.mark_text(size=10, dx=1, align='center', baseline='middle'
                       ).encode(
    text='flag:N',
        tooltip=[
        {'type': 'nominal', 'field': 'team', 'title': 'Team'},
        {'type': 'nominal', 'field': 'scorer', 'title': 'Scorer'},
        {'type': 'quantitative', 'field': 'minute', 'title': 'Minute'},
        {'type': 'nominal', 'field': 'id_flag', 'title': 'Team'},
    ]

)


# add a rule at 90 minutes

rule = alt.Chart(pd.DataFrame({'x': [90]})).mark_rule(strokeDash=[3, 3], opacity=0.7).encode(x='x:Q')


chart = (rule + circles +flags).resolve_scale(y='shared')

chart = chart.properties(
    height=600,
    width=450,
    title=alt.TitleParams(
        "Goals scored in Euro finals",
        subtitle=["by minute of game, Source: UEFA", ""],
        anchor='start',
        dx=30,
        color='#676A86',
        subtitleFontSize=12
    )
)

chart.save("finals_goals_by_minute.png", scale_factor=2.0)
chart.save("finals_goals_by_minute.json")

chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [209]:
flags = {
    f['id']: f['flag'] for i, f in df.iterrows()
}
flags

{'RUS': '🇷🇺',
 'YUG': '🇷🇸',
 'ESP': '🇪🇸',
 'ITA': '🇮🇹',
 'GER': '🇩🇪',
 'TCH': '🇨🇿',
 'BEL': '🇧🇪',
 'FRA': '🇫🇷',
 'NED': '🇳🇱',
 'DEN': '🇩🇰',
 'CZE': '🇨🇿',
 'GRE': '🇬🇷',
 'POR': '🇵🇹',
 'ENG': '🏴\U000e0067'}

In [206]:
df

Unnamed: 0,year,team,scorer,minute,id,id_flag,won,flag
0,1960,Soviet Union,Khusainov,13,RUS,🇷🇺RUS,Winner,🇷
1,1960,Soviet Union,Ponedelnik,113,RUS,🇷🇺RUS,Winner,🇷
2,1960,Yugoslavia,Galic,43,YUG,🇷🇸YUG,Loser,🇷
3,1964,Spain,Pereda,6,ESP,🇪🇸ESP,Winner,🇪
4,1964,Spain,Marcelino,84,ESP,🇪🇸ESP,Winner,🇪
5,1964,Soviet Union,Khusainov,8,RUS,🇷🇺RUS,Loser,🇷
6,1968,Italy,Riva,12,ITA,🇮🇹ITA,Winner,🇮
7,1968,Italy,Anastasi,31,ITA,🇮🇹ITA,Winner,🇮
8,1972,West Germany,Gerd Müller,27,GER,🇩🇪GER,Winner,🇩
9,1972,West Germany,Gerd Müller,58,GER,🇩🇪GER,Winner,🇩


In [190]:
match_df.id.unique()

array(['ESP', 'ENG', 'BEL', 'GER', 'DEN', 'TCH', 'CZE', 'POR', 'GRE',
       'ITA', 'FRA', 'RUS', 'NED', 'YUG'], dtype=object)

In [152]:
match_df.columns

Index(['date', 'team', 'goals', 'flag', 'id', 'id_flag', 'winner', 'won',
       'goals_halved', 'year', 'total_goals', 'lose_pos'],
      dtype='object')

In [171]:
match_df

Unnamed: 0,date,team,goals,flag,id,id_flag,winner,won,goals_halved,year,total_goals,lose_pos
2,1980-06-22,belgium,1.0,🇧🇪,BEL,🇧🇪BEL,germany,Loser,0.5,1980,3.0,2.5
3,1980-06-22,germany,2.0,🇩🇪,GER,🇩🇪GER,germany,Winner,1.0,1980,3.0,2.0
4,1992-06-26,denmark,2.0,🇩🇰,DEN,🇩🇰DEN,denmark,Winner,1.0,1992,2.0,1.0
5,1992-06-26,germany,0.0,🇩🇪,GER,🇩🇪GER,denmark,Loser,0.0,1992,2.0,2.0
6,1976-06-20,czechoslovakia,2.0,🇨🇿,TCH,🇨🇿TCH,czechoslovakia,Winner,1.0,1976,4.0,3.0
7,1976-06-20,germany,2.0,🇩🇪,GER,🇩🇪GER,czechoslovakia,Loser,1.0,1976,4.0,3.0
8,1996-06-30,czech rep,1.0,🇨🇿,CZE,🇨🇿CZE,germany,Loser,0.5,1996,3.0,2.5
9,1996-06-30,germany,2.0,🇩🇪,GER,🇩🇪GER,germany,Winner,1.0,1996,3.0,2.0
10,2004-07-04,portugal,0.0,🇵🇹,POR,🇵🇹POR,greece,Loser,0.0,2004,1.0,1.0
11,2004-07-04,greece,1.0,🇬🇷,GRE,🇬🇷GRE,greece,Winner,0.5,2004,1.0,0.5


In [22]:
df.team.unique()

array(['spain', 'england', 'belgium', 'germany', 'denmark',
       'czechoslovakia', 'czech rep', 'portugal', 'greece', 'italy',
       'france', 'russia', 'netherlands', 'yugoslavia'], dtype=object)

In [21]:
df.date.unique()

array(['2024-07-14', '1980-06-22', '1992-06-26', '1976-06-20',
       '1996-06-30', '2004-07-04', '2012-07-01', '2021-07-11',
       '2000-07-02', '1988-06-25', '2016-07-10', '1972-06-18',
       '1964-06-21', '2008-06-29', '1984-06-27', '1960-07-10',
       '1968-06-10'], dtype=object)

In [15]:
df.team.unique()

array(['spain', 'england', 'belgium', 'germany', 'denmark',
       'czechoslovakia', 'czech rep', 'portugal', 'greece', 'italy',
       'france', 'russia', 'netherlands', 'yugoslavia'], dtype=object)

In [None]:


# Reshaping the DataFrame
reshaped_data = []
for index, row in df.iterrows():
    if pd.notna(row['team1']):
        reshaped_data.append({
            'date': row['date'],
            'team': row['team1'],
            'goals': row['goals1'],
            'won': 1 if row['outcome'] == 1 else (0 if row['outcome'] == 0 else -1)
        })
    if pd.notna(row['team2']):
        reshaped_data.append({
            'date': row['date'],
            'team': row['team2'],
            'goals': row['goals2'],
            'won': 1 if row['outcome'] == -1 else (0 if row['outcome'] == 0 else -1)
        })

reshaped_df = pd.DataFrame(reshaped_data)
df = reshaped_df

df
