## Setup

### Plotly Template

In [2]:
import plotly.graph_objects as go
import plotly.io as pio

# Define the base template
base = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor='#FFF5CC',
        plot_bgcolor='#FFF5CC',
        height=800,
        width=800 * 1.618,
        xaxis=dict(
            anchor='y',
            showgrid=True,
            gridcolor='#888888',  # Darker grid lines
            tickfont=dict(
                size=24,
                family='Open Sans, sans-serif'  # Use Open Sans font
            ),
            titlefont=dict(
                size=26,
                family='Open Sans, sans-serif'  # Use Open Sans font
            ),
            linecolor='#333333',
            linewidth=2  # Adjust the thickness of the x-axis line
        ),
        yaxis=dict(
            anchor='x',
            showgrid=True,
            gridcolor='#888888',  # Darker grid lines
            tickfont=dict(
                size=24,
                family='Open Sans, sans-serif'
            ),
            titlefont=dict(
                size=26,
                family='Open Sans, sans-serif'
            ),
            linecolor='#333333',
            linewidth=2  # Adjust the thickness of the y-axis line
        ),
        font=dict(
            color='#333333',
            size=28,
            family='Open Sans, sans-serif'
        ),
        colorway=["#348273", "#280F3C", "#CC5500", "#333333", "#2E86C1", "#9B59B6", "#28B463", "#F39C12", "#E74C3C", "#3498DB"],
        title=go.layout.Title(
            text='',
            font=dict(
                size=34,
                color='#333333',
                family='Open Sans, sans-serif'
            ),
            x=0.05,
        )
    ),
    data=dict(
        scatter=[
            go.Scatter(
                line=dict(width=6)  # Set the line width for scatter plots
            )
        ]
    )
)

# Register the base template
pio.templates['base'] = base
pio.templates.default = 'base'

### d_olympics

In [13]:
import pandas as pd
from IPython.display import display, HTML

# Load the CSV files into DataFrames
d_country_colours = pd.read_csv('d_country_colours.csv', names=["ioc_code", "colour"], header=None)
d_country_flags = pd.read_csv('d_country_flags.csv')
d_gdp_ppp = pd.read_csv('d_gdp_ppp.csv')
d_ioc_codes = pd.read_csv('d_ioc_codes.csv')
d_num_athletes = pd.read_csv('d_num_athletes.csv')
d_pop_1564 = pd.read_csv('d_pop_1564.csv')

# Exclude Gibraltar and Channel Islands from d_pop_1564
d_pop_1564 = d_pop_1564[~d_pop_1564['country'].isin(['Gibraltar', 'Channel Islands'])]

# Convert gdp_ppp to numeric
d_gdp_ppp['gdp_ppp'] = pd.to_numeric(d_gdp_ppp['gdp_ppp'], errors='coerce')

# Merge all dataframes using d_ioc_codes as the base
d_olympics = d_ioc_codes \
    .merge(d_country_colours, on='ioc_code', how='left') \
    .merge(d_country_flags, on='ioc_code', how='left') \
    .merge(d_gdp_ppp, on='ioc_code', how='left') \
    .merge(d_num_athletes, on='ioc_code', how='left') \
    .merge(d_pop_1564, on='ioc_code', how='left')

# Rename and clean up columns
d_olympics = d_olympics.rename(columns={'country_x': 'country'}).drop(columns=['country_y', 'country'])

# Calculate the total world population and total number of athletes
total_world_population = 5210000000  # Example value
total_athletes = 10000               # Example value

# Calculate share_pop_1564 for each country
d_olympics['share_pop_1564'] = d_olympics['pop_1564'] / total_world_population * 100

# Calculate share_athletes for each country
d_olympics['share_athletes'] = d_olympics['num_athletes'] / total_athletes * 100

# Calculate participation_score for each country
d_olympics['participation_score'] = d_olympics['share_athletes'] / d_olympics['share_pop_1564']

# Order the DataFrame by participation_score in descending order
d_olympics = d_olympics.sort_values(by='participation_score', ascending=False)

# Display the DataFrame as an HTML table with a scrollable element
html = d_olympics.to_html(classes='table table-striped', index=False)

# Custom CSS to make the table scrollable
html = f"""
<div style="max-height: 500px; overflow-y: scroll; border: 1px solid #ddd;">
{html}
</div>
"""

display(HTML(html))


ioc_code,colour,flag,gdp_ppp,num_athletes,pop_1564,share_pop_1564,share_athletes,participation_score
MON,#ce1126,🇲🇨,,6.0,18490.0,0.000355,0.06,169.064359
TUV,#002b7f,🇹🇻,65680000.0,2.0,7020.0,0.000135,0.02,148.433048
PLW,#002b7f,🇵🇼,315850000.0,3.0,12400.0,0.000238,0.03,126.048387
SMR,#5eb6e4,🇸🇲,2217650000.0,5.0,22470.0,0.000431,0.05,115.932354
BER,#00008b,🇧🇲,6796160000.0,8.0,40900.0,0.000785,0.08,101.90709
SAM,#ce1126,🇼🇸,1507690000.0,24.0,129100.0,0.002478,0.24,96.855151
IVB,#00008b,🇻🇬,,4.0,23920.0,0.000459,0.04,87.123746
MHL,#00205b,🇲🇭,314610000.0,4.0,26580.0,0.00051,0.04,78.404816
NRU,#00205b,🇳🇷,161930000.0,1.0,7490.0,0.000144,0.01,69.559413
SKN,#ce1126,🇰🇳,1595150000.0,3.0,33510.0,0.000643,0.03,46.642793


### f_medals_daily

In [4]:
import pandas as pd
from IPython.display import display, HTML

# Load the raw data into a dataframe
df = pd.read_csv('f_medals_daily.csv')

# Function to convert date to dd-mm-yyyy format
def convert_date(date_str):
    return pd.to_datetime(date_str, format='%m/%d/%Y').strftime('%d-%m-%Y')

# Function to determine the gender based on the sub-discipline name
def determine_gender(sub_discipline):
    """
    Determine the gender category from the sub_discipline name.
    Looks for specific keywords: "women's", "men's", and uses default 'mix' otherwise.
    """
    # Convert sub_discipline to lowercase for case-insensitive comparison
    sub_discipline_lower = sub_discipline.lower()

    # Determine gender based on specific keywords
    if "women's" in sub_discipline_lower or 'women' in sub_discipline_lower or 'woman' in sub_discipline_lower or 'female' in sub_discipline_lower:
        return 'women'
    elif "men's" in sub_discipline_lower or "men" in sub_discipline_lower or "male" in sub_discipline_lower:
        return 'men'
    else:
        return 'mix'

# Initialize lists to store the processed data
date_list = []
name_list = []
country_list = []
discipline_list = []
sub_discipline_list = []
num_gold_list = []
num_silver_list = []
num_bronze_list = []
gender_list = []  # Add a list to store gender information

# Process each row in the original dataframe
for index, row in df.iterrows():
    date = convert_date(row['Date'])
    discipline = row['Discipline']
    sub_discipline = row['Sub-discipline']
    gender = determine_gender(sub_discipline)  # Determine gender based on sub-discipline
    
    medals = [
        ('Gold', row['Gold Name'], row['Gold Country']),
        ('Silver', row['Silver Name'], row['Silver Country']),
        ('Bronze', row['Bronze Name'], row['Bronze Country'])
    ]
    
    for medal_type, name, country in medals:
        if pd.notna(name) and pd.notna(country):
            date_list.append(date)
            name_list.append(name)
            country_list.append(country)
            discipline_list.append(discipline)
            sub_discipline_list.append(sub_discipline)
            gender_list.append(gender)  # Append the gender to the list
            
            # Assign medal counts
            num_gold_list.append(1 if medal_type == 'Gold' else 0)
            num_silver_list.append(1 if medal_type == 'Silver' else 0)
            num_bronze_list.append(1 if medal_type == 'Bronze' else 0)

# Create the f_medals_daily dataframe
f_medals_daily = pd.DataFrame({
    'date': date_list,
    'name': name_list,
    'country': country_list,
    'discipline': discipline_list,
    'sub_discipline': sub_discipline_list,
    'num_gold': num_gold_list,
    'num_silver': num_silver_list,
    'num_bronze': num_bronze_list,
    'gender': gender_list  # Add the gender column to the DataFrame
})

# Display the dataframe as a scrollable element
display(HTML(f"""
<style>
    .dataframe-div {{
        max-height: 400px;
        overflow: auto;
    }}
</style>
<div class="dataframe-div">{f_medals_daily.to_html(index=False)}</div>
"""))

date,name,country,discipline,sub_discipline,num_gold,num_silver,num_bronze,gender
27-07-2024,Kazakhstan,KAZ,Shooting,10m Air Rifle Mixed Team Bronze Medal Match,0,0,1,mix
27-07-2024,China,CHN,Shooting,10m Air Rifle Mixed Team Gold Medal Match,1,0,0,mix
27-07-2024,Korea,KOR,Shooting,10m Air Rifle Mixed Team Gold Medal Match,0,1,0,mix
27-07-2024,China,CHN,Diving,Women's Synchronised 3m Springboard Final,1,0,0,women
27-07-2024,United States,USA,Diving,Women's Synchronised 3m Springboard Final,0,1,0,women
27-07-2024,Great Britain,GBR,Diving,Women's Synchronised 3m Springboard Final,0,0,1,women
27-07-2024,Grace BROWN,AUS,Cycling Road,Women's Individual Time Trial,1,0,0,women
27-07-2024,Anna HENDERSON,GBR,Cycling Road,Women's Individual Time Trial,0,1,0,women
27-07-2024,Chloe DYGERT,USA,Cycling Road,Women's Individual Time Trial,0,0,1,women
27-07-2024,Remco EVENEPOEL,BEL,Cycling Road,Men's Individual Time Trial,1,0,0,men


### rbt_medals_daily

In [5]:
import pandas as pd
from IPython.display import display, HTML

def create_reporting_table(f_medals_daily, d_olympics):
    """
    Creates a reporting table by left joining f_medals_daily with d_olympics, adds a total_medals column and a score column.
    
    Parameters:
        f_medals_daily (pd.DataFrame): The daily medals data.
        d_olympics (pd.DataFrame): The Olympics data containing additional country information.
    
    Returns:
        pd.DataFrame: The resulting reporting table with total_medals and score columns.
    """
    # Perform a left join between f_medals_daily (country) and d_olympics (ioc_code)
    rbt_medals_daily = pd.merge(f_medals_daily, d_olympics, left_on='country', right_on='ioc_code', how='left')

    # Add a total_medals column
    rbt_medals_daily['total_medals'] = rbt_medals_daily['num_gold'] + rbt_medals_daily['num_silver'] + rbt_medals_daily['num_bronze']

    # Add a score column
    rbt_medals_daily['score'] = (rbt_medals_daily['num_gold'] * 4) + (rbt_medals_daily['num_silver'] * 2) + (rbt_medals_daily['num_bronze'] * 1)

    # Reorder columns to match the specified order
    rbt_medals_daily = rbt_medals_daily[[
        'date', 'name', 'ioc_code', 'country', 'flag', 'gdp_ppp', 'colour',
        'num_athletes', 'pop_1564', 'discipline', 'sub_discipline',
        'num_gold', 'num_silver', 'num_bronze', 'total_medals', 'score', 'gender'
    ]]

    # Return the resulting DataFrame
    return rbt_medals_daily

# Assuming f_medals_daily and d_olympics DataFrames are already created
# Call the function to create the reporting table
rbt_medals_daily = create_reporting_table(f_medals_daily, d_olympics)

# Display the resulting table as an HTML scrollable element
display(HTML(f"""
<style>
    .dataframe-div {{
        max-height: 400px;
        overflow: auto;
    }}
</style>
<div class="dataframe-div">{rbt_medals_daily.to_html(index=False)}</div>
"""))


date,name,ioc_code,country,flag,gdp_ppp,colour,num_athletes,pop_1564,discipline,sub_discipline,num_gold,num_silver,num_bronze,total_medals,score,gender
27-07-2024,Kazakhstan,KAZ,KAZ,🇰🇿,782723100000.0,#00afca,79.0,12372740.0,Shooting,10m Air Rifle Mixed Team Bronze Medal Match,0,0,1,1,1,mix
27-07-2024,China,CHN,CHN,🇨🇳,34643710000000.0,#de2910,388.0,972620250.0,Shooting,10m Air Rifle Mixed Team Gold Medal Match,1,0,0,1,4,mix
27-07-2024,Korea,KOR,KOR,🇰🇷,2794196000000.0,#696969,141.0,36398720.0,Shooting,10m Air Rifle Mixed Team Gold Medal Match,0,1,0,1,2,mix
27-07-2024,China,CHN,CHN,🇨🇳,34643710000000.0,#de2910,388.0,972620250.0,Diving,Women's Synchronised 3m Springboard Final,1,0,0,1,4,women
27-07-2024,United States,USA,USA,🇺🇸,27360940000000.0,#3c3b6e,592.0,216814690.0,Diving,Women's Synchronised 3m Springboard Final,0,1,0,1,2,women
27-07-2024,Great Britain,GBR,GBR,🇬🇧,4026241000000.0,#00247d,327.0,43282690.0,Diving,Women's Synchronised 3m Springboard Final,0,0,1,1,1,women
27-07-2024,Grace BROWN,AUS,AUS,🇦🇺,1841116000000.0,#FFCD00,461.0,17257990.0,Cycling Road,Women's Individual Time Trial,1,0,0,1,4,women
27-07-2024,Anna HENDERSON,GBR,GBR,🇬🇧,4026241000000.0,#00247d,327.0,43282690.0,Cycling Road,Women's Individual Time Trial,0,1,0,1,2,women
27-07-2024,Chloe DYGERT,USA,USA,🇺🇸,27360940000000.0,#3c3b6e,592.0,216814690.0,Cycling Road,Women's Individual Time Trial,0,0,1,1,1,women
27-07-2024,Remco EVENEPOEL,BEL,BEL,🇧🇪,832972000000.0,#000000,165.0,7515290.0,Cycling Road,Men's Individual Time Trial,1,0,0,1,4,men


### High-level Results

In [6]:
import pandas as pd
from IPython.display import display, HTML

def high_level_results(rbt_medals_daily):
    """
    Produces a high-level overview of the number of medals for each country,
    including their flag, and displays it as an HTML table.
    
    Parameters:
        rbt_medals_daily (pd.DataFrame): The DataFrame containing the daily medal results.
    
    Returns:
        pd.DataFrame: A DataFrame summarizing the number of medals for each country, ordered by gold, silver, bronze, and total medals.
    """
    # Group by country and flag, then sum the medal counts
    medal_summary = rbt_medals_daily.groupby(['country', 'flag']).agg(
        gold_medals=('num_gold', 'sum'),
        silver_medals=('num_silver', 'sum'),
        bronze_medals=('num_bronze', 'sum')
    ).reset_index()

    # Calculate total medals
    medal_summary['total_medals'] = medal_summary['gold_medals'] + medal_summary['silver_medals'] + medal_summary['bronze_medals']

    # Sort the summary by gold, silver, bronze, and total medals
    medal_summary = medal_summary.sort_values(by=['gold_medals', 'silver_medals', 'bronze_medals', 'total_medals'], ascending=[False, False, False, False])

    # Convert the summary DataFrame to HTML
    html = medal_summary.to_html(index=False, escape=False)

    # Display the summary as a scrollable HTML table
    display(HTML(f"""
    <style>
        .dataframe-div {{
            max-height: 500px;
            overflow-y: auto;
            border: 1px solid #ddd;
        }}
    </style>
    <div class="dataframe-div">{html}</div>
    """))

# # Assuming rbt_medals_daily DataFrame is already created
# # Call the function to get the high-level results
# high_level_results(rbt_medals_daily)

In [7]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

def plot_medal_trends(
    rbt_medals_daily, 
    metric, 
    top_x_countries=10, 
    plotly_template='plotly', 
    show_markers=True, 
    override_color=None, 
    height=720, 
    width=1280, 
    transparent_background=False,
    transparent_override=False,
    exclude_countries_override=None
):
    """
    Creates a line plot showing the trend of a specified metric over time, grouped by country.
    The plot is colored based on the 'colour' column unless overridden, the legend is ordered by the cumulative metric,
    and markers can be optionally added to each data point. The x-axis is hidden, but vertical grid lines are shown.

    Parameters:
        rbt_medals_daily (pd.DataFrame): The DataFrame containing the daily medal results.
        metric (str): The metric to plot (e.g., 'num_gold', 'num_silver', 'num_bronze', 'total_medals').
        top_x_countries (int): The number of top countries to include in the plot.
        plotly_template (str): The Plotly template to use for the plot.
        show_markers (bool): Whether to show markers on the plot (default is True).
        override_color (str): A hex color code or rgba code to override all country colors (default is None).
        height (int): The height of the plot (default is 720).
        width (int): The width of the plot (default is 1280).
        transparent_background (bool): Whether to make the background transparent (default is False).
        transparent_override (bool): Whether to make the line colors transparent (default is False).

    Returns:
        fig (plotly.graph_objs._figure.Figure): The resulting line plot.
    """
    # Ensure the date column is in datetime format
    rbt_medals_daily['date'] = pd.to_datetime(rbt_medals_daily['date'], format='%d-%m-%Y')

    # Calculate the cumulative sum of the metric for each country over time
    rbt_medals_daily['cumulative_metric'] = rbt_medals_daily.groupby(['country'])[metric].cumsum()

    # Aggregate the cumulative metrics by date, country, flag, and colour
    summary = rbt_medals_daily.groupby(['date', 'country', 'flag', 'colour']).agg({'cumulative_metric': 'last'}).reset_index()

    # Filter to include only the top_x_countries based on the total cumulative metric on the last date
    top_countries = summary.groupby('country').agg({'cumulative_metric': 'last'}).nlargest(top_x_countries, 'cumulative_metric').index
    summary = summary[summary['country'].isin(top_countries)]

    # Sort countries by cumulative metric to order the legend and reverse order for proper overlay
    country_order = summary.groupby('country')['cumulative_metric'].max().sort_values(ascending=True).index

    # Determine the mode based on whether markers should be shown
    mode = 'lines+markers' if show_markers else 'lines'

    # Access the specified Plotly template
    template = pio.templates[plotly_template]

    # Get background colors from the template
    default_paper_bgcolor = template.layout.paper_bgcolor if template.layout.paper_bgcolor else 'white'
    default_plot_bgcolor = template.layout.plot_bgcolor if template.layout.plot_bgcolor else 'white'

    # Create a line plot using Plotly Graph Objects
    fig = go.Figure()

    # Add traces in reverse order for correct overlapping
    for country in country_order[::-1]:
        country_data = summary[summary['country'] == country]
        legend_name = f"{country_data['flag'].iloc[0]} {country}"
        if transparent_override:
            line_color = 'rgba(0, 0, 0, 0)'  # Fully transparent color
        else:
            line_color = override_color if override_color else country_data['colour'].iloc[0]
        
        fig.add_trace(go.Scatter(
            x=country_data['date'],
            y=country_data['cumulative_metric'],
            mode=mode,
            name=legend_name,
            line=dict(color=line_color),
            marker=dict(size=12) if show_markers else None,
            showlegend=True  # Don't show legend yet
        ))

    # Re-add traces in correct order for the legend, but with no lines (only for legend)
    for country in country_order[::-1]:
        country_data = summary[summary['country'] == country]
        legend_name = f"{country_data['flag'].iloc[0]} {country}"
        if transparent_override:
            line_color = 'rgba(0, 0, 0, 0)'  # Fully transparent color
        else:
            if override_color and (exclude_countries_override is None or country not in exclude_countries_override):
                line_color = override_color
            else:
                line_color = country_data['colour'].iloc[0]
        
        fig.add_trace(go.Scatter(
            x=country_data['date'],
            y=country_data['cumulative_metric'],
            mode=mode,
            name=legend_name,
            line=dict(color=line_color),
            marker=dict(size=12) if show_markers else None,
            showlegend=False  # Don't show legend yet
        ))

    # Update layout to hide the x-axis labels but include vertical grid lines
    fig.update_layout(
        title=f'Trend of {metric.replace("_", " ").title()} Over Time for Top {top_x_countries} Countries',
        yaxis_title=None,  # Hide the y-axis title
        legend_title='Country',
        template=plotly_template,
        font=dict(family="Montserrat, sans-serif"),
        height=height,  # Set the height of the plot
        width=width,    # Set the width of the plot
        xaxis=dict(
            showticklabels=False,  # Hide x-axis labels
            showgrid=True,         # Show vertical grid lines
            gridcolor='lightgray'  # Grid line color
        ),
        yaxis=dict(
            showticklabels=True,  # Show y-axis values
            showgrid=True         # Optionally, show horizontal grid lines
        ),
        paper_bgcolor='rgba(0,0,0,0)' if transparent_background else default_paper_bgcolor,
        plot_bgcolor='rgba(0,0,0,0)' if transparent_background else default_plot_bgcolor
    )

    # Show the plot
    fig.show()

In [8]:
metric = 'num_gold'

plot_medal_trends(
    rbt_medals_daily=rbt_medals_daily,
    metric=metric,
    top_x_countries=10,
    plotly_template='base',
    show_markers=True,
    override_color=None,
    height=720,
    width=1280,
    transparent_background=False,
    transparent_override=False
)

# plot_medal_trends(
#     rbt_medals_daily=rbt_medals_daily,
#     metric=metric,
#     top_x_countries=10,
#     plotly_template='base',
#     show_markers=True,
#     override_color='#D3D3D3',
#     height=720,
#     width=1280,
#     transparent_background=True,
#     transparent_override=False
# )

# plot_medal_trends(
#     rbt_medals_daily=rbt_medals_daily,
#     metric=metric,
#     top_x_countries=10,
#     plotly_template='base',
#     show_markers=True,
#     override_color=None,
#     height=720,
#     width=1280,
#     transparent_background=True,
#     transparent_override=False
# )

## Participation Trophy

In [28]:
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objs as go

def plot_participation_vs_population(
    d_olympics, 
    min_population=0, 
    flag_size=20, 
    transparent=False, 
    show_results=True,
    plotly_template='base',
    flag_countries=None,  # List of IOC codes for countries to display as flags, or None to show all flags
    height=720, 
    width=1280
):
    """
    Plot a scatter plot with participation_score on the x-axis and num_athletes on the y-axis.
    Selected countries are shown as flags, while others are shown as bubbles. 
    If flag_countries is None, flags for all countries are shown. Includes an option to filter countries by a minimum population threshold.

    Parameters:
    - d_olympics: DataFrame containing the necessary columns for plotting.
    - min_population: Integer specifying the minimum population threshold for countries to be included in the plot.
    - flag_size: Integer specifying the font size for displaying country flags.
    - transparent: Boolean specifying whether to make the background transparent.
    - show_results: Boolean specifying whether to display the flags.
    - plotly_template: String specifying which Plotly template to use (default is 'base').
    - flag_countries: List of IOC codes for countries to display as flags (default is None, which shows all flags).
    - height: Integer specifying the height of the plot (default is 720).
    - width: Integer specifying the width of the plot (default is 1280).
    """

    # Apply the minimum population threshold
    d_olympics_metadata_filtered = d_olympics[
        (d_olympics['pop_1564'] >= min_population) &
        (d_olympics['flag'].notna())  # Exclude NaN flags
    ]

    # If flag_countries is None, show flags for all countries
    if flag_countries is None:
        flag_data = d_olympics_metadata_filtered
        bubble_data = pd.DataFrame(columns=d_olympics_metadata_filtered.columns)  # Empty DataFrame for bubbles
    else:
        flag_data = d_olympics_metadata_filtered[d_olympics_metadata_filtered['ioc_code'].isin(flag_countries)]
        bubble_data = d_olympics_metadata_filtered[~d_olympics_metadata_filtered['ioc_code'].isin(flag_countries)]

    # Access the specified Plotly template
    template = pio.templates[plotly_template]

    # Get background colors from the template if not transparent
    default_plot_bgcolor = template.layout.plot_bgcolor if template.layout.plot_bgcolor else 'white'
    default_paper_bgcolor = template.layout.paper_bgcolor if template.layout.paper_bgcolor else 'white'

    plot_bg_color = 'rgba(0,0,0,0)' if transparent else default_plot_bgcolor
    paper_bg_color = 'rgba(0,0,0,0)' if transparent else default_paper_bgcolor

    # Create a scatter plot for bubbles
    fig = px.scatter(
        bubble_data,
        x='participation_score',
        y='num_athletes',
        hover_name='ioc_code',
        title='Participation Score vs. Number of Athletes',
        labels={'participation_score': 'Participation Score', 'num_athletes': 'Number of Athletes'},
    )

    # Add annotations for each point using flags if show_results is True
    if show_results and not flag_data.empty:
        for i, row in flag_data.iterrows():
            fig.add_annotation(
                x=row['participation_score'],
                y=row['num_athletes'],
                text=row['flag'],
                showarrow=False,
                font=dict(size=flag_size),  # Adjust font size for flags
                align='center'
            )

    # Update layout using the custom template, apply background color, and set height/width
    fig.update_layout(
        template=plotly_template,  # Use the provided Plotly template
        title=dict(
            text='Participation Score vs. Number of Athletes',
            font=dict(size=40)
        ),
        xaxis=dict(title='Participation Score'),
        yaxis=dict(title='Number of Athletes'),
        showlegend=False,
        plot_bgcolor=plot_bg_color,  # Set the plot background color dynamically
        paper_bgcolor=paper_bg_color,  # Set the paper (overall) background color dynamically
        height=height,  # Set the height of the plot
        width=width    # Set the width of the plot
    )

    # Show plot
    fig.show()

In [29]:
# Comprehensive list of Pacific IOC codes
caribbean_ioc_codes = [
    'ANT',  # Antigua and Barbuda
    'ARU',  # Aruba
    'BAH',  # Bahamas
    'BAR',  # Barbados
    'BER',  # Bermuda
    'CAY',  # Cayman Islands
    'CUB',  # Cuba
    'DMA',  # Dominica
    'DOM',  # Dominican Republic
    'GRN',  # Grenada
    'GUY',  # Guyana
    'HAI',  # Haiti
    'ISV',  # Virgin Islands
    'IVB',  # British Virgin Islands
    'JAM',  # Jamaica
    'LCA',  # Saint Lucia
    'SKN',  # Saint Kitts and Nevis
    'SUR',  # Suriname
    'TTO',  # Trinidad and Tobago
    'VIN',  # Saint Vincent and the Grenadines
    'PUR',  # Puerto Rico
]

pacific_ioc_codes = [
    'ASA',  # American Samoa
    'AUS',  # Australia
    'COK',  # Cook Islands
    'FIJ',  # Fiji
    'FSM',  # Federated States of Micronesia
    'GUM',  # Guam
    'KIR',  # Kiribati
    'MHL',  # Marshall Islands
    'NRU',  # Nauru
    'NZL',  # New Zealand
    'PLW',  # Palau
    'PNG',  # Papua New Guinea
    'SAM',  # Samoa
    'SOL',  # Solomon Islands
    'TGA',  # Tonga
    'TUV',  # Tuvalu
    'VAN',  # Vanuatu
]

plot_participation_vs_population(d_olympics, min_population=0, flag_size=50, transparent=False, show_results=True, flag_countries=None, height=720, width=1280)
# plot_participation_vs_population(d_olympics, min_population=0, flag_size=50, transparent=True, show_results=True, flag_countries=['MON'], height=720, width=1280)
# plot_participation_vs_population(d_olympics, min_population=0, flag_size=50, transparent=True, show_results=True, flag_countries=d_ioc_codes, height=720, width=1280)
# plot_participation_vs_population(d_olympics, min_population=0, flag_size=50, transparent=True, show_results=True, flag_countries=pacific_ioc_codes, height=720, width=1280)
# plot_participation_vs_population(d_olympics, min_population=0, flag_size=50, transparent=True, show_results=True, flag_countries=caribbean_ioc_codes, height=720, width=1280)

## Ass Kickers

In [30]:
import pandas as pd

def pretty_print_filtered_events_medal_tally(f_medals_daily, filtered_events, filter_equal_medals=False):
    """
    Prints a summary of the medal tally for the specified filtered events, ordered by gold, silver, and bronze medals,
    and also includes the total medal count for each country across all events. Includes an option to filter countries
    where Medals = Total Medals.

    Parameters:
    - f_medals_daily: DataFrame containing daily medal data with columns
                      ['date', 'name', 'country', 'discipline', 'sub_discipline',
                       'num_gold', 'num_silver', 'num_bronze'].
    - filtered_events: List of disciplines to be filtered and considered for the tally.
    - filter_equal_medals: Boolean specifying whether to filter countries where Medals = Total Medals.
    """
    
    # Filter the DataFrame for the specified filtered events
    selected_data_filtered_events = f_medals_daily[f_medals_daily['discipline'].isin(filtered_events)]
    
    # Group data by country for filtered events
    grouped_data_filtered_events = selected_data_filtered_events.groupby('country').agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum'
    }).reset_index()
    
    # Calculate total medals for filtered events
    grouped_data_filtered_events['total'] = grouped_data_filtered_events['num_gold'] + grouped_data_filtered_events['num_silver'] + grouped_data_filtered_events['num_bronze']

    # Order by gold, silver, and bronze medals for filtered events
    grouped_data_filtered_events = grouped_data_filtered_events.sort_values(by=['num_gold', 'num_silver', 'num_bronze'], ascending=[False, False, False]).reset_index(drop=True)

    # Group data by country for all events
    grouped_data_all_events = f_medals_daily.groupby('country').agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum'
    }).reset_index()
    
    # Calculate total medals for all events
    grouped_data_all_events['total'] = grouped_data_all_events['num_gold'] + grouped_data_all_events['num_silver'] + grouped_data_all_events['num_bronze']

    # Merge the two grouped dataframes
    merged_data = pd.merge(grouped_data_filtered_events, grouped_data_all_events, on='country', suffixes=('_filtered', '_all'))

    # Apply filter if required
    if filter_equal_medals:
        merged_data = merged_data[merged_data['total_filtered'] > 0.5 * merged_data['total_all']]


    # Pretty print the results
    print("\nMedal Tally for Filtered Events:")
    print("-" * 80)
    print("{:<20} {:>6} {:>6} {:>6} {:>6} {:>12}".format('Country', 'Gold', 'Silver', 'Bronze', 'Medals', 'Total Medals'))
    print("-" * 80)

    for _, row in merged_data.iterrows():
        country = row['country']
        num_gold_filtered = row['num_gold_filtered']
        num_silver_filtered = row['num_silver_filtered']
        num_bronze_filtered = row['num_bronze_filtered']
        total_filtered = row['total_filtered']
        total_all = row['total_all']
        
        print("{:<20} {:>6} {:>6} {:>6} {:>6} {:>12}".format(
            country, num_gold_filtered, num_silver_filtered, num_bronze_filtered, total_filtered, total_all
        ))

    print("-" * 80)
    total_gold_filtered = grouped_data_filtered_events['num_gold'].sum()
    total_silver_filtered = grouped_data_filtered_events['num_silver'].sum()
    total_bronze_filtered = grouped_data_filtered_events['num_bronze'].sum()
    total_medals_filtered = total_gold_filtered + total_silver_filtered + total_bronze_filtered

    print(f"Filtered: {total_gold_filtered:>2} Gold, {total_silver_filtered:>2} Silver, {total_bronze_filtered:>2} Bronze, {total_medals_filtered:>2} Medals\n")


In [31]:
# Rich events
rich_events = [
    "Equestrian",
    "Fencing",
    "Golf",
    "Modern Pentathlon",
    "Sailing",
    # "Shooting"
]

# New events
new_events = [
    "Breaking",
    "Basketball 3x3",
    "Cycling BMX Freestyle",
    "Skateboarding",
    "Sport Climbing",
    "Surfing",
    "Rugby Sevens"
]

# Ass kicking events
ass_kicking_events = [
    "Boxing",
    "Judo",
    "Taekwondo",
    "Wrestling"
]

pretty_print_filtered_events_medal_tally(f_medals_daily, ass_kicking_events, filter_equal_medals=True)


Medal Tally for Filtered Events:
--------------------------------------------------------------------------------
Country                Gold Silver Bronze Medals Total Medals
--------------------------------------------------------------------------------
UZB                       8      1      3     12           13
IRI                       3      6      3     12           12
GEO                       2      3      1      6            7
AZE                       2      2      3      7            7
CUB                       2      1      5      8            9
BUL                       2      0      2      4            7
KAZ                       1      2      2      5            7
TUN                       1      0      1      2            3
KGZ                       0      2      4      6            6
TUR                       0      2      4      6            8
MDA                       0      1      2      3            4
KOS                       0      1      1      2            

## Per Capita Champ

In [32]:
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

def plot_medal_trends_per_capita(
    rbt_medals_daily, 
    metric, 
    top_x_countries=10, 
    min_count=0,
    normalize_by='population',  # 'population' or 'gdp'
    scale_factor=1_000_000,     # Default to per million
    plotly_template='base', 
    show_markers=True, 
    override_color=None, 
    height=720, 
    width=1280, 
    transparent_background=False,
    transparent_override=False
):
    """
    Creates a line plot showing the trend of a specified metric over time, normalized by the population (pop_1564) 
    or GDP (gdp_ppp), grouped by country, and scaled by the specified factor (e.g., per million people). 
    The plot is colored based on the 'colour' column unless overridden, the legend is ordered by the cumulative metric,
    and markers can be optionally added to each data point. The x-axis is hidden, but vertical grid lines are shown.

    Parameters:
        rbt_medals_daily (pd.DataFrame): The DataFrame containing the daily medal results, including 'pop_1564' 
                                         and 'gdp_ppp' columns.
        metric (str): The metric to plot per unit (e.g., 'num_gold', 'num_silver', 'num_bronze', 'total_medals').
        top_x_countries (int): The number of top countries to include in the plot.
        min_count (int): The minimum count of the specified metric for a country to be included (default is 0).
        normalize_by (str): The column to normalize by, either 'population' or 'gdp' (default is 'population').
        scale_factor (int): The factor to scale the normalized metric by (e.g., 1_000_000 for per million).
        plotly_template (str): The Plotly template to use for the plot.
        show_markers (bool): Whether to show markers on the plot (default is True).
        override_color (str): A hex color code or rgba code to override all country colors (default is None).
        height (int): The height of the plot (default is 720).
        width (int): The width of the plot (default is 1280).
        transparent_background (bool): Whether to make the background transparent (default is False).
        transparent_override (bool): Whether to make the line colors transparent (default is False).

    Returns:
        fig (plotly.graph_objs._figure.Figure): The resulting line plot.
    """
    # Ensure the date column is in datetime format
    rbt_medals_daily['date'] = pd.to_datetime(rbt_medals_daily['date'], format='%d-%m-%Y')

    # Filter countries based on the minimum count for the specified metric
    country_metric_totals = rbt_medals_daily.groupby('country')[metric].sum()
    eligible_countries = country_metric_totals[country_metric_totals >= min_count].index
    rbt_medals_daily = rbt_medals_daily[rbt_medals_daily['country'].isin(eligible_countries)].copy()

    # Determine the normalization column and label
    if normalize_by == 'gdp':
        norm_column = 'gdp_ppp'
        norm_label = f'Per {scale_factor:,} USD PPP'
    else:
        norm_column = 'pop_1564'
        norm_label = f'Per {scale_factor:,} People (15-64)'

    # Calculate the normalized metric per unit using the selected normalization column
    rbt_medals_daily.loc[:, 'metric_per_unit'] = (rbt_medals_daily[metric] / rbt_medals_daily[norm_column]) * scale_factor

    # Calculate the cumulative sum of the normalized metric per unit for each country over time
    rbt_medals_daily.loc[:, 'cumulative_metric_per_unit'] = rbt_medals_daily.groupby(['country'])['metric_per_unit'].cumsum()

    # Aggregate the cumulative metrics per unit by date, country, flag, and colour
    summary = rbt_medals_daily.groupby(['date', 'country', 'flag', 'colour']).agg({'cumulative_metric_per_unit': 'last'}).reset_index()

    # Filter to include only the top_x_countries based on the total cumulative metric per unit on the last date
    top_countries = summary.groupby('country').agg({'cumulative_metric_per_unit': 'last'}).nlargest(top_x_countries, 'cumulative_metric_per_unit').index
    summary = summary[summary['country'].isin(top_countries)]

    # Sort countries by cumulative metric per unit to order the legend and reverse order for proper overlay
    country_order = summary.groupby('country')['cumulative_metric_per_unit'].max().sort_values(ascending=False).index

    # Determine the mode based on whether markers should be shown
    mode = 'lines+markers' if show_markers else 'lines'

    # Access the specified Plotly template
    template = pio.templates[plotly_template]

    # Get background colors from the template
    default_paper_bgcolor = template.layout.paper_bgcolor if template.layout.paper_bgcolor else 'white'
    default_plot_bgcolor = template.layout.plot_bgcolor if template.layout.plot_bgcolor else 'white'

    # Create a line plot using Plotly Graph Objects
    fig = go.Figure()

    # Add traces in reverse order for correct overlapping
    for country in country_order[::-1]:
        country_data = summary[summary['country'] == country]
        legend_name = f"{country_data['flag'].iloc[0]} {country}"
        if transparent_override:
            line_color = 'rgba(0, 0, 0, 0)'  # Fully transparent color
        else:
            line_color = override_color if override_color else country_data['colour'].iloc[0]
        
        fig.add_trace(go.Scatter(
            x=country_data['date'],
            y=country_data['cumulative_metric_per_unit'],
            mode=mode,
            name=legend_name,
            line=dict(color=line_color),
            marker=dict(size=12) if show_markers else None,
            showlegend=False  # Don't show legend yet
        ))

    # Re-add traces in correct order for the legend, but with no lines (only for legend)
    for country in country_order:
        country_data = summary[summary['country'] == country]
        legend_name = f"{country_data['flag'].iloc[0]} {country}"
        legend_color = override_color if override_color else country_data['colour'].iloc[0]
        fig.add_trace(go.Scatter(
            x=[None],  # No x values
            y=[None],  # No y values
            mode='lines',
            name=legend_name,
            line=dict(color=legend_color),
            showlegend=True  # Show legend for this dummy trace
        ))

    # Update layout to hide the x-axis labels but include vertical grid lines
    fig.update_layout(
        title=f'Trend of {metric.replace("_", " ").title()} {norm_label} Over Time for Top {top_x_countries} Countries',
        yaxis_title=None,  # Hide the y-axis title
        legend_title='Country',
        template=plotly_template,
        font=dict(family="Montserrat, sans-serif"),
        height=height,  # Set the height of the plot
        width=width,    # Set the width of the plot
        xaxis=dict(
            showticklabels=False,  # Hide x-axis labels
            showgrid=True,         # Show vertical grid lines
            gridcolor='lightgray'  # Grid line color
        ),
        yaxis=dict(
            showticklabels=True,  # Show y-axis values
            showgrid=True         # Optionally, show horizontal grid lines
        ),
        paper_bgcolor='rgba(0,0,0,0)' if transparent_background else default_paper_bgcolor,
        plot_bgcolor='rgba(0,0,0,0)' if transparent_background else default_plot_bgcolor
    )

    # Show the plot
    fig.show()

# # Example usage
# plot_medal_trends_per_capita(rbt_medals_daily, metric='total_medals', min_count=5, top_x_countries=10, normalize_by='population', scale_factor=1_000_000)
# plot_medal_trends_per_capita(rbt_medals_daily, metric='total_medals', min_count=5, top_x_countries=10, normalize_by='gdp', scale_factor=100_000_000_000)
# plot_medal_trends_per_capita(rbt_medals_daily, metric='total_medals', min_count=1, top_x_countries=10, normalize_by='gdp', scale_factor=100_000_000_000)

In [33]:
plot_medal_trends_per_capita(
    rbt_medals_daily, 
    metric='total_medals',
    top_x_countries=10, 
    min_count=5,
    normalize_by='population',  # 'population' or 'gdp'
    scale_factor=1_000_000,     # Default to per million
    plotly_template='base', 
    show_markers=True, 
    override_color=None, 
    height=720, 
    width=1280, 
    transparent_background=False,
    transparent_override=False
)

# plot_medal_trends_per_capita(
#     rbt_medals_daily, 
#     metric='total_medals',
#     top_x_countries=10, 
#     min_count=5,
#     normalize_by='population',  # 'population' or 'gdp'
#     scale_factor=1_000_000,     # Default to per million
#     plotly_template='base', 
#     show_markers=False, 
#     override_color=None, 
#     height=720, 
#     width=1280, 
#     transparent_background=True,
#     transparent_override=True
# )

# plot_medal_trends_per_capita(
#     rbt_medals_daily, 
#     metric='total_medals',
#     top_x_countries=10, 
#     min_count=1,
#     normalize_by='gdp',  # 'population' or 'gdp'
#     scale_factor=100_000_000_000,     # Default to per million
#     plotly_template='base', 
#     show_markers=False, 
#     override_color=None, 
#     height=720, 
#     width=1280, 
#     transparent_background=True,
#     transparent_override=False
# )

In [34]:
import plotly.graph_objects as go

def plot_scatterplot_per_capita(
    rbt_medals_daily, 
    metric, 
    normalize_by='population',  # 'population' or 'gdp'
    scale_factor=1_000_000,     # Default to per million
    top_x_countries=10,
    min_count=0,
    plotly_template='base', 
    height=720, 
    width=1280
):
    """
    Creates a scatter plot with the per capita metric on the y-axis, the raw metric on the x-axis,
    and the flag emoji as the marker.

    Parameters:
        rbt_medals_daily (pd.DataFrame): The DataFrame containing the daily medal results, including 'pop_1564' 
                                         and 'gdp_ppp' columns.
        metric (str): The metric to plot on the x-axis (e.g., 'num_gold', 'num_silver', 'num_bronze', 'total_medals').
        normalize_by (str): The column to normalize by, either 'population' or 'gdp' (default is 'population').
        scale_factor (int): The factor to scale the normalized metric by (e.g., 1_000_000 for per million).
        top_x_countries (int): The number of top countries to include in the plot.
        min_count (int): The minimum count of the specified metric for a country to be included (default is 0).
        plotly_template (str): The Plotly template to use for the plot.
        height (int): The height of the plot (default is 720).
        width (int): The width of the plot (default is 1280).

    Returns:
        fig (plotly.graph_objs._figure.Figure): The resulting scatter plot.
    """
    # Filter countries based on the minimum count for the specified metric
    country_metric_totals = rbt_medals_daily.groupby('country')[metric].sum()
    eligible_countries = country_metric_totals[country_metric_totals >= min_count].index
    rbt_medals_daily = rbt_medals_daily[rbt_medals_daily['country'].isin(eligible_countries)].copy()

    # Determine the normalization column and label
    if normalize_by == 'gdp':
        norm_column = 'gdp_ppp'
        norm_label = f'Per {scale_factor:,} USD PPP'
    else:
        norm_column = 'pop_1564'
        norm_label = f'Per {scale_factor:,} People (15-64)'

    # Calculate the normalized metric per unit using the selected normalization column
    rbt_medals_daily.loc[:, 'metric_per_unit'] = (rbt_medals_daily[metric] / rbt_medals_daily[norm_column]) * scale_factor

    # Aggregate metrics by country
    summary = rbt_medals_daily.groupby(['country', 'flag']).agg({
        'metric_per_unit': 'sum',
        metric: 'sum'
    }).reset_index()

    # Filter to include only the top_x_countries based on the per capita metric
    top_countries = summary.nlargest(top_x_countries, 'metric_per_unit').reset_index(drop=True)

    # Create the scatter plot with Plotly Graph Objects
    fig = go.Figure()

    # Add traces using flag emojis as text markers
    fig.add_trace(go.Scatter(
        x=top_countries[metric],
        y=top_countries['metric_per_unit'],
        mode='text',  # Use text mode to display the flag emojis
        text=top_countries['flag'],
        textfont=dict(size=50),  # Set the font size to control the emoji size
        hovertemplate=(
            f'<b>%{{text}}</b><br><br>'
            f'{metric.replace("_", " ").title()}: %{{x}}<br>'
            f'{metric.replace("_", " ").title()} {norm_label}: %{{y:.2f}}<extra></extra>'
        )
    ))

    # Update layout for aesthetics
    fig.update_layout(
        title=f'Scatter Plot of {metric.replace("_", " ").title()} vs. {norm_label}',
        xaxis_title=metric.replace("_", " ").title(),
        yaxis_title=f'{metric.replace("_", " ").title()} {norm_label}',
        template=plotly_template,
        font=dict(family="Montserrat, sans-serif"),
        height=height,
        width=width,
        xaxis=dict(showgrid=True, zeroline=False),
        yaxis=dict(showgrid=True, zeroline=False),
    )

    # Show the plot
    fig.show()

# Example usage
# plot_scatterplot_per_capita(rbt_medals_daily, metric='total_medals', normalize_by='gdp', scale_factor=100_000_000_000, top_x_countries=10, min_count=5)
# plot_scatterplot_per_capita(rbt_medals_daily, metric='total_medals', normalize_by='gdp', scale_factor=100_000_000_000, top_x_countries=20, min_count=1)

## Women's Champ

In [35]:
import pandas as pd
from IPython.display import display, HTML

def greatest_share_women_medals(f_medals_daily, top_x=5, min_medal_threshold=0, sort_by='medals_women_share'):
    """
    Determine the countries with the greatest share of their medals coming from women or women/mixed events.

    Parameters:
    - f_medals_daily: DataFrame containing daily medal data with columns
                      ['date', 'name', 'country', 'discipline', 'sub_discipline',
                       'num_gold', 'num_silver', 'num_bronze', 'gender'].
    - top_x: Integer specifying the number of top countries to display based on the specified sorting column.
    - min_medal_threshold: Integer specifying the minimum number of total medals a country must have to be included.
    - sort_by: String specifying the column to sort by.

    Returns:
    - DataFrame with the top countries based on the specified sorting column.
    """
    # Group by country and gender to get the total medals
    grouped = f_medals_daily.groupby(['country', 'gender']).agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum'
    }).reset_index()

    # Calculate total medals for each country
    grouped['total_medals'] = grouped['num_gold'] + grouped['num_silver'] + grouped['num_bronze']
    
    # Pivot the table to have separate columns for each gender
    pivot = grouped.pivot(index='country', columns='gender', values=['num_gold', 'total_medals']).fillna(0)
    
    # Flatten the multi-level columns
    pivot.columns = ['_'.join(col).strip() for col in pivot.columns.values]

    # Calculate total medals and gold medals per country
    pivot['total'] = pivot.filter(like='total_medals').sum(axis=1)
    pivot['total_gold'] = pivot.filter(like='num_gold').sum(axis=1)

    # Create a mapping between sort_by options and their corresponding medal columns
    sort_by_mapping = {
        'medals_women_share': 'total_medals_women',
        'medals_mixed_share': 'total_medals_mix',
        'medals_men_share': 'total_medals_men',
        'gold_women_share': 'num_gold_women',
        'gold_mixed_share': 'num_gold_mix',
        'gold_men_share': 'num_gold_men'
    }

    # Determine the column to apply the threshold on
    threshold_column = sort_by_mapping[sort_by]

    # Apply the minimum medal threshold based on the selected metric
    pivot = pivot[pivot[threshold_column] >= min_medal_threshold]

    # Apply the minimum medal threshold
    pivot = pivot[pivot['total'] >= min_medal_threshold]

    # Calculate the share of medals from women or women/mixed
    pivot['medals_women_share'] = pivot['total_medals_women'] / pivot['total']
    pivot['medals_mixed_share'] = pivot['total_medals_mix'] / pivot['total']
    pivot['medals_men_share'] = pivot['total_medals_men'] / pivot['total']
    
    pivot['gold_women_share'] = pivot['num_gold_women'] / pivot['total_gold']
    pivot['gold_mixed_share'] = pivot['num_gold_mix'] / pivot['total_gold']
    pivot['gold_men_share'] = pivot['num_gold_men'] / pivot['total_gold']

    # Print the column names to troubleshoot
    print("Pivot Columns:", pivot.columns)

    # Sort countries by the specified column in descending order
    pivot_sorted = pivot.sort_values(by=sort_by, ascending=False).head(top_x)

    # Select and order the columns to display
    columns_to_display = [
        'total_medals_women', 'total_medals_mix', 'total_medals_men',
        'num_gold_women', 'num_gold_mix', 'num_gold_men',
        'medals_women_share', 'medals_mixed_share', 'medals_men_share',
        'gold_women_share', 'gold_mixed_share', 'gold_men_share'
    ]

    # Pretty print the results as an HTML scrollable table
    html_output = pivot_sorted[columns_to_display].to_html(max_rows=top_x, notebook=True)
    display(HTML(f"""
    <style>
        .dataframe-div {{
            max-height: 400px;
            overflow: auto;
        }}
    </style>
    <div class="dataframe-div">{html_output}</div>
    """))

    return pivot_sorted

# Example usage
top_countries = greatest_share_women_medals(f_medals_daily, top_x=20, min_medal_threshold=5, sort_by='gold_women_share')

Pivot Columns: Index(['num_gold_men', 'num_gold_mix', 'num_gold_women', 'total_medals_men',
       'total_medals_mix', 'total_medals_women', 'total', 'total_gold',
       'medals_women_share', 'medals_mixed_share', 'medals_men_share',
       'gold_women_share', 'gold_mixed_share', 'gold_men_share'],
      dtype='object')


Unnamed: 0_level_0,total_medals_women,total_medals_mix,total_medals_men,num_gold_women,num_gold_mix,num_gold_men,medals_women_share,medals_mixed_share,medals_men_share,gold_women_share,gold_mixed_share,gold_men_share
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NZL,14.0,1.0,5.0,8.0,0.0,2.0,0.7,0.05,0.25,0.8,0.0,0.2
AUS,30.0,3.0,20.0,13.0,0.0,5.0,0.566038,0.056604,0.377358,0.722222,0.0,0.277778
CAN,17.0,2.0,8.0,6.0,1.0,2.0,0.62963,0.074074,0.296296,0.666667,0.111111,0.222222
USA,67.0,8.0,51.0,26.0,1.0,13.0,0.531746,0.063492,0.404762,0.65,0.025,0.325
ITA,14.0,3.0,23.0,7.0,2.0,3.0,0.35,0.075,0.575,0.583333,0.166667,0.25
KOR,17.0,5.0,10.0,7.0,1.0,5.0,0.53125,0.15625,0.3125,0.538462,0.076923,0.384615
NED,20.0,3.0,11.0,8.0,1.0,6.0,0.588235,0.088235,0.323529,0.533333,0.066667,0.4
GBR,27.0,8.0,30.0,6.0,2.0,6.0,0.415385,0.123077,0.461538,0.428571,0.142857,0.428571
CHN,47.0,10.0,34.0,17.0,6.0,17.0,0.516484,0.10989,0.373626,0.425,0.15,0.425
JPN,17.0,5.0,23.0,7.0,1.0,12.0,0.377778,0.111111,0.511111,0.35,0.05,0.6


## Undisputed Champ

In [36]:
plot_medal_trends(
    rbt_medals_daily=rbt_medals_daily,
    metric='num_gold',
    top_x_countries=10,
    plotly_template='base',
    show_markers=True,
    override_color=None,
    height=720,
    width=1280,
    transparent_background=False,
    transparent_override=False
)

plot_medal_trends(
    rbt_medals_daily=rbt_medals_daily,
    metric='total_medals',
    top_x_countries=10,
    plotly_template='base',
    show_markers=True,
    override_color=None,
    height=720,
    width=1280,
    transparent_background=False,
    transparent_override=False
)

plot_medal_trends(
    rbt_medals_daily=rbt_medals_daily,
    metric='score',
    top_x_countries=10,
    plotly_template='base',
    show_markers=True,
    override_color=None,
    height=720,
    width=1280,
    transparent_background=False,
    transparent_override=False
)

In [37]:
import pandas as pd
from IPython.display import display, HTML

# Assuming your dataset is in a CSV file
data = pd.read_csv('f_medals_overview.csv')

# Calculate the score for each country/year
data['Score'] = data['Gold'] * 4 + data['Silver'] * 2 + data['Bronze'] * 1

# Get the total score for each year
total_score_per_year = data.groupby('Year')['Score'].sum().reset_index()
total_score_per_year.rename(columns={'Score': 'TotalScore'}, inplace=True)

# Merge to get the total score of each year with the main data
data = pd.merge(data, total_score_per_year, on='Year')

# Calculate the share of the score
data['Share'] = data['Score'] / data['TotalScore']

# Sort the data by year and share in descending order
data_sorted = data.sort_values(by=['Year', 'Share'], ascending=[True, False])

# Extract the top 5 countries for each year
top_5 = data_sorted.groupby('Year').head(5)

# Create the desired output format
output = top_5.groupby('Year').apply(lambda x: pd.Series({
    'first_country': x.iloc[0]['Nation'],
    'first_score': x.iloc[0]['Share'],
    'second_country': x.iloc[1]['Nation'] if len(x) > 1 else None,
    'second_score': x.iloc[1]['Share'] if len(x) > 1 else None,
    'third_country': x.iloc[2]['Nation'] if len(x) > 2 else None,
    'third_score': x.iloc[2]['Share'] if len(x) > 2 else None,
    'fourth_country': x.iloc[3]['Nation'] if len(x) > 3 else None,
    'fourth_score': x.iloc[3]['Share'] if len(x) > 3 else None,
    'fifth_country': x.iloc[4]['Nation'] if len(x) > 4 else None,
    'fifth_score': x.iloc[4]['Share'] if len(x) > 4 else None,
})).reset_index()

# Pretty print the results as an HTML scrollable table
html_output = output.to_html(index=False, max_rows=None)
display(HTML(f"""
<style>
    .dataframe-div {{
        max-height: 400px;
        overflow: auto;
    }}
</style>
<div class="dataframe-div">{html_output}</div>
"""))






Year,first_country,first_score,second_country,second_score,third_country,third_score,fourth_country,fourth_score,fifth_country,fifth_score
1896,Greece,0.323129,United States,0.204082,Germany,0.122449,France,0.102041,Great Britain,0.054422
1900,France,0.334333,United States,0.178411,Great Britain,0.124438,Mixed team,0.071964,Belgium,0.062969
1904,United States,0.809955,Germany,0.048265,Canada,0.028658,Cuba,0.0181,Hungary,0.016591
1908,Great Britain,0.479632,United States,0.1682,Sweden,0.072273,France,0.051248,Canada,0.036794
1912,United States,0.222376,Sweden,0.219613,Great Britain,0.118785,Finland,0.084254,Germany,0.073204
1920,United States,0.233333,Sweden,0.134286,Great Britain,0.094286,Finland,0.084762,Belgium,0.084762
1924,United States,0.295583,Finland,0.10419,France,0.10419,Great Britain,0.083805,Switzerland,0.061155
1928,United States,0.183007,Germany,0.088889,Finland,0.07451,Sweden,0.067974,France,0.064052
1932,United States,0.31954,Italy,0.096552,Sweden,0.067816,France,0.066667,Germany,0.058621
1936,Germany,0.254658,United States,0.15528,Italy,0.069358,Finland,0.049689,Hungary,0.048654


In [38]:
import pandas as pd

def top_disciplines_by_country(f_medals_daily, country, top_x, metric='total'):
    # Filter the dataframe for the given country
    country_df = f_medals_daily[f_medals_daily['country'] == country]

    # Count the medals by discipline
    discipline_counts = country_df.groupby('discipline').agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum'
    }).reset_index()

    # Add a 'total' column
    discipline_counts['total'] = (discipline_counts['num_gold'] + 
                                  discipline_counts['num_silver'] + 
                                  discipline_counts['num_bronze'])

    # Rename columns for clarity
    discipline_counts.rename(columns={
        'num_gold': 'gold',
        'num_silver': 'silver',
        'num_bronze': 'bronze'
    }, inplace=True)

    # Sort by the specified metric
    sorted_disciplines = discipline_counts.sort_values(by=metric, ascending=False).head(top_x)

    return sorted_disciplines


In [39]:
# Example usage
top_disciplines_by_country(f_medals_daily, 'CHN', 10, metric='gold')

Unnamed: 0,discipline,gold,silver,bronze,total
9,Diving,8,2,1,11
14,Shooting,5,2,3,10
21,Weightlifting,5,0,0,5
17,Table Tennis,5,1,0,6
5,Boxing,3,2,0,5
16,Swimming,2,3,7,12
2,Artistic Swimming,2,0,0,2
4,Badminton,2,3,0,5
7,Canoe Sprint,2,0,0,2
1,Artistic Gymnastics,2,5,2,9


In [40]:
# Example usage
top_disciplines_by_country(f_medals_daily, 'USA', 10, metric='gold')

Unnamed: 0,discipline,gold,silver,bronze,total
4,Athletics,14,11,9,34
26,Swimming,8,13,7,28
2,Artistic Gymnastics,3,1,5,9
33,Wrestling,2,2,3,7
13,Cycling Track,2,0,0,2
5,Basketball,2,0,0,2
16,Fencing,2,1,1,4
32,Weightlifting,1,0,1,2
25,Surfing,1,0,0,1
22,Shooting,1,3,1,5


In [41]:
import pandas as pd
from IPython.display import display, HTML

def historical_results(file_path, country, metric):
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)
    
    # Ensure metric is one of the allowed values
    if metric not in ['gold', 'silver', 'bronze', 'total', 'score']:
        raise ValueError("Metric must be one of: 'gold', 'silver', 'bronze', 'total', 'score'")
    
    # Calculate the score for each row
    df['Score'] = (df['Gold'] * 4) + (df['Silver'] * 2) + (df['Bronze'] * 1)
    
    # Group by Year and Nation, then sum the medals and score
    grouped = df.groupby(['Year', 'Nation']).sum().reset_index()
    
    # If metric is 'score', use the 'Score' column
    if metric == 'score':
        metric_col = 'Score'
    else:
        metric_col = metric.capitalize()
    
    # Calculate total metric for each year
    total_per_year = grouped.groupby('Year')[metric_col].sum().reset_index()
    total_per_year.columns = ['Year', 'Total_Metric']
    
    # Merge with the original grouped data to get the share of total
    merged = pd.merge(grouped, total_per_year, on='Year')
    merged['Share_of_Total'] = merged[metric_col] / merged['Total_Metric']
    
    # Filter for the specified country
    country_data = merged[merged['Nation'] == country]
    
    # Select the relevant columns
    result = country_data[['Year', 'Gold', 'Silver', 'Bronze', 'Total', metric_col, 'Total_Metric', 'Share_of_Total']]
    result.columns = ['Year', 'Gold', 'Silver', 'Bronze', 'Total', metric.capitalize(), 'Total ' + metric.capitalize() + ' Worldwide', 'Share of Total ' + metric.capitalize()]
    
    # Convert to HTML and format as a scrollable table
    pd.set_option('display.float_format', '{:.2%}'.format)
    html_output = result.to_html(index=False)
    
    # Display the country name on top and the scrollable table
    display(HTML(f"""
    <h2>{country}</h2>
    <div style="max-height: 400px; overflow-y: auto; border: 1px solid #ccc;">
        {html_output}
    </div>
    """))

# # Example usage
# historical_results('f_medals_overview.csv', 'Australia', 'total')

In [42]:
import pandas as pd

def paris_results(df, country, metric):
    # Ensure metric is one of the allowed values
    if metric not in ['gold', 'silver', 'bronze', 'total', 'score']:
        raise ValueError("Metric must be one of: 'gold', 'silver', 'bronze', 'total', 'score'")
    
    # Calculate the total number of medals
    df['Total'] = df['num_gold'] + df['num_silver'] + df['num_bronze']
    
    # Calculate the score for each row
    df['Score'] = (df['num_gold'] * 4) + (df['num_silver'] * 2) + (df['num_bronze'] * 1)
    
    # Convert date to datetime format and extract the year
    df['Date'] = pd.to_datetime(df['date'], format='%d-%m-%Y')
    df['Year'] = df['Date'].dt.year
    
    # Group by Year and country, then sum the medals and score, excluding the Date column
    grouped = df.groupby(['Year', 'country'], as_index=False).agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum',
        'Total': 'sum',
        'Score': 'sum'
    })
    
    # If metric is 'score', use the 'Score' column
    if metric == 'score':
        metric_col = 'Score'
    elif metric == 'total':
        metric_col = 'Total'
    else:
        metric_col = 'num_' + metric
    
    # Calculate total metric for each year
    total_per_year = grouped.groupby('Year')[metric_col].sum().reset_index()
    total_per_year.columns = ['Year', 'Total_Metric']
    
    # Merge with the original grouped data to get the share of total
    merged = pd.merge(grouped, total_per_year, on='Year')
    merged['Share_of_Total'] = merged[metric_col] / merged['Total_Metric']
    
    # Filter for the specified country
    country_data = merged[merged['country'] == country]
    
    # Select the relevant columns and pretty print the results
    result = country_data[['Year', 'num_gold', 'num_silver', 'num_bronze', 'Total', metric_col, 'Total_Metric', 'Share_of_Total']]
    result.columns = ['Year', 'Gold', 'Silver', 'Bronze', 'Total', metric.capitalize(), 'Total ' + metric.capitalize() + ' Worldwide', 'Share of Total ' + metric.capitalize()]
    
    # Pretty print the results
    pd.set_option('display.float_format', '{:.2%}'.format)
    print(result.to_string(index=False))

# # Example usage
# paris_results(f_medals_daily, 'AUS', 'total')

In [43]:
import pandas as pd
from IPython.display import display, HTML

def paris_results(df, country, metric):
    # Ensure metric is one of the allowed values
    if metric not in ['gold', 'silver', 'bronze', 'total', 'score']:
        raise ValueError("Metric must be one of: 'gold', 'silver', 'bronze', 'total', 'score'")
    
    # Calculate the total number of medals
    df['Total'] = df['num_gold'] + df['num_silver'] + df['num_bronze']
    
    # Calculate the score for each row
    df['Score'] = (df['num_gold'] * 4) + (df['num_silver'] * 2) + (df['num_bronze'] * 1)
    
    # Convert date to datetime format and extract the year
    df['Date'] = pd.to_datetime(df['date'], format='%d-%m-%Y')
    df['Year'] = df['Date'].dt.year
    
    # Group by Year and country, then sum the medals and score, excluding the Date column
    grouped = df.groupby(['Year', 'country'], as_index=False).agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum',
        'Total': 'sum',
        'Score': 'sum'
    })
    
    # If metric is 'score', use the 'Score' column
    if metric == 'score':
        metric_col = 'Score'
    elif metric == 'total':
        metric_col = 'Total'
    else:
        metric_col = 'num_' + metric
    
    # Calculate total metric for each year
    total_per_year = grouped.groupby('Year')[metric_col].sum().reset_index()
    total_per_year.columns = ['Year', 'Total_Metric']
    
    # Merge with the original grouped data to get the share of total
    merged = pd.merge(grouped, total_per_year, on='Year')
    merged['Share_of_Total'] = merged[metric_col] / merged['Total_Metric']
    
    # Filter for the specified country
    country_data = merged[merged['country'] == country]
    
    # Select the relevant columns
    result = country_data[['Year', 'num_gold', 'num_silver', 'num_bronze', 'Total', metric_col, 'Total_Metric', 'Share_of_Total']]
    result.columns = ['Year', 'Gold', 'Silver', 'Bronze', 'Total', metric.capitalize(), 'Total ' + metric.capitalize() + ' Worldwide', 'Share of Total ' + metric.capitalize()]
    
    # Convert to HTML and format as a scrollable table
    pd.set_option('display.float_format', '{:.2%}'.format)
    html_output = result.to_html(index=False)
    
    # Display the country name on top and the scrollable table
    display(HTML(f"""
    <h2>{country}</h2>
    <div style="max-height: 400px; overflow-y: auto; border: 1px solid #ccc;">
        {html_output}
    </div>
    """))

# # Example usage
# # Replace 'f_medals_daily' with your actual DataFrame variable
# paris_results(f_medals_daily, 'AUS', 'total')


In [58]:
import pandas as pd

def query_medals(file_path, metric, min_year):
    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path)

    # Filter the DataFrame to only include data from the minimum year onwards
    df = df[df['Year'] >= min_year]

    # Calculate the score
    df['Score'] = (df['Gold'] * 4) + (df['Silver'] * 2) + (df['Bronze'] * 1)

    # Validate the metric
    valid_metrics = ['Gold', 'Silver', 'Bronze', 'Total', 'Score']
    if metric not in valid_metrics:
        raise ValueError(f"Invalid metric. Choose from {valid_metrics}")

    if metric == 'Score':
        total_by_year = df.groupby('Year')['Score'].sum().reset_index()
        total_by_year.columns = ['Year', 'Total_Score']
        df = pd.merge(df, total_by_year, on='Year')
        df['Share'] = df['Score'] / df['Total_Score']
        df['Total_Metric'] = df['Total_Score']
    else:
        # Calculate the total medals for each metric by year
        total_by_year = df.groupby('Year')[metric].sum().reset_index()
        total_by_year.columns = ['Year', 'Total_' + metric]

        # Merge the total medals with the original DataFrame
        df = pd.merge(df, total_by_year, on='Year')

        # Calculate the share of medals for the chosen metric
        df['Share'] = df[metric] / df['Total_' + metric]
        df['Total_Metric'] = df['Total_' + metric]

    # Sort the DataFrame by the share in descending order
    df = df.sort_values(by=['Share', 'Year'], ascending=[False, True]).reset_index(drop=True)

    # Add a Rank column
    df['Rank'] = df.groupby('Year')['Share'].rank("dense", ascending=False).astype(int)

    # Filter to only include cases where rank = 1
    df = df[df['Rank'] == 1]

    # Select relevant columns
    result = df[['Year', 'Nation', 'Gold', 'Silver', 'Bronze', 'Total', metric, 'Total_Metric', 'Share', 'Rank']]
    
    return result


In [59]:

# # Example usage
# historical_results('n_medals_overview.csv', 'United States', 'score')
# # print("\n")
# paris_results(f_medals_daily, 'USA', 'score')
# print("\n")

# All time best
result = query_medals('f_medals_overview.csv', 'Score', 1988)
print(result.to_string(index=False))

result.head()

 Year        Nation  Gold  Silver  Bronze  Total  Score  Total_Metric    Share  Rank
 1988  Soviet Union    55      31      46    132    328          1696 0.193396     1
 1992  Unified Team    45      38      29    112    285          1852 0.153888     1
 1996 United States    44      32      25    101    265          1928 0.137448     1
 2016 United States    46      37      38    121    296          2197 0.134729     1
 2012 United States    48      26      30    104    274          2175 0.125977     1
 2024 United States    40      44      42    126    290          2361 0.122829     1
 2008         China    48      22      30    100    266          2167 0.122750     1
 2004 United States    36      39      26    101    248          2129 0.116487     1
 2020 United States    39      41      33    113    271          2438 0.111157     1
 2000 United States    37      24      32     93    228          2127 0.107193     1


Unnamed: 0,Year,Nation,Gold,Silver,Bronze,Total,Score,Total_Metric,Share,Rank
0,1988,Soviet Union,55,31,46,132,328,1696,0.193396,1
1,1992,Unified Team,45,38,29,112,285,1852,0.153888,1
3,1996,United States,44,32,25,101,265,1928,0.137448,1
6,2016,United States,46,37,38,121,296,2197,0.134729,1
7,2012,United States,48,26,30,104,274,2175,0.125977,1


In [60]:
# Creating the bar chart
fig_gridlines = px.bar(result, x='Year', y='Share', template='base')

# Update layout to show only gridlines and format y-axis as percentages
fig_gridlines.update_layout(
    height=720,
    width=1280,
    xaxis_title=None,  # Removes x-axis label
    yaxis_title=None,  # Removes y-axis label
    plot_bgcolor='rgba(0,0,0,0)',  # Transparent plot area
    paper_bgcolor='rgba(0,0,0,0)',  # Transparent figure background
    showlegend=False,  # Hide legend
    xaxis=dict(
        tickmode='linear',  # Ensure each tick corresponds to a year
        tick0=min(result['Year']),  # Start tick at the first year
        dtick=4  # Set ticks to appear every year
    ),
    yaxis=dict(
        tickformat=".0%",  # Format y-axis ticks as percentages
    )
)

# Update traces to hide bars and show gridlines only
fig_gridlines.update_traces(marker_color='rgba(0,0,0,0)')  # Makes the bars invisible

# Display the figure with just the gridlines
fig_gridlines.show()


In [47]:
# Creating the bar chart
fig_plot = px.bar(result, x='Year', y='Share', template='base')

# Update layout for the full plot and format y-axis as percentages
fig_plot.update_layout(
    height=720,
    width=1280,
    xaxis_title=None,  # Removes x-axis label
    yaxis_title=None,  # Removes y-axis label
    plot_bgcolor='rgba(0,0,0,0)',  # Transparent plot area
    paper_bgcolor='rgba(0,0,0,0)',  # Transparent figure background
    xaxis=dict(
        tickmode='linear',  # Ensure each tick corresponds to a year
        tick0=min(result['Year']),  # Start tick at the first year
        dtick=4  # Set ticks to appear every year
    ),
    yaxis=dict(
        tickformat=".0%",  # Format y-axis ticks as percentages
    )
)

# Display the full plot
fig_plot.show()


## Racing Bar Chart

In [24]:
import pandas as pd

# Create a DataFrame for the country codes, region, and image URLs
country_info = pd.DataFrame({
    'country': ['AFG', 'ALB', 'ALG', 'ASA', 'AND', 'ANG', 'ANT', 'ARG', 'ARM', 'ARU', 'AUS', 'AUT', 'AZE', 'BAH', 'BRN', 
                'BAN', 'BAR', 'BLR', 'BEL', 'BIZ', 'BEN', 'BER', 'BHU', 'BOL', 'BIH', 'BOT', 'BRA', 'IVB', 'BRU', 'BUL', 
                'BUR', 'BDI', 'CPV', 'CAM', 'CMR', 'CAN', 'CAY', 'CAF', 'CHA', 'CHI', 'CHN', 'COL', 'COM', 'COD', 'CGO', 
                'CRC', 'CIV', 'CRO', 'CUB', 'CUW', 'CYP', 'CZE', 'DEN', 'DJI', 'DMA', 'DOM', 'ECU', 'EGY', 'SLV', 'GEQ', 
                'ERI', 'EST', 'SWZ', 'ETH', 'FRO', 'FIJ', 'FIN', 'FRA', 'PYF', 'GAB', 'GAM', 'GEO', 'GER', 'GHA', 'GIB', 
                'GRE', 'GRL', 'GRN', 'GUM', 'GUA', 'GUI', 'GBS', 'GUY', 'HAI', 'HON', 'HKG', 'HUN', 'ISL', 'IND', 'INA', 
                'IRI', 'IRQ', 'IRL', 'IMN', 'ISR', 'ITA', 'JAM', 'JPN', 'JOR', 'KAZ', 'KEN', 'KIR', 'PRK', 'KOR', 'KUW', 
                'KGZ', 'LAO', 'LAT', 'LIB', 'LES', 'LBR', 'LBA', 'LIE', 'LTU', 'LUX', 'MAC', 'MKD', 'MAD', 'MAW', 'MAS', 
                'MDV', 'MLI', 'MLT', 'MHL', 'MTN', 'MRI', 'MEX', 'FSM', 'MDA', 'MON', 'MGL', 'MNE', 'MAR', 'MOZ', 'MYA', 
                'NAM', 'NRU', 'NEP', 'NED', 'NCL', 'NZL', 'NCA', 'NIG', 'NGR', 'MNP', 'NOR', 'OMA', 'PAK', 'PLW', 'PAN', 
                'PNG', 'PAR', 'PER', 'PHI', 'POL', 'POR', 'PUR', 'QAT', 'ROM', 'RUS', 'RWA', 'SAM', 'SMR', 'STP', 'KSA', 
                'SEN', 'SRB', 'SEY', 'SLE', 'SGP', 'SXM', 'SVK', 'SLO', 'SOL', 'SOM', 'RSA', 'SSD', 'ESP', 'SRI', 'SKN', 
                'LCA', 'MAF', 'VIN', 'SUD', 'SUR', 'SWE', 'SUI', 'SYR', 'TJK', 'TAN', 'THA', 'TLS', 'TOG', 'TGA', 'TTO', 
                'TUN', 'TUR', 'TKM', 'TCA', 'TUV', 'UAE', 'UGA', 'GBR', 'UKR', 'URU', 'USA', 'UZB', 'VAN', 'VEN', 'VIE', 
                'ISV', 'PLE', 'YEM', 'ZAM', 'ZIM'],
    'image_url': ['https://public.flourish.studio/country-flags/svg/af.svg',
                  'https://public.flourish.studio/country-flags/svg/al.svg',
                  'https://public.flourish.studio/country-flags/svg/dz.svg',
                  'https://public.flourish.studio/country-flags/svg/as.svg',
                  'https://public.flourish.studio/country-flags/svg/ad.svg',
                  'https://public.flourish.studio/country-flags/svg/ao.svg',
                  'https://public.flourish.studio/country-flags/svg/ag.svg',
                  'https://public.flourish.studio/country-flags/svg/ar.svg',
                  'https://public.flourish.studio/country-flags/svg/am.svg',
                  'https://public.flourish.studio/country-flags/svg/aw.svg',
                  'https://public.flourish.studio/country-flags/svg/au.svg',
                  'https://public.flourish.studio/country-flags/svg/at.svg',
                  'https://public.flourish.studio/country-flags/svg/az.svg',
                  'https://public.flourish.studio/country-flags/svg/bs.svg',
                  'https://public.flourish.studio/country-flags/svg/bh.svg',
                  'https://public.flourish.studio/country-flags/svg/bd.svg',
                  'https://public.flourish.studio/country-flags/svg/bb.svg',
                  'https://public.flourish.studio/country-flags/svg/by.svg',
                  'https://public.flourish.studio/country-flags/svg/be.svg',
                  'https://public.flourish.studio/country-flags/svg/bz.svg',
                  'https://public.flourish.studio/country-flags/svg/bj.svg',
                  'https://public.flourish.studio/country-flags/svg/bm.svg',
                  'https://public.flourish.studio/country-flags/svg/bt.svg',
                  'https://public.flourish.studio/country-flags/svg/bo.svg',
                  'https://public.flourish.studio/country-flags/svg/ba.svg',
                  'https://public.flourish.studio/country-flags/svg/bw.svg',
                  'https://public.flourish.studio/country-flags/svg/br.svg',
                  'https://public.flourish.studio/country-flags/svg/vg.svg',
                  'https://public.flourish.studio/country-flags/svg/bn.svg',
                  'https://public.flourish.studio/country-flags/svg/bg.svg',
                  'https://public.flourish.studio/country-flags/svg/bf.svg',
                  'https://public.flourish.studio/country-flags/svg/bi.svg',
                  'https://public.flourish.studio/country-flags/svg/cv.svg',
                  'https://public.flourish.studio/country-flags/svg/kh.svg',
                  'https://public.flourish.studio/country-flags/svg/cm.svg',
                  'https://public.flourish.studio/country-flags/svg/ca.svg',
                  'https://public.flourish.studio/country-flags/svg/ky.svg',
                  'https://public.flourish.studio/country-flags/svg/cf.svg',
                  'https://public.flourish.studio/country-flags/svg/td.svg',
                  'https://public.flourish.studio/country-flags/svg/cl.svg',
                  'https://public.flourish.studio/country-flags/svg/cn.svg',
                  'https://public.flourish.studio/country-flags/svg/co.svg',
                  'https://public.flourish.studio/country-flags/svg/km.svg',
                  'https://public.flourish.studio/country-flags/svg/cd.svg',
                  'https://public.flourish.studio/country-flags/svg/cg.svg',
                  'https://public.flourish.studio/country-flags/svg/cr.svg',
                  'https://public.flourish.studio/country-flags/svg/ci.svg',
                  'https://public.flourish.studio/country-flags/svg/hr.svg',
                  'https://public.flourish.studio/country-flags/svg/cu.svg',
                  'https://public.flourish.studio/country-flags/svg/cw.svg',
                  'https://public.flourish.studio/country-flags/svg/cy.svg',
                  'https://public.flourish.studio/country-flags/svg/cz.svg',
                  'https://public.flourish.studio/country-flags/svg/dk.svg',
                  'https://public.flourish.studio/country-flags/svg/dj.svg',
                  'https://public.flourish.studio/country-flags/svg/dm.svg',
                  'https://public.flourish.studio/country-flags/svg/do.svg',
                  'https://public.flourish.studio/country-flags/svg/ec.svg',
                  'https://public.flourish.studio/country-flags/svg/eg.svg',
                  'https://public.flourish.studio/country-flags/svg/sv.svg',
                  'https://public.flourish.studio/country-flags/svg/gq.svg',
                  'https://public.flourish.studio/country-flags/svg/er.svg',
                  'https://public.flourish.studio/country-flags/svg/ee.svg',
                  'https://public.flourish.studio/country-flags/svg/sz.svg',
                  'https://public.flourish.studio/country-flags/svg/et.svg',
                  'https://public.flourish.studio/country-flags/svg/fo.svg',
                  'https://public.flourish.studio/country-flags/svg/fj.svg',
                  'https://public.flourish.studio/country-flags/svg/fi.svg',
                  'https://public.flourish.studio/country-flags/svg/fr.svg',
                  'https://public.flourish.studio/country-flags/svg/pf.svg',
                  'https://public.flourish.studio/country-flags/svg/ga.svg',
                  'https://public.flourish.studio/country-flags/svg/gm.svg',
                  'https://public.flourish.studio/country-flags/svg/ge.svg',
                  'https://public.flourish.studio/country-flags/svg/de.svg',
                  'https://public.flourish.studio/country-flags/svg/gh.svg',
                  'https://public.flourish.studio/country-flags/svg/gi.svg',
                  'https://public.flourish.studio/country-flags/svg/gr.svg',
                  'https://public.flourish.studio/country-flags/svg/gl.svg',
                  'https://public.flourish.studio/country-flags/svg/gd.svg',
                  'https://public.flourish.studio/country-flags/svg/gu.svg',
                  'https://public.flourish.studio/country-flags/svg/gt.svg',
                  'https://public.flourish.studio/country-flags/svg/gn.svg',
                  'https://public.flourish.studio/country-flags/svg/gw.svg',
                  'https://public.flourish.studio/country-flags/svg/gy.svg',
                  'https://public.flourish.studio/country-flags/svg/ht.svg',
                  'https://public.flourish.studio/country-flags/svg/hn.svg',
                  'https://public.flourish.studio/country-flags/svg/hk.svg',
                  'https://public.flourish.studio/country-flags/svg/hu.svg',
                  'https://public.flourish.studio/country-flags/svg/is.svg',
                  'https://public.flourish.studio/country-flags/svg/in.svg',
                  'https://public.flourish.studio/country-flags/svg/id.svg',
                  'https://public.flourish.studio/country-flags/svg/ir.svg',
                  'https://public.flourish.studio/country-flags/svg/iq.svg',
                  'https://public.flourish.studio/country-flags/svg/ie.svg',
                  'https://public.flourish.studio/country-flags/svg/im.svg',
                  'https://public.flourish.studio/country-flags/svg/il.svg',
                  'https://public.flourish.studio/country-flags/svg/it.svg',
                  'https://public.flourish.studio/country-flags/svg/jm.svg',
                  'https://public.flourish.studio/country-flags/svg/jp.svg',
                  'https://public.flourish.studio/country-flags/svg/jo.svg',
                  'https://public.flourish.studio/country-flags/svg/kz.svg',
                  'https://public.flourish.studio/country-flags/svg/ke.svg',
                  'https://public.flourish.studio/country-flags/svg/ki.svg',
                  'https://public.flourish.studio/country-flags/svg/kp.svg',
                  'https://public.flourish.studio/country-flags/svg/kr.svg',
                  'https://public.flourish.studio/country-flags/svg/kw.svg',
                  'https://public.flourish.studio/country-flags/svg/kg.svg',
                  'https://public.flourish.studio/country-flags/svg/la.svg',
                  'https://public.flourish.studio/country-flags/svg/lv.svg',
                  'https://public.flourish.studio/country-flags/svg/lb.svg',
                  'https://public.flourish.studio/country-flags/svg/ls.svg',
                  'https://public.flourish.studio/country-flags/svg/lr.svg',
                  'https://public.flourish.studio/country-flags/svg/ly.svg',
                  'https://public.flourish.studio/country-flags/svg/li.svg',
                  'https://public.flourish.studio/country-flags/svg/lt.svg',
                  'https://public.flourish.studio/country-flags/svg/lu.svg',
                  'https://public.flourish.studio/country-flags/svg/mo.svg',
                  'https://public.flourish.studio/country-flags/svg/mk.svg',
                  'https://public.flourish.studio/country-flags/svg/mg.svg',
                  'https://public.flourish.studio/country-flags/svg/mw.svg',
                  'https://public.flourish.studio/country-flags/svg/my.svg',
                  'https://public.flourish.studio/country-flags/svg/mv.svg',
                  'https://public.flourish.studio/country-flags/svg/ml.svg',
                  'https://public.flourish.studio/country-flags/svg/mt.svg',
                  'https://public.flourish.studio/country-flags/svg/mh.svg',
                  'https://public.flourish.studio/country-flags/svg/mr.svg',
                  'https://public.flourish.studio/country-flags/svg/mu.svg',
                  'https://public.flourish.studio/country-flags/svg/mx.svg',
                  'https://public.flourish.studio/country-flags/svg/fm.svg',
                  'https://public.flourish.studio/country-flags/svg/md.svg',
                  'https://public.flourish.studio/country-flags/svg/mc.svg',
                  'https://public.flourish.studio/country-flags/svg/mn.svg',
                  'https://public.flourish.studio/country-flags/svg/me.svg',
                  'https://public.flourish.studio/country-flags/svg/ma.svg',
                  'https://public.flourish.studio/country-flags/svg/mz.svg',
                  'https://public.flourish.studio/country-flags/svg/mm.svg',
                  'https://public.flourish.studio/country-flags/svg/na.svg',
                  'https://public.flourish.studio/country-flags/svg/nr.svg',
                  'https://public.flourish.studio/country-flags/svg/np.svg',
                  'https://public.flourish.studio/country-flags/svg/nl.svg',
                  'https://public.flourish.studio/country-flags/svg/nc.svg',
                  'https://public.flourish.studio/country-flags/svg/nz.svg',
                  'https://public.flourish.studio/country-flags/svg/ni.svg',
                  'https://public.flourish.studio/country-flags/svg/ne.svg',
                  'https://public.flourish.studio/country-flags/svg/ng.svg',
                  'https://public.flourish.studio/country-flags/svg/mp.svg',
                  'https://public.flourish.studio/country-flags/svg/no.svg',
                  'https://public.flourish.studio/country-flags/svg/om.svg',
                  'https://public.flourish.studio/country-flags/svg/pk.svg',
                  'https://public.flourish.studio/country-flags/svg/pw.svg',
                  'https://public.flourish.studio/country-flags/svg/pa.svg',
                  'https://public.flourish.studio/country-flags/svg/pg.svg',
                  'https://public.flourish.studio/country-flags/svg/py.svg',
                  'https://public.flourish.studio/country-flags/svg/pe.svg',
                  'https://public.flourish.studio/country-flags/svg/ph.svg',
                  'https://public.flourish.studio/country-flags/svg/pl.svg',
                  'https://public.flourish.studio/country-flags/svg/pt.svg',
                  'https://public.flourish.studio/country-flags/svg/pr.svg',
                  'https://public.flourish.studio/country-flags/svg/qa.svg',
                  'https://public.flourish.studio/country-flags/svg/ro.svg',
                  'https://public.flourish.studio/country-flags/svg/ru.svg',
                  'https://public.flourish.studio/country-flags/svg/rw.svg',
                  'https://public.flourish.studio/country-flags/svg/ws.svg',
                  'https://public.flourish.studio/country-flags/svg/sm.svg',
                  'https://public.flourish.studio/country-flags/svg/st.svg',
                  'https://public.flourish.studio/country-flags/svg/sa.svg',
                  'https://public.flourish.studio/country-flags/svg/sn.svg',
                  'https://public.flourish.studio/country-flags/svg/rs.svg',
                  'https://public.flourish.studio/country-flags/svg/sc.svg',
                  'https://public.flourish.studio/country-flags/svg/sl.svg',
                  'https://public.flourish.studio/country-flags/svg/sg.svg',
                  'https://public.flourish.studio/country-flags/svg/sx.svg',
                  'https://public.flourish.studio/country-flags/svg/sk.svg',
                  'https://public.flourish.studio/country-flags/svg/si.svg',
                  'https://public.flourish.studio/country-flags/svg/sb.svg',
                  'https://public.flourish.studio/country-flags/svg/so.svg',
                  'https://public.flourish.studio/country-flags/svg/za.svg',
                  'https://public.flourish.studio/country-flags/svg/ss.svg',
                  'https://public.flourish.studio/country-flags/svg/es.svg',
                  'https://public.flourish.studio/country-flags/svg/lk.svg',
                  'https://public.flourish.studio/country-flags/svg/kn.svg',
                  'https://public.flourish.studio/country-flags/svg/lc.svg',
                  'https://public.flourish.studio/country-flags/svg/mf.svg',
                  'https://public.flourish.studio/country-flags/svg/vc.svg',
                  'https://public.flourish.studio/country-flags/svg/sd.svg',
                  'https://public.flourish.studio/country-flags/svg/sr.svg',
                  'https://public.flourish.studio/country-flags/svg/se.svg',
                  'https://public.flourish.studio/country-flags/svg/ch.svg',
                  'https://public.flourish.studio/country-flags/svg/sy.svg',
                  'https://public.flourish.studio/country-flags/svg/tj.svg',
                  'https://public.flourish.studio/country-flags/svg/tz.svg',
                  'https://public.flourish.studio/country-flags/svg/th.svg',
                  'https://public.flourish.studio/country-flags/svg/tl.svg',
                  'https://public.flourish.studio/country-flags/svg/tg.svg',
                  'https://public.flourish.studio/country-flags/svg/to.svg',
                  'https://public.flourish.studio/country-flags/svg/tt.svg',
                  'https://public.flourish.studio/country-flags/svg/tn.svg',
                  'https://public.flourish.studio/country-flags/svg/tr.svg',
                  'https://public.flourish.studio/country-flags/svg/tm.svg',
                  'https://public.flourish.studio/country-flags/svg/tc.svg',
                  'https://public.flourish.studio/country-flags/svg/tv.svg',
                  'https://public.flourish.studio/country-flags/svg/ae.svg',
                  'https://public.flourish.studio/country-flags/svg/ug.svg',
                  'https://public.flourish.studio/country-flags/svg/gb.svg',
                  'https://public.flourish.studio/country-flags/svg/ua.svg',
                  'https://public.flourish.studio/country-flags/svg/uy.svg',
                  'https://public.flourish.studio/country-flags/svg/us.svg',
                  'https://public.flourish.studio/country-flags/svg/uz.svg',
                  'https://public.flourish.studio/country-flags/svg/vu.svg',
                  'https://public.flourish.studio/country-flags/svg/ve.svg',
                  'https://public.flourish.studio/country-flags/svg/vn.svg',
                  'https://public.flourish.studio/country-flags/svg/vi.svg',
                  'https://public.flourish.studio/country-flags/svg/ps.svg',
                  'https://public.flourish.studio/country-flags/svg/ye.svg',
                  'https://public.flourish.studio/country-flags/svg/zm.svg',
                  'https://public.flourish.studio/country-flags/svg/zw.svg']
})

# Step 2: Merge the DataFrames
merged_df = f_medals_daily.merge(country_info, left_on='country', right_on='country', how='left')

# Step 3: Convert the date column to datetime format for correct sorting
merged_df['date'] = pd.to_datetime(merged_df['date'], format='%d-%m-%Y')

# Step 4: Aggregate the Gold Medal Counts by Date and Country
gold_medals_aggregated = merged_df.groupby(['country', 'date', 'image_url']).agg({
    'num_gold': 'sum'
}).reset_index()

# Step 5: Pivot the DataFrame so that each row is a country and each column is a date
gold_medals_pivot = gold_medals_aggregated.pivot(index=['country', 'image_url'], columns='date', values='num_gold')

# Step 6: Replace NaN with 0 (no gold medals on that date)
gold_medals_pivot = gold_medals_pivot.fillna(0)

# Step 7: Ensure Cumulative Gold Medals across dates for each country
gold_medals_cumulative = gold_medals_pivot.cumsum(axis=1)

# Step 8: Sort the date columns chronologically
gold_medals_cumulative = gold_medals_cumulative.sort_index(axis=1)

# Step 9: Reset the index to have 'country' as a column
gold_medals_cumulative_reset = gold_medals_cumulative.reset_index()

# Step 10: Display the final table as an HTML table without the date columns
gold_medals_cumulative_reset.to_html('gold_medals_table.html')

# Optionally, display it directly in a Jupyter notebook or other environments
from IPython.display import display, HTML
display(HTML(gold_medals_cumulative_reset.to_html(index=False)))


date,country,image_url,2024-07-27 00:00:00,2024-07-28 00:00:00,2024-07-29 00:00:00,2024-07-30 00:00:00,2024-07-31 00:00:00,2024-08-01 00:00:00,2024-08-02 00:00:00,2024-08-03 00:00:00,2024-08-04 00:00:00,2024-08-05 00:00:00,2024-08-06 00:00:00,2024-08-07 00:00:00,2024-08-08 00:00:00,2024-08-09 00:00:00,2024-08-10 00:00:00,2024-08-11 00:00:00
,ALB,https://public.flourish.studio/country-flags/svg/al.svg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,ALG,https://public.flourish.studio/country-flags/svg/dz.svg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0
,ARG,https://public.flourish.studio/country-flags/svg/ar.svg,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
,ARM,https://public.flourish.studio/country-flags/svg/am.svg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
,AUS,https://public.flourish.studio/country-flags/svg/au.svg,3.0,4.0,5.0,6.0,7.0,8.0,11.0,12.0,12.0,13.0,14.0,18.0,18.0,18.0,18.0,18.0
,AUT,https://public.flourish.studio/country-flags/svg/at.svg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,2.0,2.0
,AZE,https://public.flourish.studio/country-flags/svg/az.svg,0.0,0.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0
,BEL,https://public.flourish.studio/country-flags/svg/be.svg,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0
,BOT,https://public.flourish.studio/country-flags/svg/bw.svg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
,BRA,https://public.flourish.studio/country-flags/svg/br.svg,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,3.0,3.0,3.0


## Appendix

#### Weighted medal tally

In [48]:
# Define the discipline weighting schemes

# **Multiplicative Weights for Disciplines**
discipline_multiplicative = {
    # Equestrian (negative impact)
    "Equestrian": -1,

    # Inaccessibles (disciplines considered less accessible)
    "Fencing": 0.25,
    "Golf": 0.25,
    "Modern Pentathlon": 0.25,
    "Sailing": 0.25,
    "Shooting": 0.25,

    # Too new (disciplines considered newer or less established)
    "Breaking": 0.5,
    "Basketball 3x3": 0.5,
    "Cycling BMX Freestyle": 0.5,
    "Skateboarding": 0.5,
    "Sport Climbing": 0.5,
    "Surfing": 0.5,
    "Rugby Sevens": 0.5,

    # The majority (standard multiplier for most disciplines)
    "Archery": 1,
    "Artistic Swimming": 1,
    "Badminton": 1,
    "Basketball": 1,
    "Beach Volleyball": 1,
    "Canoe Slalom": 1,
    "Canoe Sprint": 1,
    "Cycling BMX Racing": 1,
    "Cycling Mountain Bike": 1,
    "Cycling Road": 1,
    "Cycling Track": 1,
    "Diving": 1,
    "Football": 1,
    "Handball": 1,
    "Hockey": 1,
    "Judo": 1,
    "Marathon Swimming": 1,
    "Rhythmic Gymnastics": 1,
    "Rowing": 1,
    "Table Tennis": 1,
    "Taekwondo": 1,
    "Tennis": 1,
    "Trampoline": 1,
    "Triathlon": 1,
    "Volleyball": 1,
    "Waterpolo": 1
}

# **Additive Weights for Disciplines**
discipline_additive = {
    # The greats (additional points for top-tier disciplines)
    "Athletics": 1,
    "Artistic Gymnastics": 1,
    "Boxing": 1,
    "Swimming": 1,
    "Weightlifting": 1,
    "Wrestling": 1
}

# **Additive Weights for Sub-Disciplines**
sub_discipline_additive = {
    # # Athletics elite events
    # ("Athletics", "Men's 100m Final"): 2,
    # ("Athletics", "Men's 200m Final"): 2,
    # ("Athletics", "Men's Marathon Final"): 2,
    # ("Athletics", "Men's High Jump Final"): 2,
    # ("Athletics", "Men's Long Jump Final"): 2,
    # ("Athletics", "Men's Discus Throw Final"): 2,
    # ("Athletics", "Men's Javelin Throw Final"): 2,
    # ("Athletics", "Men's Decathlon Final"): 2,
    # ("Athletics", "Men's 4 x 100m Relay Final"): 2,
    # ("Athletics", "Men's 4 x 400m Relay Final"): 2,
    # ("Athletics", "Women's 100m Final"): 2,
    # ("Athletics", "Women's 200m Final"): 2,
    # ("Athletics", "Women's Marathon Final"): 2,
    # ("Athletics", "Women's High Jump Final"): 2,
    # ("Athletics", "Women's Long Jump Final"): 2,
    # ("Athletics", "Women's Discus Throw Final"): 2,
    # ("Athletics", "Women's Heptathlon Final"): 2,
    # ("Athletics", "Women's 4 x 100m Relay Final"): 2,
    # ("Athletics", "Women's 4 x 400m Relay Final"): 2,
    # ("Athletics", "4 x 400m Relay Mixed Final"): 2,

    # # Artistic Gymnastics elite events
    # ("Artistic Gymnastics", "Women's Team Final"): 2,
    # ("Artistic Gymnastics", "Men's Team Final"): 2,

    # # Swimming elite events
    # ("Swimming", "Women's 100m Freestyle Final"): 2,
    # ("Swimming", "Women's 200m Individual Medley Final"): 2,
    # ("Swimming", "Women's 4 x 100m Freestyle Relay Final"): 2,
    # ("Swimming", "Women's 4 x 100m Medley Relay Final"): 2,
    # ("Swimming", "Men's 100m Freestyle Final"): 2,
    # ("Swimming", "Men's 200m Individual Medley Final"): 2,
    # ("Swimming", "Men's 4 x 100m Freestyle Relay Final"): 2,
    # ("Swimming", "Men's 4 x 100m Medley Relay Final"): 2,
    # ("Swimming", "Mixed 4 x 100m Medley Relay Final"): 2
}

In [49]:
import pandas as pd
from IPython.display import display, HTML

def medal_weighting(f_medals_daily, weighting_function):
    """
    Calculate weighted scores for medals based on a given weighting function,
    including discipline and sub-discipline adjustments.

    Parameters:
    - f_medals_daily: DataFrame containing daily medal data with columns 
                      ['date', 'country', 'num_gold', 'num_silver', 'num_bronze', 
                       'discipline', 'sub_discipline'].
    - weighting_function: String specifying the medal weighting scheme, 
                          options are "gold_boost", "standard", "medal_count", 
                          "gold_count", "silver_count", "bronze_count".

    The function calculates:
    - Medal scores based on the selected weighting scheme.
    - Discipline impacts:
      - Multiplicative: Adjusts the score by a factor representing the difficulty 
                        or rarity of the discipline.
      - Additive: Adds points to the score for elite disciplines.
    - Sub-discipline impacts: Adds specific points for key gold medal events in 
      certain sub-disciplines.
    - Final scores combining all impacts, and cumulative scores for each country.

    Returns:
    - f_medals_daily_grouped: DataFrame with weighted scores, final scores, and 
                              cumulative scores for each country by date.
    """

    # Define the weighting schemes for medals
    weighting_schemes = {
        "gold_boost": {'gold': 4, 'silver': 2, 'bronze': 1},
        "standard": {'gold': 3, 'silver': 2, 'bronze': 1},
        "medal_count": {'gold': 1, 'silver': 1, 'bronze': 1},
        "gold_count": {'gold': 1, 'silver': 0, 'bronze': 0},
        "silver_count": {'gold': 0, 'silver': 1, 'bronze': 0},
        "bronze_count": {'gold': 0, 'silver': 0, 'bronze': 1}
    }

    # Select the appropriate weighting scheme
    weights = weighting_schemes.get(weighting_function)
    if not weights:
        raise ValueError(f"Invalid weighting function: {weighting_function}")
    
    # Calculate medal scores based on the selected weighting scheme
    f_medals_daily['gold_score'] = f_medals_daily['num_gold'] * weights['gold']
    f_medals_daily['silver_score'] = f_medals_daily['num_silver'] * weights['silver']
    f_medals_daily['bronze_score'] = f_medals_daily['num_bronze'] * weights['bronze']
    
    # Calculate total score based on medal weights
    f_medals_daily['score'] = (
        f_medals_daily['gold_score'] +
        f_medals_daily['silver_score'] +
        f_medals_daily['bronze_score']
    )
    
    # Calculate the number of medals
    f_medals_daily['num_medals'] = (
        f_medals_daily['num_gold'] + 
        f_medals_daily['num_silver'] + 
        f_medals_daily['num_bronze']
    )
    
    # Apply discipline multiplicative impacts
    f_medals_daily['disc_mult'] = f_medals_daily['score'] * f_medals_daily['discipline'].map(discipline_multiplicative).fillna(1)
    
    # Apply discipline additive impacts
    f_medals_daily['disc_add'] = f_medals_daily['discipline'].map(discipline_additive).fillna(0)

    # Apply sub-discipline additive impacts only for gold medals
    f_medals_daily['sub_disc_add'] = f_medals_daily.apply(
        lambda row: sub_discipline_additive.get((row['discipline'], row['sub_discipline']), 0) if row['num_gold'] > 0 else 0, axis=1
    )
    
    # Calculate the final score including discipline and sub-discipline impacts
    f_medals_daily['final_score'] = f_medals_daily['disc_mult'] + f_medals_daily['disc_add'] + f_medals_daily['sub_disc_add']
    
    # Convert the date column to datetime format
    f_medals_daily['date'] = pd.to_datetime(f_medals_daily['date'], format='%d-%m-%Y')

    # Group by date and country, summing the scores and medal counts
    f_medals_daily_grouped = f_medals_daily.groupby(['date', 'country']).agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum',
        'num_medals': 'sum',
        'gold_score': 'sum',
        'silver_score': 'sum',
        'bronze_score': 'sum',
        'score': 'sum',
        'disc_mult': 'sum',
        'disc_add': 'sum',
        'sub_disc_add': 'sum',
        'final_score': 'sum'
    }).reset_index()
    
    # Calculate cumulative scores for each country
    f_medals_daily_grouped['cum_medals'] = f_medals_daily_grouped.groupby('country')['num_medals'].cumsum()
    f_medals_daily_grouped['cum_score'] = f_medals_daily_grouped.groupby('country')['score'].cumsum()
    f_medals_daily_grouped['cum_final_score'] = f_medals_daily_grouped.groupby('country')['final_score'].cumsum()

    # Reorder columns for a logical order
    f_medals_daily_grouped = f_medals_daily_grouped[[
        'date', 'country', 
        'num_gold', 'num_silver', 'num_bronze', 'num_medals', 'cum_medals',
        'gold_score', 'silver_score', 'bronze_score', 
        'score', 'cum_score', 
        'disc_mult', 'disc_add', 'sub_disc_add', 
        'final_score', 'cum_final_score'
    ]]
    
    return f_medals_daily_grouped

# Example usage
# Assume f_medals_daily is a DataFrame with the appropriate columns
f_medals_weighted = medal_weighting(f_medals_daily, weighting_function="gold_boost")

# Display the resulting DataFrame
display(HTML(f"<div style='max-height: 400px; overflow: auto;'>{f_medals_weighted.to_html(index=False)}</div>"))

date,country,num_gold,num_silver,num_bronze,num_medals,cum_medals,gold_score,silver_score,bronze_score,score,cum_score,disc_mult,disc_add,sub_disc_add,final_score,cum_final_score
2024-07-27,AUS,3,2,0,5,5,12,4,0,16,16,16.0,4.0,0,20.0,20.0
2024-07-27,BEL,1,0,1,2,2,4,0,1,5,5,5.0,0.0,0,5.0,5.0
2024-07-27,CAN,0,1,0,1,1,0,2,0,2,2,2.0,1.0,0,3.0,3.0
2024-07-27,CHN,2,0,1,3,3,8,0,1,9,9,6.0,1.0,0,7.0,7.0
2024-07-27,ESP,0,0,1,1,1,0,0,1,1,1,1.0,0.0,0,1.0,1.0
2024-07-27,FIJ,0,1,0,1,1,0,2,0,2,2,1.0,0.0,0,1.0,1.0
2024-07-27,FRA,1,2,1,4,4,4,4,1,9,9,5.5,0.0,0,5.5,5.5
2024-07-27,GBR,0,1,1,2,2,0,2,1,3,3,3.0,0.0,0,3.0,3.0
2024-07-27,GER,1,0,0,1,1,4,0,0,4,4,4.0,1.0,0,5.0,5.0
2024-07-27,HKG,1,0,0,1,1,4,0,0,4,4,1.0,0.0,0,1.0,1.0


### Pretty Print

In [50]:
import pandas as pd

def pretty_print_summary(f_medals_daily):
    """
    Prints a summary table of medal counts by country, ordered by total, gold, silver, and bronze medals in descending order.

    Parameters:
    - f_medals_daily: DataFrame containing daily medal data with columns
                      ['date', 'name', 'country', 'discipline', 'sub_discipline',
                       'num_gold', 'num_silver', 'num_bronze'].

    The function outputs a pretty-printed summary table of gold, silver, bronze, and total medals for each country.
    """
    
    # Aggregate the data to get the total medals by country
    summary_data = f_medals_daily.groupby('country').agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum'
    }).reset_index()

    # Calculate total medals
    summary_data['total'] = summary_data['num_gold'] + summary_data['num_silver'] + summary_data['num_bronze']

    # Sort the summary data by total, gold, silver, and bronze medal counts in descending order
    summary_data = summary_data.sort_values(by=['num_gold', 'num_silver', 'num_bronze'], 
                                            ascending=[False, False, False]).reset_index(drop=True)

    # Pretty print the summary table
    print("\nMedal Summary by Country:")
    print("-" * 60)
    print("{:<20} {:>6} {:>6} {:>6} {:>8}".format('Country', 'Gold', 'Silver', 'Bronze', 'Total'))
    print("-" * 60)

    for _, row in summary_data.iterrows():
        country = row['country']
        num_gold = row['num_gold']
        num_silver = row['num_silver']
        num_bronze = row['num_bronze']
        total = row['total']
        
        print("{:<20} {:>6} {:>6} {:>6} {:>8}".format(
            country, num_gold, num_silver, num_bronze, total
        ))

    # Calculate total medals for all countries
    total_gold = summary_data['num_gold'].sum()
    total_silver = summary_data['num_silver'].sum()
    total_bronze = summary_data['num_bronze'].sum()
    total_medals = summary_data['total'].sum()

    print("-" * 60)
    print("{:<20} {:>6} {:>6} {:>6} {:>8}".format(
        "Total", total_gold, total_silver, total_bronze, total_medals
    ))
    print("-" * 60)
    print(f"Total Countries: {summary_data.shape[0]}\n")

# # Example usage
# # Assume f_medals_daily is a DataFrame with the appropriate columns
pretty_print_summary(f_medals_daily)



Medal Summary by Country:
------------------------------------------------------------
Country                Gold Silver Bronze    Total
------------------------------------------------------------
USA                      40     44     42      126
CHN                      40     27     24       91
JPN                      20     12     13       45
AUS                      18     19     16       53
FRA                      16     26     22       64
NED                      15      7     12       34
GBR                      14     22     29       65
KOR                      13      9     10       32
ITA                      12     13     15       40
GER                      12     13      8       33
NZL                      10      7      3       20
CAN                       9      7     11       27
UZB                       8      2      3       13
HUN                       6      7      6       19
ESP                       5      4      9       18
SWE                       4      4 

In [51]:
import pandas as pd

def pretty_print_summary_by_discipline(f_medals_daily):
    """
    Prints a summary table of medal counts by discipline, ordered by total, gold, silver, and bronze medals in descending order.

    Parameters:
    - f_medals_daily: DataFrame containing daily medal data with columns
                      ['date', 'name', 'country', 'discipline', 'sub_discipline',
                       'num_gold', 'num_silver', 'num_bronze'].

    The function outputs a pretty-printed summary table of gold, silver, bronze, and total medals for each discipline.
    """
    
    # Aggregate the data to get the total medals by discipline
    summary_data = f_medals_daily.groupby('discipline').agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum'
    }).reset_index()

    # Calculate total medals
    summary_data['total'] = summary_data['num_gold'] + summary_data['num_silver'] + summary_data['num_bronze']

    # Sort the summary data by total, gold, silver, and bronze medal counts in descending order
    summary_data = summary_data.sort_values(by=['total', 'num_gold', 'num_silver', 'num_bronze'], 
                                            ascending=[False, False, False, False]).reset_index(drop=True)

    # Pretty print the summary table
    print("\nMedal Summary by Discipline:")
    print("-" * 60)
    print("{:<25} {:>6} {:>6} {:>6} {:>8}".format('Discipline', 'Gold', 'Silver', 'Bronze', 'Total'))
    print("-" * 60)

    for _, row in summary_data.iterrows():
        discipline = row['discipline']
        num_gold = row['num_gold']
        num_silver = row['num_silver']
        num_bronze = row['num_bronze']
        total = row['total']
        
        print("{:<25} {:>6} {:>6} {:>6} {:>8}".format(
            discipline, num_gold, num_silver, num_bronze, total
        ))

    # Calculate total medals for all disciplines
    total_gold = summary_data['num_gold'].sum()
    total_silver = summary_data['num_silver'].sum()
    total_bronze = summary_data['num_bronze'].sum()
    total_medals = summary_data['total'].sum()

    print("-" * 60)
    print("{:<25} {:>6} {:>6} {:>6} {:>8}".format(
        "Total", total_gold, total_silver, total_bronze, total_medals
    ))
    print("-" * 60)
    print(f"Total Disciplines: {summary_data.shape[0]}\n")

# # Example usage
# # Assume f_medals_daily is a DataFrame with the appropriate columns
# pretty_print_summary_by_discipline(f_medals_daily)


In [52]:
import pandas as pd

def pretty_print_filtered_events_medal_tally(f_medals_daily, filtered_events, filter_equal_medals=False):
    """
    Prints a summary of the medal tally for the specified filtered events, ordered by gold, silver, and bronze medals,
    and also includes the total medal count for each country across all events. Includes an option to filter countries
    where Medals = Total Medals.

    Parameters:
    - f_medals_daily: DataFrame containing daily medal data with columns
                      ['date', 'name', 'country', 'discipline', 'sub_discipline',
                       'num_gold', 'num_silver', 'num_bronze'].
    - filtered_events: List of disciplines to be filtered and considered for the tally.
    - filter_equal_medals: Boolean specifying whether to filter countries where Medals = Total Medals.
    """
    
    # Filter the DataFrame for the specified filtered events
    selected_data_filtered_events = f_medals_daily[f_medals_daily['discipline'].isin(filtered_events)]
    
    # Group data by country for filtered events
    grouped_data_filtered_events = selected_data_filtered_events.groupby('country').agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum'
    }).reset_index()
    
    # Calculate total medals for filtered events
    grouped_data_filtered_events['total'] = grouped_data_filtered_events['num_gold'] + grouped_data_filtered_events['num_silver'] + grouped_data_filtered_events['num_bronze']

    # Order by gold, silver, and bronze medals for filtered events
    grouped_data_filtered_events = grouped_data_filtered_events.sort_values(by=['num_gold', 'num_silver', 'num_bronze'], ascending=[False, False, False]).reset_index(drop=True)

    # Group data by country for all events
    grouped_data_all_events = f_medals_daily.groupby('country').agg({
        'num_gold': 'sum',
        'num_silver': 'sum',
        'num_bronze': 'sum'
    }).reset_index()
    
    # Calculate total medals for all events
    grouped_data_all_events['total'] = grouped_data_all_events['num_gold'] + grouped_data_all_events['num_silver'] + grouped_data_all_events['num_bronze']

    # Merge the two grouped dataframes
    merged_data = pd.merge(grouped_data_filtered_events, grouped_data_all_events, on='country', suffixes=('_filtered', '_all'))

    # Apply filter if required
    if filter_equal_medals:
        merged_data = merged_data[merged_data['total_filtered'] == merged_data['total_all']]

    # Pretty print the results
    print("\nMedal Tally for Filtered Events:")
    print("-" * 80)
    print("{:<20} {:>6} {:>6} {:>6} {:>6} {:>12}".format('Country', 'Gold', 'Silver', 'Bronze', 'Medals', 'Total Medals'))
    print("-" * 80)

    for _, row in merged_data.iterrows():
        country = row['country']
        num_gold_filtered = row['num_gold_filtered']
        num_silver_filtered = row['num_silver_filtered']
        num_bronze_filtered = row['num_bronze_filtered']
        total_filtered = row['total_filtered']
        total_all = row['total_all']
        
        print("{:<20} {:>6} {:>6} {:>6} {:>6} {:>12}".format(
            country, num_gold_filtered, num_silver_filtered, num_bronze_filtered, total_filtered, total_all
        ))

    print("-" * 80)
    total_gold_filtered = grouped_data_filtered_events['num_gold'].sum()
    total_silver_filtered = grouped_data_filtered_events['num_silver'].sum()
    total_bronze_filtered = grouped_data_filtered_events['num_bronze'].sum()
    total_medals_filtered = total_gold_filtered + total_silver_filtered + total_bronze_filtered

    print(f"Filtered: {total_gold_filtered:>2} Gold, {total_silver_filtered:>2} Silver, {total_bronze_filtered:>2} Bronze, {total_medals_filtered:>2} Medals\n")

# Example usage
# Assume f_medals_daily is a DataFrame with the appropriate columns
pretty_print_filtered_events_medal_tally(f_medals_daily, ass_kicking_events, filter_equal_medals=True)


Medal Tally for Filtered Events:
--------------------------------------------------------------------------------
Country                Gold Silver Bronze Medals Total Medals
--------------------------------------------------------------------------------
IRI                       3      6      3     12           12
AZE                       2      2      3      7            7
KGZ                       0      2      4      6            6
KOS                       0      1      1      2            2
JOR                       0      1      0      1            1
MGL                       0      1      0      1            1
PAN                       0      1      0      1            1
TJK                       0      0      3      3            3
ALB                       0      0      2      2            2
CIV                       0      0      1      1            1
CPV                       0      0      1      1            1
EOR                       0      0      1      1            

### Extract medals data from URL (Draft: doesn't extract events within dropdown)

In [53]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

def fetch_individual_events(date):
    # URL of the medal events page
    url = f'https://olympics.com/en/paris-2024/schedule/{date}?medalEvents=true'

    # Send a GET request to fetch the raw HTML content
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the section containing events
    medal_events_section = soup.find_all('div', class_='emotion-srm-1ofqig9')

    # Debugging: Check if the sections were found
    print(f"Found {len(medal_events_section)} events")

    # List to hold event data
    events_data = []

    # Loop through each medal event and extract details
    for event in medal_events_section:
        try:
            # Check if the event is head-to-head
            h2h_competitors = event.find_all('div', class_='h2h-competitor')
            if h2h_competitors:
                continue  # Skip head-to-head events

            event_time_tag = event.find('time', class_='emotion-srm-1myzqq1')
            event_title_tag = event.find('span', class_='discipline-title')
            event_sub_title_tag = event.find('span', class_='discipline-sub-title')
            event_medal_tag = event.find('div', class_='emotion-srm-2v3gnk')

            start_time = event_time_tag['datetime'] if event_time_tag and 'datetime' in event_time_tag.attrs else "N/A"
            title = event_title_tag.text.strip() if event_title_tag else "N/A"
            sub_title = event_sub_title_tag.text.strip() if event_sub_title_tag else "N/A"
            medal_event = event_medal_tag.find('img')['title'] if event_medal_tag and event_medal_tag.find('img') else "No"

            # Only include medal events
            if medal_event != "No":
                gold_names, gold_countries = [], []
                silver_names, silver_countries = [], []
                bronze_names, bronze_countries = [], []

                competitors = event.find_all('div', class_='emotion-srm-69i1ev')
                for competitor in competitors:
                    country_tag = competitor.find('span', class_='e1oix8v91 emotion-srm-18k92tf')
                    name_tag = competitor.find('span', class_='e1oix8v91 emotion-srm-47h3k2')
                    medal_tag = competitor.find('div', class_='emotion-srm-hyxlzm e1uhuzof0')

                    country = country_tag.text.strip() if country_tag else "N/A"
                    name = name_tag.text.strip() if name_tag else "N/A"
                    medal = medal_tag.find('img')['alt'] if medal_tag else "N/A"

                    if "Gold" in medal:
                        gold_names.append(name)
                        gold_countries.append(country)
                    elif "Silver" in medal:
                        silver_names.append(name)
                        silver_countries.append(country)
                    elif "Bronze" in medal:
                        bronze_names.append(name)
                        bronze_countries.append(country)

                # Handle multiple medalists
                gold_name = ', '.join(gold_names) if gold_names else "N/A"
                gold_country = ', '.join(gold_countries) if gold_countries else "N/A"
                silver_name = ', '.join(silver_names) if silver_names else "N/A"
                silver_country = ', '.join(silver_countries) if silver_countries else "N/A"
                bronze_name = ', '.join(bronze_names) if bronze_names else "N/A"
                bronze_country = ', '.join(bronze_countries) if bronze_countries else "N/A"

                events_data.append({
                    'Start Time': start_time,
                    'Title': title,
                    'Sub-Title': sub_title,
                    'Gold Name': gold_name,
                    'Gold Country': gold_country,
                    'Silver Name': silver_name,
                    'Silver Country': silver_country,
                    'Bronze Name': bronze_name,
                    'Bronze Country': bronze_country
                })
        except Exception as e:
            print(f"Error processing event: {e}")

    # Create a DataFrame
    df = pd.DataFrame(events_data)

    # Drop duplicate entries
    df = df.drop_duplicates()

    # Convert 'Start Time' to datetime format and filter by input date
    df['Start Time'] = pd.to_datetime(df['Start Time'], errors='coerce')
    
    # Convert input date to '2024-07-27' format for comparison
    comparison_date = datetime.strptime(f'2024-{date}', '%Y-%d-%B').date()
    df = df[df['Start Time'].dt.date == comparison_date]

    # Sort by 'Start Time'
    df = df.sort_values(by='Start Time').reset_index(drop=True)

    # Pretty print the DataFrame
    print(df.to_string(index=False))

    # # Save the DataFrame to a CSV file
    # df.to_csv(f'olympic_medal_events_{date}.csv', index=False)

# Specify the date in the format 'dd-mmm'
date = '27-july'  # Change this date as needed
fetch_individual_events(date)

Found 354 events
               Start Time        Title                                 Sub-Title       Gold Name Gold Country       Silver Name Silver Country   Bronze Name Bronze Country
2024-07-27 09:00:00+00:00       Diving Women's Synchronised 3m Springboard Final           China          CHN     United States            USA Great Britain            GBR
2024-07-27 12:30:00+00:00 Cycling Road             Women's Individual Time Trial     Grace BROWN          AUS    Anna HENDERSON            GBR  Chloe DYGERT            USA
2024-07-27 14:32:00+00:00 Cycling Road               Men's Individual Time Trial Remco EVENEPOEL          BEL     Filippo GANNA            ITA Wout van AERT            BEL
2024-07-27 18:42:00+00:00     Swimming                Men's 400m Freestyle Final  Lukas MAERTENS          GER Elijah WINNINGTON            AUS    KIM Woomin            KOR
2024-07-27 18:52:00+00:00     Swimming              Women's 400m Freestyle Final  Ariarne TITMUS          AUS   Summer McIN

In [54]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

def fetch_head_to_head_events(date):
    # Load IOC Codes
    ioc_codes = pd.read_csv('ioc_codes.csv')

    # URL of the medal events page
    url = f'https://olympics.com/en/paris-2024/schedule/{date}?medalEvents=true'

    # Send a GET request to fetch the raw HTML content
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Find the section containing events
    medal_events_section = soup.find_all('div', class_='emotion-srm-1ofqig9')

    # Debugging: Check if the sections were found
    print(f"Found {len(medal_events_section)} events")

    # List to hold event data
    events_data = []

    # Loop through each medal event and extract details
    for event in medal_events_section:
        try:
            # Check if the event is head-to-head
            h2h_competitors = event.find_all('div', class_='h2h-competitor')
            if not h2h_competitors:
                continue  # Skip non-head-to-head events

            event_time_tag = event.find('time', class_='emotion-srm-1myzqq1')
            event_title_tag = event.find('span', class_='discipline-title')
            event_sub_title_tag = event.find('span', class_='discipline-sub-title')
            event_medal_tag = event.find('div', class_='emotion-srm-2v3gnk')

            start_time = event_time_tag['datetime'] if event_time_tag and 'datetime' in event_time_tag.attrs else "N/A"
            title = event_title_tag.text.strip() if event_title_tag else "N/A"
            sub_title = event_sub_title_tag.text.strip() if event_sub_title_tag else "N/A"
            medal_event = event_medal_tag.find('img')['title'] if event_medal_tag and event_medal_tag.find('img') else "No"

            # Only include medal events
            if medal_event != "No":
                gold_name, gold_country = "N/A", "N/A"
                silver_name, silver_country = "N/A", "N/A"
                bronze_name, bronze_country = "N/A", "N/A"

                for competitor in h2h_competitors:
                    code_tag = competitor.find('span', class_='e1oix8v91 emotion-srm-1suglh8') or competitor.find('span', class_='e1oix8v91 emotion-srm-18k92tf')
                    if code_tag:
                        # Case 1: Athlete name and IOC code are provided
                        ioc_code = code_tag.text.strip()
                        if code_tag == competitor.find('span', class_='e1oix8v91 emotion-srm-1suglh8'):
                            name_tag = competitor.find('span', class_='e1oix8v91 emotion-srm-8q6t5w')
                        else: 
                            name_tag = competitor.find('span', class_='e1oix8v91 emotion-srm-47h3k2')
                        country = name_tag.text.strip() if name_tag else "N/A"
                        # print(f"Case 2 - Athlete: {country}, IOC Code: {ioc_code}")  # Debug print
                    else:
                        # Case 2: Country name is provided, lookup IOC code
                        country_tag = competitor.find('span', class_='e1oix8v91 emotion-srm-8q6t5w') or competitor.find('span', class_='e1oix8v91 emotion-srm-47h3k2')
                        country = country_tag.text.strip() if country_tag else "N/A"
                        ioc_code = ioc_codes[ioc_codes['country'] == country]['ioc_code'].values[0] if country in ioc_codes['country'].values else "N/A"
                        # print(f"Case 1 - Country: {country}, IOC Code: {ioc_code}")  # Debug print

                    medal_tag = competitor.find('div', class_='emotion-srm-hyxlzm e1uhuzof0')
                    medal_img = medal_tag.find('img') if medal_tag else None
                    medal = medal_img['alt'] if medal_img else "N/A"

                    if "Gold" in medal:
                        gold_name, gold_country = country, ioc_code
                    elif "Silver" in medal:
                        silver_name, silver_country = country, ioc_code
                    elif "Bronze" in medal:
                        bronze_name, bronze_country = country, ioc_code

                events_data.append({
                    'Start Time': start_time,
                    'Title': title,
                    'Sub-Title': sub_title,
                    'Gold Name': gold_name,
                    'Gold Country': gold_country,
                    'Silver Name': silver_name,
                    'Silver Country': silver_country,
                    'Bronze Name': bronze_name,
                    'Bronze Country': bronze_country
                })
        except Exception as e:
            print(f"Error processing event: {e}")

    # Create a DataFrame
    df = pd.DataFrame(events_data)

    # Drop duplicate entries
    df = df.drop_duplicates()

    # Convert 'Start Time' to datetime format and filter by input date
    df['Start Time'] = pd.to_datetime(df['Start Time'], errors='coerce')
    
    # Convert input date to '2024-07-27' format for comparison
    comparison_date = datetime.strptime(f'2024-{date}', '%Y-%d-%B').date()
    df = df[df['Start Time'].dt.date == comparison_date]

    # Sort by 'Start Time'
    df = df.sort_values(by='Start Time').reset_index(drop=True)

    # Pretty print the DataFrame
    print(df.to_string(index=False))

    # # Save the DataFrame to a CSV file
    # df.to_csv(f'olympic_team_events_{date}.csv', index=False)

# Specify the date in the format 'dd-mmm'
date = '27-july'  # Change this date as needed
fetch_head_to_head_events(date)

FileNotFoundError: [Errno 2] No such file or directory: 'ioc_codes.csv'