In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

In [2]:
import warnings
# Install a blanket "ignore" filter so no warning category is shown for the
# rest of the session.
warnings.simplefilter("ignore")
# Cap DataFrame reprs at 15 rows; longer frames are shown truncated.
pd.options.display.max_rows = 15

In [3]:
# Location of the result table, relative to the notebook's working directory.
RESULTS_CSV = "../data/2024_result/2024_result.csv"
df = pd.read_csv(RESULTS_CSV)

In [4]:
# Host indicator: 1 for rows whose Team is France, 0 for every other row.
# The boolean comparison is cast to int64 so the column holds 0/1 integers.
df['Host'] = (df['Team'] == 'France').astype('int64')

In [5]:
# Remove the Year and Medal columns; the rest of the frame is kept as is.
# Note: re-running this cell raises KeyError because the columns are already gone.
df = df.drop(['Year', 'Medal'], axis='columns')

In [6]:
# Show the frame after the Host flag was set and Year/Medal were dropped.
df

Unnamed: 0,Bronze,Silver,Gold,ID,Sex,Age,Height,Weight,BMI,Host,Population,GDP_Per_Capita,Event,Name,Team,Sport
0,0.875,0.000000,0.125000,110716,0,27.0,172.0,67.0,22.647377,0,3.628642e+07,42348.945461,Alpine Skiing Women's Super G,Georgia Simmerling,Canada,Alpine Skiing
1,0.875,0.000000,0.125000,110716,0,27.0,172.0,67.0,22.647377,0,3.628642e+07,42348.945461,Alpine Skiing Women's Super G,Georgia Simmerling,Canada,Alpine Skiing
2,0.875,0.000000,0.125000,110716,0,27.0,172.0,67.0,22.647377,0,3.628642e+07,42348.945461,Alpine Skiing Women's Super G,Georgia Simmerling,Canada,Alpine Skiing
3,0.100,0.000000,0.900000,96508,1,20.0,185.0,77.0,22.498174,0,2.412716e+07,49896.681101,Archery Men's Individual,Alec Potts,Australia,Archery
4,0.100,0.833333,0.066667,68149,1,21.0,174.0,80.0,26.423570,0,5.124571e+07,27608.247429,Archery Men's Individual,Lee Seung-Yun,South Korea,Archery
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
844,0.300,0.400000,0.300000,74069,0,23.0,162.0,58.0,22.100290,0,1.324171e+09,1717.473878,"Wrestling Women's Lightweight, Freestyle",Sakshi Malik,India,Wrestling
845,0.800,0.000000,0.200000,3537,0,27.0,160.0,58.0,22.656250,0,1.140325e+07,3666.357154,"Wrestling Women's Lightweight, Freestyle",Marwa Al-Amri,Tunisia,Wrestling
846,0.100,0.000000,0.900000,74264,0,23.0,163.0,65.0,24.464602,0,9.507120e+06,5025.405064,"Wrestling Women's Middleweight, Freestyle",Mariya Ruslanovna Mamoshuk,Belarus,Wrestling
847,0.500,0.400000,0.100000,58163,0,21.0,160.0,61.0,23.828125,0,1.269945e+08,38972.340639,"Wrestling Women's Middleweight, Freestyle",Risako Kawai,Japan,Wrestling


In [7]:
# Store Sex as a pandas categorical (categories inferred from the values present).
df['Sex'] = df['Sex'].astype(pd.CategoricalDtype())

In [8]:
# Predicted medal = name of the column holding the largest of the three
# per-row medal scores. On ties idxmax returns the first column in this order.
medal_score_columns = ['Bronze', 'Silver', 'Gold']
df['pred_medal'] = df[medal_score_columns].idxmax(axis='columns')

In [9]:
# Number of rows per predicted medal, most frequent first.
df.loc[:, 'pred_medal'].value_counts()

pred_medal
Bronze    334
Gold      278
Silver    237
Name: count, dtype: int64

In [10]:
# Sport x predicted-medal table of row counts; combinations that never occur
# are filled with 0.
medals_by_sport = (
    df.groupby(['Sport', 'pred_medal'])
    .size()
    .unstack()
    .fillna(0)
)

# Order the sports by their total count across all medal types, largest first.
sport_totals = medals_by_sport.sum(axis=1)
medals_by_sport_sorted = sport_totals.sort_values(ascending=False).index
medals_by_sport_sorted_df = medals_by_sport.reindex(medals_by_sport_sorted)

# Stacked bars: one bar per sport, one segment per predicted medal.
fig = px.bar(
    medals_by_sport_sorted_df,
    barmode='stack',
    title='Predicted Medals by Sport',
)
fig.show()

In [11]:
# Keep the five sports with the most predicted medals.
# `pred_medal` holds medal names (strings), so summing it would concatenate
# text and rank sports alphabetically by that text; count rows per sport instead.
sports_with_predictions = df.groupby('Sport')['pred_medal'].count().nlargest(5).index
df_filtered = df[df['Sport'].isin(sports_with_predictions)]

# Plot predicted medals by sport, one facet per value of Sex.
# No y is given, so every row contributes one unit to its sport's stacked bar.
fig = px.bar(df_filtered, x='Sport', color='pred_medal', facet_col='Sex', title='Predicted Medals by Sport: Sex',
             labels={'pred_medal': 'Medal Type', 'Sport': 'Sport'}, category_orders={'pred_medal': ['Bronze', 'Silver', 'Gold']},
             color_discrete_sequence=['lightblue', 'lightgreen', 'lightcoral'])
# Clear the x-axis title on every facet; layout.xaxis alone only addresses the first one.
fig.update_xaxes(title_text=None)
fig.update_layout(yaxis_title='Number of Medals', legend_title='Medal Type')
fig.show()


In [12]:
# Scatter plot of Age vs predicted medal, colored by sex.
# Sex is stored as 0/1 codes, so an ordering of ['M', 'F'] matches nothing
# unless the codes are translated to labels first. In the displayed rows,
# 0 appears on women's events and 1 on men's events.
sex_labels = {0: 'F', 1: 'M'}
age_plot_df = df.assign(Sex=df['Sex'].map(sex_labels).astype(object))

fig = px.scatter(age_plot_df, x='Age', y='pred_medal', color='Sex', title='Age vs Predicted Medal by Sex',
                 labels={'Age': 'Age', 'pred_medal': 'Predicted Medal', 'Sex': 'Sex'},
                 # Explicit map keeps the colors the code-based plot produced
                 # (code 0 -> magenta, code 1 -> darkblue) independent of legend order.
                 color_discrete_map={'F': 'magenta', 'M': 'darkblue'},
                 category_orders={'pred_medal': ['Bronze', 'Silver', 'Gold'], 'Sex': ['M', 'F']})
fig.update_traces(marker=dict(size=8))
fig.update_layout(xaxis_title='Age', yaxis_title='Predicted Medal', legend_title='Sex')
fig.show()


In [13]:
# Age against Height, one color per predicted medal.
fig = px.scatter(
    df,
    x='Age',
    y='Height',
    color='pred_medal',
    title='Age vs Height by Predicted Medal',
)
scatter_axis_titles = dict(xaxis_title='Age', yaxis_title='Height')
fig.update_layout(**scatter_axis_titles)
fig.show()

# Weight distribution as one box per predicted medal.
fig = px.box(
    df,
    x='pred_medal',
    y='Weight',
    title='Weight Distribution by Predicted Medal',
)
box_axis_titles = dict(xaxis_title='Predicted Medal', yaxis_title='Weight')
fig.update_layout(**box_axis_titles)
fig.show()


In [14]:
# Narrow the frame to the body measurements plus the predicted medal.
# `data` is reused by the plotting cells that follow.
body_columns = ['Age', 'Height', 'Weight', 'BMI', 'pred_medal']
data = df[body_columns]

# Height vs Weight with marker size driven by BMI; one facet (and one color)
# per predicted medal.
fig = px.scatter(
    data,
    x='Height',
    y='Weight',
    size='BMI',
    color='pred_medal',
    facet_col='pred_medal',
    labels={'Height': 'Height (cm)', 'Weight': 'Weight (kg)', 'BMI': 'BMI'},
    title='Weight vs Height by BMI and Predicted Medal',
)
fig.show()


In [15]:
# 3D view of Height / Weight / BMI; the predicted medal drives both the
# marker color and the marker symbol.
axis_columns = dict(x='Height', y='Weight', z='BMI')
fig = px.scatter_3d(
    data,
    color='pred_medal',
    symbol='pred_medal',
    labels={'Height': 'Height (cm)', 'Weight': 'Weight (kg)', 'BMI': 'BMI'},
    title='3D Scatter Plot: Weight, Height, BMI and Predicted Medal',
    **axis_columns,
)
fig.show()


In [16]:
# Age distribution per predicted medal: a violin with an embedded box plot,
# all individual points drawn, and every column of `data` shown on hover.
fig = px.violin(
    data,
    x='pred_medal',
    y='Age',
    color='pred_medal',
    box=True,
    points="all",
    hover_data=data.columns,
    title='Violin Plot: Age Distribution by Predicted Medal',
)
fig.show()


In [18]:
# Age / Height / Weight point cloud, one color per sport; hovering a point
# additionally shows its Event and Team.
fig1 = px.scatter_3d(
    df,
    x='Age',
    y='Height',
    z='Weight',
    color='Sport',
    hover_data=['Event', 'Team'],
    title='3D Scatter Plot of Age, Height, and Weight',
)
fig1.show()

In [19]:
# BMI against the Population and GDP_Per_Capita columns in 3D, colored by
# sport; Event and Team are added to the hover box.
fig2_axes = dict(x='BMI', y='Population', z='GDP_Per_Capita')
fig2 = px.scatter_3d(
    df,
    color='Sport',
    hover_data=['Event', 'Team'],
    title='3D Scatter Plot of BMI, Population, and GDP Per Capita',
    **fig2_axes,
)
fig2.show()


In [21]:
# Each row placed by its Gold, Silver and Bronze column values (the same
# three columns pred_medal is derived from), colored by sport; Event and
# Team are added to the hover box.
fig3 = px.scatter_3d(
    df,
    x='Gold',
    y='Silver',
    z='Bronze',
    color='Sport',
    hover_data=['Event', 'Team'],
    title='3D Scatter Plot of Medal Distribution by Event',
)
fig3.show()
