In [2]:
import os
import pandas as pd

# Directory containing the exported CSV datasets
data_dir = r"C:\Users\sreev\Data Visualization\Olympics 2024\Paris 2024 Summer Olympic Games Data analysis\Exported Data"

# Maps each dataset name (file name without its extension) to its DataFrame
dataframes = {}

# Load every CSV file in the directory.
# sorted() makes the load/display order reproducible (os.listdir order is arbitrary),
# and lower() lets files saved as ".CSV" be picked up as well.
for file in sorted(os.listdir(data_dir)):
    if file.lower().endswith(".csv"):
        file_path = os.path.join(data_dir, file)
        df_name = os.path.splitext(file)[0]  # Use the file name (without extension) as the key
        dataframes[df_name] = pd.read_csv(file_path)

# Display a summary and a preview of each loaded DataFrame
for name, df in dataframes.items():
    print(f"Dataset: {name}")
    # DataFrame.info() prints the summary itself and returns None, so it must not
    # be wrapped in print() (that would emit a stray "None" line after each summary).
    df.info()
    print(df.head())  # First few rows of each DataFrame
    print("-" * 50)


Dataset: Country's Best Disciplines
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 479 entries, 0 to 478
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   country      479 non-null    object
 1   discipline   479 non-null    object
 2   Gold_Medals  479 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 11.4+ KB
None
   country             discipline  Gold_Medals
0      AIN  Trampoline Gymnastics            1
1      AIN                 Tennis            0
2      AIN          Weightlifting            0
3      AIN                 Rowing            0
4  Albania              Wrestling            0
--------------------------------------------------
Dataset: Cross-Discipline Medalists
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   name               4 non-null  

In [7]:
import pandas as pd
import plotly.express as px

# Load the per-country medal table
df_medals_by_country = pd.read_csv(r"C:\Users\sreev\Data Visualization\Olympics 2024\Paris 2024 Summer Olympic Games Data analysis\Exported Data\Total Medals by Country.csv")

# Create a total medals column
df_medals_by_country['Total_Medals'] = df_medals_by_country['Gold'] + df_medals_by_country['Silver'] + df_medals_by_country['Bronze']

# Top 10 countries by total medals
top_countries = df_medals_by_country.sort_values(by="Total_Medals", ascending=False).head(10)

# Top 10 countries by gold medals.
# This is ranked on the full table: re-sorting `top_countries` would only reorder the
# top 10 by *total* medals and could omit a country with many golds but fewer medals overall.
top_gold_countries = df_medals_by_country.sort_values(by="Gold", ascending=False).head(10)

# Visualize top countries by total medals (one stacked segment per medal type)
fig = px.bar(
    top_countries,
    x='country',
    y=['Gold', 'Silver', 'Bronze'],
    title="Top 10 Countries by Total Medals",
    # 'variable' is the legend title plotly generates for wide-form data
    labels={'value': 'Number of Medals', 'variable': 'Medal Type', 'country': 'Country'},
    barmode='stack',
    color_discrete_sequence=['#FFD700', '#C0C0C0', '#CD7F32']  # Colors for Gold, Silver, and Bronze
)
fig.update_layout(title_x=0.5, plot_bgcolor='white', xaxis_tickangle=45)
fig.show()

# Visualize top countries by gold medals
fig_gold = px.bar(
    top_gold_countries,
    x='country',
    y='Gold',
    title="Top 10 Countries by Gold Medals",
    labels={'Gold': 'Gold Medals', 'country': 'Country'},
    color='Gold',
    color_continuous_scale='sunsetdark'  # Using a valid continuous scale
)
fig_gold.update_layout(title_x=0.5, plot_bgcolor='white', xaxis_tickangle=45)
fig_gold.show()


**Medal Share by Country**

In [11]:
import plotly.express as px
import pandas as pd

# Load the per-country medal table
df_medals = pd.read_csv(r"C:\Users\sreev\Data Visualization\Olympics 2024\Paris 2024 Summer Olympic Games Data analysis\Exported Data\Total Medals by Country.csv")

# Derive the total medal count per country.
# The treemap cell further down reads this column from `df_medals`.
df_medals['Total_Medals'] = df_medals['Gold'] + df_medals['Silver'] + df_medals['Bronze']

# Focus on the top 10 countries by total medals
top_countries = df_medals.nlargest(10, 'Total_Medals')

# Create a stacked bar chart with one segment per medal type
fig = px.bar(
    top_countries,
    x='country',
    y=['Gold', 'Silver', 'Bronze'],
    title="Top 10 Countries by Total Medals",
    labels={'value': 'Number of Medals', 'variable': 'Medal Type', 'country': 'Country'},
    color_discrete_map={'Gold': '#FFD700', 'Silver': '#C0C0C0', 'Bronze': '#CD7F32'}
)
fig.update_layout(
    title_x=0.5,
    barmode='stack',
    plot_bgcolor='white',
    xaxis_tickangle=45
)
fig.show()


In [12]:
# Treemap of every country: tile area follows Total_Medals, tile colour follows Gold.
# Relies on `df_medals` (including its Total_Medals column) from the previous cell.
fig = px.treemap(
    df_medals,
    path=['country'],
    values='Total_Medals',
    color='Gold',
    color_continuous_scale='Viridis',
    title="Medal Distribution by Country",
).update_layout(title_x=0.5, plot_bgcolor='white')  # update_layout returns the same figure

fig.show()


In [24]:
import pandas as pd
import dash
from dash import dcc, html, Input, Output
import plotly.express as px

# Read the medallists file, parse medal_date into datetimes, and keep only the
# rows whose is_medallist flag equals 1.
df_medalists = (
    pd.read_csv(r'C:\Users\sreev\Data Visualization\Olympics 2024\medallists.csv')
    .assign(medal_date=lambda frame: pd.to_datetime(frame['medal_date']))
    .loc[lambda frame: frame['is_medallist'] == 1]
)

# Create the Dash application
app = dash.Dash(__name__)

# Distinct countries in order of first appearance; the first one is preselected
country_names = df_medalists["country_long"].unique()
country_options = [
    {"label": country_name, "value": country_name}
    for country_name in country_names
]

# One tab per medal type; the tab value is what the callback receives
medal_tab_items = [
    dcc.Tab(label=f"{medal} Medals", value=medal)
    for medal in ("Gold", "Silver", "Bronze")
]

# Page layout: title, country selector, medal-type tabs, then the two charts
app.layout = html.Div([
    html.H1("Interactive Medal Analysis", style={"textAlign": "center"}),

    # Country selector
    html.Label("Select Country:"),
    dcc.Dropdown(
        id="country_dropdown",
        options=country_options,
        value=country_names[0],
    ),

    # Medal-type selector
    dcc.Tabs(
        id="medal_tabs",
        value="Gold",
        children=medal_tab_items,
    ),

    # Medals per event
    dcc.Graph(id="event_chart"),

    # Medals per athlete
    dcc.Graph(id="athlete_chart"),
])

def _medal_count_bar(medal_rows, group_col, group_label, title, color_scale):
    """Build a bar chart counting the rows of ``medal_rows`` per value of ``group_col``.

    Args:
        medal_rows: DataFrame of medal rows; must contain ``group_col`` and ``medal_type``.
        group_col: Column to group by; also used as the x axis.
        group_label: Human-readable axis label for ``group_col``.
        title: Chart title.
        color_scale: Name of the plotly continuous colour scale for the bars.

    Returns:
        The plotly figure produced by ``px.bar``.
    """
    # Name the aggregated column for what it holds (a count) instead of leaving it
    # as "medal_type", which would mislabel the data in the intermediate frame.
    counts = (
        medal_rows.groupby(group_col)["medal_type"]
        .count()
        .reset_index(name="medal_count")
    )
    return px.bar(
        counts,
        x=group_col,
        y="medal_count",
        title=title,
        labels={group_col: group_label, "medal_count": "Medal Count"},
        color="medal_count",
        color_continuous_scale=color_scale,
    )


# Callbacks
@app.callback(
    [Output("event_chart", "figure"), Output("athlete_chart", "figure")],
    [Input("country_dropdown", "value"), Input("medal_tabs", "value")]
)
def update_charts(selected_country, selected_medal):
    """Rebuild both charts whenever the country dropdown or the medal tab changes.

    Args:
        selected_country: Current value of the country dropdown.
        selected_medal: Current value of the medal tabs ("Gold", "Silver" or "Bronze").

    Returns:
        A pair ``(event_fig, athlete_fig)``: medal counts per event and per athlete
        for the selected country and medal type.
    """
    # Filter data for the selected country and medal type.
    # NOTE(review): this equality only matches if the CSV's medal_type values are
    # exactly "Gold"/"Silver"/"Bronze"; confirm against the data file.
    filtered_data = df_medalists[
        (df_medalists["country_long"] == selected_country) &
        (df_medalists["medal_type"] == selected_medal)
    ]

    # Event chart
    event_fig = _medal_count_bar(
        filtered_data,
        group_col="event",
        group_label="Event",
        title=f"{selected_medal} Medals by Event ({selected_country})",
        color_scale="Blues",
    )

    # Athlete chart
    athlete_fig = _medal_count_bar(
        filtered_data,
        group_col="name",
        group_label="Athlete",
        title=f"{selected_medal} Medals by Athletes ({selected_country})",
        color_scale="Purples",
    )

    return event_fig, athlete_fig

# Run app.
# app.run() replaces app.run_server(), which is deprecated and removed in Dash 3.
if __name__ == '__main__':
    app.run(debug=True, port=8070)  # Non-default port; change it if 8070 is already in use

