In [1]:
import pandas as pd
import plotly.express as px
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State

# Load the flag dataset from flags.csv (path is relative to the working directory).
# The layout and the callbacks below read this module-level DataFrame.
df = pd.read_csv("flags.csv")

# Hex value used to draw each named color; the keys double as the color
# column names looked up on each flag row (see generate_tooltip).
color_map = dict(
    green="#008000",
    turquoise="#00FFFF",
    blue="#0000FF",
    violet="#800080",
    pink="#FF00FF",
    red="#FF0000",
    orange="#FFA500",
    yellow="#FFFF00",
    white="#FFFFFF",
    black="#000000",
)

# Create the Dash application that the layout and callbacks attach to
app = dash.Dash(__name__)

# Page layout, top to bottom: hidden color store, region dropdown, pie chart,
# year range slider, (initially hidden) close button, flag gallery.
app.layout = html.Div(
    style={"backgroundColor": "#D3D3D3"},
    children=[
        # Remembers which pie slice (color) is currently selected
        dcc.Store(id="selected-color", data=""),
        # Region picker: a fixed "Worldwide" entry followed by one entry per
        # distinct value of the Region column (underscores shown as spaces)
        dcc.Dropdown(
            id="region-dropdown",
            options=[
                {"label": "Worldwide", "value": "worldwide"},
                *(
                    {"label": region.replace("_", " "), "value": region}
                    for region in df["Region"].unique()
                ),
            ],
            value="worldwide",
            clearable=False,
        ),
        # Pie chart of the color distribution
        dcc.Graph(id="pie-chart", style={"backgroundColor": "#D3D3D3"}),
        # Year range selector, 1874-2024 in steps of ten years
        dcc.RangeSlider(
            id="year-slider",
            min=1874,
            max=2024,
            step=10,
            value=[1874, 2024],
            marks={label: label for label in map(str, range(1874, 2025, 10))},
        ),
        # Button that clears the color selection; hidden until a color is chosen
        html.Button(
            "Close",
            id="close-button",
            n_clicks=0,
            style={"display": "none"},
        ),
        # Gallery of flag images filled in by the display_flags callback
        html.Div(
            id="flag-container",
            style={"backgroundColor": "#D3D3D3", "padding": "10px"},
        ),
    ],
)

# Function to filter data based on the selected year range and region
def filter_data(df, start_year, end_year, selected_region):
    """Return the flags whose period of use overlaps a year range, for one region.

    Args:
        df: DataFrame with "Year Added", "Year Removed" and "Region" columns.
            "Year Added" may hold the text "?" (unknown start, treated as
            negative infinity) and "Year Removed" may hold "still in use"
            (treated as positive infinity).
        start_year: First year of the selected range (inclusive).
        end_year: Last year of the selected range (inclusive).
        selected_region: A value of the "Region" column, or "worldwide" to
            keep every region.

    Returns:
        The matching rows of ``df`` in their original order.

    Raises:
        ValueError: If a year cell holds text that is neither a number nor
            the sentinel for its column.

    Rows whose year cell is missing (NaN/None) are excluded instead of
    aborting the whole filter.
    """

    def parse_years(column, sentinel, sentinel_value):
        # Convert a year column to floats, replacing its sentinel text with
        # the given infinity. Missing cells stay NaN and therefore fail
        # every comparison below.
        is_sentinel = column == sentinel
        numeric = pd.to_numeric(column.where(~is_sentinel), errors="raise")
        return numeric.astype(float).where(~is_sentinel, sentinel_value)

    year_added = parse_years(df["Year Added"], "?", float("-inf"))
    year_removed = parse_years(df["Year Removed"], "still in use", float("inf"))

    # A flag is relevant when its usage interval overlaps [start_year, end_year]
    in_range = (year_added <= end_year) & (year_removed >= start_year)
    filtered_df = df[in_range]

    # If a specific region is selected, further filter the DataFrame by the region
    if selected_region != "worldwide":
        filtered_df = filtered_df[filtered_df["Region"] == selected_region]
    return filtered_df

# Function to generate tooltip text for flags
def generate_tooltip(flag_info):
    """Assemble the multi-line tooltip text for a single flag row.

    Args:
        flag_info: Mapping-like row providing "State Name", "Region",
            "Flag Name", "Year Added", "Year Removed", "Additional Info"
            and one numeric entry per key of ``color_map``.

    Returns:
        A string with the state and region, the flag type (when present),
        the usage period, every color whose value is above zero, and the
        additional info (when present).
    """
    region_label = flag_info['Region'].replace('_', ' ')
    sections = [f"State:\n  {flag_info['State Name']} ({region_label})\n"]

    # The flag type line is optional
    if pd.notna(flag_info["Flag Name"]):
        sections.append(f"Flag Type:\n  {flag_info['Flag Name']}\n")

    sections.append(
        f"Duration:\n  {flag_info['Year Added']} - {flag_info['Year Removed']}\nColors:\n"
    )

    # One line per color that actually appears on the flag
    sections.extend(
        f"  {color.capitalize()}: {flag_info[color]}%\n"
        for color in color_map
        if flag_info[color] > 0
    )

    # Free-text remarks are optional as well
    if pd.notna(flag_info["Additional Info"]):
        sections.append(f"Additional Info:\n {flag_info['Additional Info']}")

    return "".join(sections)

# Callback to update the pie chart and dropdown options based on the selected year range and region
@app.callback(
    [Output("pie-chart", "figure"), Output("region-dropdown", "options")],
    [Input("year-slider", "value"), Input("region-dropdown", "value")],
)
def update_pie_chart(year_range, selected_region):
    """Rebuild the color-distribution pie chart for the current selection.

    Args:
        year_range: Two-item sequence ``[start_year, end_year]`` from the slider.
        selected_region: Value of the region dropdown ("worldwide" or a
            value of the Region column).

    Returns:
        A tuple ``(figure, dropdown_options)``: the Plotly pie figure and the
        list of option dicts for the region dropdown.
    """
    start_year, end_year = year_range
    flags_in_scope = filter_data(df, start_year, end_year, selected_region)

    # Dropdown entries: "Worldwide" first, then every region in the dataset
    region_choices = [{"label": "Worldwide", "value": "worldwide"}]
    for region_name in df["Region"].unique():
        region_choices.append(
            {"label": region_name.replace("_", " "), "value": region_name}
        )

    # Human-readable description of the selected period
    if start_year == end_year:
        period_label = str(start_year)
    else:
        period_label = f"{start_year} - {end_year}"

    # Human-readable description of the selected region
    if selected_region == "worldwide":
        region_label = "Worldwide"
    else:
        region_label = selected_region.replace("_", " ")

    # Columns holding the per-flag percentage of each color
    palette_columns = [
        "green",
        "turquoise",
        "blue",
        "violet",
        "pink",
        "red",
        "orange",
        "yellow",
        "white",
        "black",
    ]
    chart_title = "Distribution of Colors on National Flags"

    color_shares = flags_in_scope[palette_columns]
    has_color = color_shares.gt(0)

    # Mean percentage of each color over all selected flags, one row per color
    stats = color_shares.mean().rename_axis("color").reset_index(name="percentage")
    stats["percentage"] = stats["percentage"].round(1)
    # Number of selected flags that contain each color
    stats["count"] = has_color.sum().values
    # Mean percentage of each color among only the flags that contain it
    stats["average_share"] = [
        color_shares.loc[has_color[name], name].mean() for name in palette_columns
    ]
    # Share of the selected flags that contain each color
    flag_total = len(flags_in_scope)
    stats["percent_of_flags"] = (stats["count"] / flag_total * 100).round(1)

    fig = px.pie(stats, values="percentage", names="color", title=chart_title)

    # One hover template per slice, in the same order as the rows of `stats`
    hover_template = []
    for color, count, percent_of_flags, avg_share in zip(
        stats["color"],
        stats["count"],
        stats["percent_of_flags"],
        stats["average_share"],
    ):
        hover_template.append(
            f"<b>{color}</b><br><i>Amount of Flags w/ this Color:</i> {count}<br><i>Percent of Flags w/ this Color:</i> {percent_of_flags:.1f}%<br><i>Total Area on all Flags:</i> %{{value:.1f}}%<br><i>Avg Area on Flags w/ this Color:</i> {avg_share:.1f}%"
        )

    # Slices below 1% get their label outside the pie
    label_positions = (
        stats["percentage"].ge(1).map({True: "inside", False: "outside"}).tolist()
    )
    # Show the color name next to the percentage only if some slice is below 1%
    if stats["percentage"].lt(1).any():
        label_content = "percent+label"
    else:
        label_content = "percent"

    fig.update_traces(
        textposition=label_positions,
        textinfo=label_content,
        hovertemplate=hover_template,
        marker={"colors": [color_map[name] for name in stats["color"]]},
    )

    # Centered title plus a subtitle annotation naming the region and period
    fig.update_layout(
        title=dict(
            text=chart_title,
            font=dict(size=24, weight="bold"),
            y=0.95,
            x=0.5,
            xanchor="center",
            yanchor="top",
        ),
        annotations=[
            {
                "text": f"<b>Region:</b> {region_label} | <b>Time:</b> {period_label}",
                "x": 0.5,
                "y": 1.15,
                "xref": "paper",
                "yref": "paper",
                "showarrow": False,
                "font": {"size": 18},
            }
        ],
        paper_bgcolor="#D3D3D3",
    )

    return fig, region_choices

# Callback to display flags based on the selected color from the pie chart and other inputs
@app.callback(
    [
        Output("flag-container", "children"),  # Output: children of the flag container
        Output("selected-color", "data"),      # Output: data for the selected color
        Output("close-button", "style"),       # Output: style for the close button
    ],
    [
        Input("pie-chart", "clickData"),       # Input: data from clicks on the pie chart
        Input("year-slider", "value"),         # Input: value from the year slider
        Input("region-dropdown", "value"),     # Input: value from the region dropdown
        Input("close-button", "n_clicks"),     # Input: number of clicks on the close button
    ],
    [State("selected-color", "data")],         # State: current selected color
)
def display_flags(clickData, year_range, selected_region, n_clicks, selected_color):
    """Render the flag gallery for the current year range, region and color.

    Args:
        clickData: Last click payload of the pie chart, or None if no slice
            has been clicked.
        year_range: Two-item sequence ``[start_year, end_year]`` from the slider.
        selected_region: Value of the region dropdown.
        n_clicks: Click counter of the close button (only used as a trigger).
        selected_color: Color currently held in the "selected-color" store.

    Returns:
        A tuple ``(gallery, selected_color, close_button_style)``: the Div
        holding one image per flag, the color to store ("" when none is
        selected), and the style dict that shows or hides the close button.
    """
    start_year, end_year = year_range
    filtered_df = filter_data(df, start_year, end_year, selected_region)  # Filter data based on the selected year range and region

    # Collect the ids of every input that triggered this invocation
    ctx = dash.callback_context
    triggered_ids = (
        {item["prop_id"].split(".")[0] for item in ctx.triggered}
        if ctx.triggered
        else set()
    )

    if "close-button" in triggered_ids:
        # The close button always clears the selection
        selected_color = ""
    elif "pie-chart" in triggered_ids and clickData:
        # Only a real pie click toggles the selection. clickData keeps its
        # last value between callbacks, so toggling on slider or dropdown
        # changes would flip the selection with stale click data.
        clicked_color = clickData["points"][0]["label"]
        selected_color = "" if clicked_color == selected_color else clicked_color
    else:
        # Year or region changed (or initial load): keep the current selection
        selected_color = selected_color or ""

    if not selected_color:
        # No color selected: show every flag, ordered by state name
        color_df = filtered_df.sort_values(by="State Name")
        close_button_style = {"display": "none"}  # Hide the close button
    else:
        # Color selected: keep flags containing it, largest share first
        color_df = filtered_df[filtered_df[selected_color] > 0].sort_values(
            by=selected_color, ascending=False
        )
        close_button_style = {"display": "block"}  # Show the close button

    # Build each tooltip from the row being rendered. Looking the flag up
    # again in the global frame costs a full scan per flag, can pick the
    # wrong row when a state/URL pair repeats, and fails on a missing URL.
    flag_images = [
        html.Div(
            [
                html.Img(
                    src=row["Image URL"],
                    style={"width": "100px", "height": "auto"},
                    title=generate_tooltip(row),
                ),
                html.P(row["State Name"]),
            ],
            style={"margin": "10px", "display": "inline-block"},
        )
        for _, row in color_df.iterrows()
    ]

    # Style the container for the flags (Dash style keys are camelCased)
    container_style = {
        "padding": "10px",
        "marginTop": "20px",
        "backgroundColor": "#D3D3D3",
    }
    if selected_color:
        container_style["border"] = f"5px solid {color_map[selected_color]}"  # Highlight the container with the selected color

    # Return the list of flag images, the selected color, and the style for the close button
    return (
        html.Div(flag_images, style=container_style),
        selected_color,
        close_button_style,
    )

# Run the Dash server if this script is executed directly
if __name__ == "__main__":
    # Prefer `app.run`; `run_server` is the older spelling and is kept only
    # as a fallback for Dash releases that do not provide `run` yet.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=True)



In [2]:
import matplotlib.pyplot as plt
from ipywidgets import interact, widgets

# Load the flag dataset for the time-series plots
data = pd.read_csv("flags.csv")

# Make both year columns numeric: unparseable "Year Added" cells become NaN,
# and flags marked "still in use" are given the current calendar year
data["Year Added"] = pd.to_numeric(data["Year Added"], errors="coerce")
data["Year Removed"] = (
    data["Year Removed"].replace("still in use", pd.Timestamp.now().year).astype(int)
)

# Show region names with spaces and label rows without a region "Worldwide"
data["Region"] = data["Region"].str.replace("_", " ").fillna("Worldwide")

# Long format: one row per (flag, color) with the color share in "Percentage"
data_melted = data.melt(
    id_vars=["Region", "State Name", "Year Added", "Year Removed"],
    value_vars=[
        "green",
        "turquoise",
        "blue",
        "violet",
        "pink",
        "red",
        "orange",
        "yellow",
        "white",
        "black",
    ],
    var_name="Color",
    value_name="Percentage",
)

# Keep only rows where both years are known
data_clean = data_melted.dropna(subset=["Year Added", "Year Removed"])

# Expand every (flag, color) row into one record per year the flag was in use
years_data = [
    [region, state, color, year, percentage]
    for region, state, color, first_year, last_year, percentage in zip(
        data_clean["Region"],
        data_clean["State Name"],
        data_clean["Color"],
        data_clean["Year Added"],
        data_clean["Year Removed"],
        data_clean["Percentage"],
    )
    for year in range(int(first_year), int(last_year) + 1)
]

# Per-year records as a DataFrame
data_years = pd.DataFrame(
    years_data, columns=["Region", "State", "Color", "Year", "Percentage"]
)

# Restrict to the period 1874-2024 (both ends included)
data_filtered = data_years[data_years["Year"].between(1874, 2024)]

# Line color (hex) used for each color name in the plots
colors = dict(
    green="#008000",
    turquoise="#00FFFF",
    blue="#0000FF",
    violet="#800080",
    pink="#FF00FF",
    red="#FF0000",
    orange="#FFA500",
    yellow="#FFFF00",
    white="#FFFFFF",
    black="#000000",
)


# Function to plot the data
def plot_data(region, color_filter=None):
    """Plot the yearly mean percentage of each color for one region.

    Args:
        region: Region name to plot; "Worldwide" uses every row of
            ``data_filtered``.
        color_filter: Name of a single color to draw, or None to draw every
            color present in the selected rows.

    Colors whose yearly means are all zero are left out of the plot.
    """
    if region == "Worldwide":
        region_rows = data_filtered
    else:
        region_rows = data_filtered[data_filtered["Region"] == region]

    _, ax = plt.subplots(figsize=(14, 8), dpi=150)
    ax.set_facecolor("#D3D3D3")

    if color_filter is None:
        wanted_colors = region_rows["Color"].unique()
    else:
        wanted_colors = [color_filter]

    for color_name in wanted_colors:
        yearly_mean = (
            region_rows[region_rows["Color"] == color_name]
            .groupby("Year", as_index=False)["Percentage"]
            .mean()
        )
        # Skip colors that never appear in this selection
        if yearly_mean["Percentage"].ne(0).any():
            ax.plot(
                yearly_mean["Year"],
                yearly_mean["Percentage"],
                label=color_name,
                color=colors[color_name],
            )

    ax.set_title(f"Color Percentage Over Time in {region} (1874-2024)")
    ax.set_xlabel("Year")
    ax.set_ylabel("Percentage (%)")
    ax.set_ylim(0, None)
    ax.legend(title="Color", loc="upper right")
    ax.grid(True)
    plt.show()


# Dropdowns for choosing the region and the color to plot
region_options = ["Worldwide", *data_filtered["Region"].unique()]
region_selector = widgets.Dropdown(
    description="Region:", value="Worldwide", options=region_options
)
color_selector = widgets.Dropdown(
    description="Color:", options=["All", *colors]
)


def update_plot(region, color):
    """Redraw the time-series plot; the "All" choice means no color filter."""
    plot_data(region, None if color == "All" else color)


widgets.interact(update_plot, region=region_selector, color=color_selector)

interactive(children=(Dropdown(description='Region:', options=('Worldwide', 'Middle East', 'Western Europe', '…

<function __main__.update_plot(region, color)>

In [3]:
import seaborn as sns

# Prepare data for the heatmap by decades with swapped axes and formatted decade labels
def prepare_heatmap_data_by_decade(color, source=None):
    """Build a Region x Decade table of the mean percentage of one color.

    Args:
        color: Color name to select in the "Color" column.
        source: DataFrame with "Region", "Color", "Year" and "Percentage"
            columns. Defaults to the module-level ``data_filtered``.

    Returns:
        DataFrame indexed by region with one column per decade label
        (e.g. "1880s"); region/decade pairs without data are filled with 0.
    """
    if source is None:
        source = data_filtered

    selected = source[source["Color"] == color]
    # Derive the decade label without writing into the filtered slice:
    # assigning a column on it is a chained assignment and triggers
    # SettingWithCopyWarning.
    decade_labels = ((selected["Year"] // 10) * 10).astype(str) + "s"
    return (
        selected.assign(Decade=decade_labels)
        .groupby(["Region", "Decade"])["Percentage"]
        .mean()
        .unstack(fill_value=0)
    )

# Function to plot heatmap by decades with swapped axes and formatted decade labels
def plot_heatmap_by_decade(color):
    """Draw an annotated Region x Decade heatmap for one color.

    Args:
        color: Color name passed to ``prepare_heatmap_data_by_decade``; its
            capitalized form appears in the title.
    """
    decade_table = prepare_heatmap_data_by_decade(color)

    _, ax = plt.subplots(figsize=(14, 8), dpi=150)
    sns.heatmap(decade_table, cmap='YlGnBu', annot=True, fmt=".1f", ax=ax)

    ax.set_facecolor('#D3D3D3')
    ax.set_title(f'Heatmap of {color.capitalize()} Percentage by Decade and Region')
    ax.set_xlabel('Decade')
    ax.set_ylabel('Region')
    # Tilt the decade labels on the x axis
    plt.setp(ax.get_xticklabels(), rotation=45)
    plt.show()

# Dropdown for choosing which color the decade heatmap shows
color_selector = widgets.Dropdown(description='Color:', options=list(colors))

def update_heatmap_by_decade(color):
    """Redraw the decade heatmap for the color picked in the dropdown."""
    plot_heatmap_by_decade(color)

widgets.interact(update_heatmap_by_decade, color=color_selector)

interactive(children=(Dropdown(description='Color:', options=('green', 'turquoise', 'blue', 'violet', 'pink', …

<function __main__.update_heatmap_by_decade(color)>