# In [52]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import os

# Make sure the output directory exists.
# Both paths are relative to the current working directory; the layout assumed
# here is a working directory (e.g. 'scripts') with 'data' and 'assets' as
# sibling folders one level up.
os.makedirs('../assets', exist_ok=True)
data_file_path = '../data/bubble_plot.csv'  # Ensure this path is correct

# Load the data. A missing input file is unrecoverable, so report it and stop
# with a non-zero exit status. SystemExit is raised directly because the bare
# exit() helper is installed by the `site` module for interactive sessions and
# reports status 0 (success) when called without arguments.
try:
    df = pd.read_csv(data_file_path)
except FileNotFoundError:
    print(f"Error: The data file was not found at {data_file_path}")
    print("Please ensure the path to 'bubble_plot.csv' is correct.")
    raise SystemExit(1) from None

# Clean the data
def _as_number(column):
    """Return ``column`` as a numeric Series, ignoring thousands separators.

    Commas are stripped only from cells that are strings; cells that are
    already numbers (or missing) are passed through unchanged. This works
    whether pandas parsed the column as numeric or as object, and avoids the
    ``.str`` accessor, which raises on numeric columns and yields NaN for
    non-string cells in a mixed column. Anything that still cannot be parsed
    becomes NaN.
    """
    without_commas = column.map(
        lambda cell: cell.replace(',', '') if isinstance(cell, str) else cell
    )
    return pd.to_numeric(without_commas, errors='coerce')


df['GDP'] = _as_number(df['GDP, PPP (current international $) [NY.GDP.MKTP.PP.CD]'])
df['Population'] = _as_number(df['Population, total [SP.POP.TOTL]'])
df['Departures'] = _as_number(df['International tourism, number of departures [ST.INT.DPRT]'])
# PerCapita is a small ratio, so a comma here would more likely be a decimal
# mark than a thousands separator; it is parsed as-is and bad cells become NaN.
df['PerCapita'] = pd.to_numeric(df['International Tourism Departures per capita'], errors='coerce')

# Create continent mapping.
# Countries are listed once per region and the table is then inverted into a
# flat {country name: region} lookup. 'Middle East' is kept as its own group
# alongside the continents.
_countries_by_continent = {
    'Europe': (
        'Denmark', 'Sweden', 'Norway', 'Finland', 'Iceland',
        'United Kingdom', 'Germany', 'France', 'Italy', 'Spain',
        'Netherlands', 'Belgium', 'Switzerland', 'Austria', 'Poland',
        'Czech Republic', 'Hungary', 'Romania', 'Portugal', 'Greece',
        'Ireland', 'Luxembourg', 'Slovenia', 'Croatia', 'Lithuania',
        'Latvia', 'Estonia', 'Slovakia', 'Bulgaria',
        'Albania', 'Belarus', 'Bosnia and Herzegovina',
    ),
    'North America': (
        'United States', 'Canada', 'Mexico',
    ),
    'Asia': (
        'China', 'Japan', 'Korea, Rep.', 'India', 'Indonesia',
        'Singapore', 'Malaysia', 'Thailand', 'Vietnam', 'Philippines',
        'Hong Kong SAR, China', 'Russian Federation', 'Taiwan, China',
        'Kazakhstan', 'Mongolia', 'North Korea', 'Sri Lanka', 'Pakistan',
        'Bangladesh', 'Myanmar', 'Nepal',
    ),
    'Oceania': (
        'Australia', 'New Zealand',
        'Papua New Guinea', 'Fiji', 'Solomon Islands',
    ),
    'Africa': (
        'South Africa', 'Egypt, Arab Rep.', 'Nigeria', 'Kenya', 'Morocco',
        'Ethiopia', 'Ghana', 'Tanzania', 'Uganda', 'Zambia',
        'Zimbabwe', 'Cameroon', 'Senegal', 'Mali', 'Niger',
        'Chad', 'Sudan', 'Libya',
        'Tunisia', 'Algeria',
    ),
    'South America': (
        'Brazil', 'Argentina', 'Chile', 'Colombia',
        'Peru', 'Venezuela, RB',
        'Ecuador', 'Bolivia', 'Paraguay', 'Uruguay',
    ),
    'Middle East': (
        'Saudi Arabia', 'United Arab Emirates', 'Turkey', 'Israel',
        'Lebanon', 'Jordan', 'Kuwait',
        'Bahrain', 'Qatar', 'Oman', 'Yemen',
    ),
}

continent_mapping = {
    country: continent
    for continent, countries in _countries_by_continent.items()
    for country in countries
}

# Label every row with its region; countries missing from the lookup are
# tagged 'Other' and are not plotted.
df['Continent'] = df['Country Name'].map(continent_mapping).fillna('Other')

# Keep only rows that can be drawn: all plotted measures present and a known
# region. The 'Other' rows are removed *before* the year list is built so the
# animation never gets a year for which no plotted country has data.
df_clean = df.dropna(subset=['GDP', 'PerCapita', 'Population', 'Continent']).copy()
df_clean = df_clean[df_clean['Continent'] != 'Other'].copy()

# Normalise the year column to integers. If the CSV column was read as float
# or text (for example because of blank or non-numeric cells), the frame and
# slider labels would otherwise come out as '1996.0', or the range check below
# would fail on strings. Rows without a usable year are dropped.
df_clean['Time'] = pd.to_numeric(df_clean['Time'], errors='coerce')
df_clean = df_clean.dropna(subset=['Time']).copy()
df_clean['Time'] = df_clean['Time'].astype(int)

# Years shown in the animation, ascending, limited to the 1996-2019 window
# named in the chart title.
years = [int(year) for year in sorted(df_clean['Time'].unique()) if 1996 <= year <= 2019]

# Marker colour per region; the key order also fixes the legend/trace order.
continent_colors = {
    'Europe': '#374c80', 'North America': '#7a5195', 'Asia': '#bc5090',
    'Oceania': '#ef5675', 'Africa': '#ff764a', 'South America': '#ffa600',
    'Middle East': '#c1666b'
}

# Nothing to plot is a failure: stop with a non-zero status. SystemExit is
# raised directly rather than calling the interactive-only exit() helper,
# which would report success.
if df_clean.empty or not years:
    print("Dataframe is empty after cleaning or no valid years found. Cannot proceed.")
    raise SystemExit(1)

# Derived columns used by the traces: GDP in billions for the x axis, and a
# flag that lets Denmark be drawn as its own highlighted trace.
df_clean['GDP_Billion'] = df_clean['GDP'] / 1e9
df_clean['IsDenmark'] = df_clean['Country Name'] == 'Denmark'

# Start the figure with legend-only traces. Each holds a single (None, None)
# point, so it adds a legend entry without drawing anything on the axes; the
# data traces added afterwards join these entries through `legendgroup`.
fig = go.Figure()

for region_name, region_color in continent_colors.items():
    region_legend_stub = go.Scatter(
        x=[None], y=[None], mode='markers', name=region_name,
        marker=dict(size=10, color=region_color),
        showlegend=True, legendgroup=region_name,
    )
    fig.add_trace(region_legend_stub)

# Denmark gets its own legend entry, styled like its highlighted data marker.
denmark_legend_stub = go.Scatter(
    x=[None], y=[None], mode='markers', name='🇩🇰 Denmark',
    marker=dict(size=15, color='#003f5c', line=dict(width=3, color='#C60C30'), symbol='star'),
    showlegend=True, legendgroup='Denmark',
)
fig.add_trace(denmark_legend_stub)

# Index that the next trace added to the figure will receive.
current_trace_idx = len(fig.data)

def _bubble_payload(year, group):
    """Return the per-point values for one trace in one year.

    ``group`` is either a key of ``continent_colors`` (all countries of that
    region except Denmark) or the literal ``'Denmark'``. The result is a dict
    with ``x`` (GDP in billions), ``y`` (departures per capita), ``text`` and
    ``ids`` (country names) and ``marker.size`` (scaled from the square root
    of population), all as plain lists. The lists are empty when the group
    has no rows for that year.

    ``ids`` is what keeps a bubble tied to its country during animated
    transitions. Without it points are matched by position, so a country
    entering or leaving a region's data between two years makes the remaining
    bubbles morph into different countries.
    """
    in_year = df_clean['Time'] == year
    if group == 'Denmark':
        rows = df_clean[in_year & df_clean['IsDenmark']]
        # Denmark uses its own, larger size scale than the region bubbles.
        sizes = np.sqrt(rows['Population'] / 1500000) * 15
    else:
        rows = df_clean[in_year & (df_clean['Continent'] == group) & (~df_clean['IsDenmark'])]
        sizes = np.sqrt(rows['Population'] / 2000000) * 10
    country_names = rows['Country Name'].tolist()
    return {'x': rows['GDP_Billion'].tolist(),
            'y': rows['PerCapita'].tolist(),
            'text': country_names,
            'ids': country_names,
            'marker': {'size': sizes.tolist()}}


# Positions (within fig.data) of the traces that the frames update, and the
# group each of those traces shows, kept in the same order.
data_trace_indices = []
continent_order_for_traces = []
first_year = years[0]

# Add initial data traces for the first year: one per region ...
for continent in continent_colors:
    data_trace_indices.append(current_trace_idx)
    continent_order_for_traces.append(continent)
    initial_payload = _bubble_payload(first_year, continent)
    fig.add_trace(go.Scatter(
        x=initial_payload['x'], y=initial_payload['y'],
        mode='markers', name=continent,
        text=initial_payload['text'], ids=initial_payload['ids'],
        hovertemplate='<b>%{text}</b><br>GDP: $%{x:.1f}B<br>Tourism/Capita: %{y:.3f}<br><extra></extra>',
        marker=dict(size=initial_payload['marker']['size'],
                    sizemin=6, color=continent_colors[continent], opacity=0.7, line=dict(width=1, color='white')),
        visible=True, legendgroup=continent, showlegend=False))
    current_trace_idx += 1

# ... followed by the highlighted Denmark trace, added last.
data_trace_indices.append(current_trace_idx)
continent_order_for_traces.append('Denmark')
denmark_payload = _bubble_payload(first_year, 'Denmark')
fig.add_trace(go.Scatter(
    x=denmark_payload['x'], y=denmark_payload['y'],
    mode='markers', name='Denmark',
    text=denmark_payload['text'], ids=denmark_payload['ids'],
    hovertemplate='<b>Denmark</b><br>GDP: $%{x:.1f}B<br>Tourism/Capita: %{y:.3f}<br><extra></extra>',
    marker=dict(size=denmark_payload['marker']['size'],
                color='#003f5c', line=dict(width=3, color='#C60C30'), symbol='star'),
    visible=True, legendgroup='Denmark', showlegend=False))

# --- Create Frames for Animation ---
# One frame per year, named after the year. Each frame carries one payload per
# data trace, in the order of `continent_order_for_traces`, and `traces` tells
# Plotly which figure traces those payloads apply to (the legend stubs are
# left untouched).
plotly_frames = [
    go.Frame(
        data=[_bubble_payload(year, group) for group in continent_order_for_traces],
        name=str(year),
        traces=data_trace_indices,
    )
    for year in years
]
fig.frames = plotly_frames

# Slider steps: one per year. Each step uses the "animate" method to jump
# straight to the frame whose name is that year, with zero frame and
# transition duration so dragging the slider responds immediately.
steps = [
    {
        'method': "animate",
        'args': [
            [str(year)],  # name of the frame to show
            {
                "frame": {"duration": 0, "redraw": True},
                "mode": "immediate",
                "transition": {"duration": 0},
            },
        ],
        'label': str(year),  # slider label; same text as the frame name
    }
    for year in years
]

# Update layout: axes, legend, play/pause buttons and the year slider.

# Axis extents come from the cleaned data, padded a little, with fixed
# fallbacks should an extreme be missing.
gdp_lowest = df_clean['GDP_Billion'].min(skipna=True)
gdp_highest = df_clean['GDP_Billion'].max(skipna=True)
per_capita_highest = df_clean['PerCapita'].max(skipna=True)

# The x axis is logarithmic, so its range is passed as log10 of the bounds.
x_axis_range = [
    np.log10(max(0.01, gdp_lowest * 0.8 if pd.notna(gdp_lowest) else 0.01)),
    np.log10(max(1, gdp_highest * 1.2 if pd.notna(gdp_highest) else 1)),
]
y_axis_range = [
    -0.1,
    max(0.1, per_capita_highest * 1.15 if pd.notna(per_capita_highest) else 0.1),
]

# Hand-picked ticks; values are in billions, labels switch to 'T' from 1000.
gdp_tick_values = [0.1, 0.2, 0.5, 1, 2, 5, 10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000, 20000]
gdp_tick_labels = ['0.1B', '0.2B', '0.5B', '1B', '2B', '5B', '10B', '20B', '50B',
                   '100B', '200B', '500B', '1T', '2T', '5T', '10T', '20T']

# Play walks through all frames (first argument None) starting from the one
# currently shown.
# NOTE(review): 'autoplay' does not appear to be a Plotly.js animate option,
# so this entry probably has no effect; kept unchanged - confirm and move the
# setting to where the HTML is written if autoplay-on-load must be disabled.
play_button = {
    'label': '▶️ Play', 'method': 'animate',
    'args': [None,
             {'frame': {'duration': 1200, 'redraw': True},
              'fromcurrent': True,
              'transition': {'duration': 500, 'easing': 'quadratic-in-out'},
              'mode': 'immediate',
              'autoplay': False}],
}
# Pause animates to [None] in immediate mode with zero duration, which halts
# the running animation on the current frame.
pause_button = {
    'label': '⏸️ Pause', 'method': 'animate',
    'args': [[None],
             {'frame': {'duration': 0, 'redraw': False},
              'mode': 'immediate',
              'transition': {'duration': 0}}],
}
playback_menu = {
    'type': 'buttons', 'showactive': False, 'y': -0.15, 'x': 0.5, 'xanchor': 'center',
    'yanchor': 'top', 'direction': 'left', 'pad': {'t': 10, 'b': 10, 'r': 10},
    'buttons': [play_button, pause_button],
}
year_slider = {
    'active': 0,
    'currentvalue': {'prefix': 'Year: ', 'visible': True, 'xanchor': 'right'},
    'pad': {'t': 20, 'b': 40},
    'steps': steps,
}

fig.update_layout(
    title="Denmark's Tourism Journey (1996-2019)",
    xaxis=dict(title="GDP (billions USD PPP)", type='log',
               range=x_axis_range,
               tickmode='array',
               tickvals=gdp_tick_values,
               ticktext=gdp_tick_labels,
               showgrid=False),
    yaxis=dict(title="International Tourism Departures per Capita",
               range=y_axis_range,
               showgrid=True, gridcolor='lightgray'),
    showlegend=True,
    legend=dict(orientation="v", yanchor="middle", y=0.5, xanchor="left", x=1.02),
    plot_bgcolor='white', width=None, height=None, autosize=True,
    updatemenus=[playback_menu],
    sliders=[year_slider],
)
# Write the figure as a standalone HTML page that loads plotly.js from the CDN.
output_filename = '../assets/denmark_tourism_bubble_final_animation.html'
fig.write_html(
    output_filename,
    include_plotlyjs='cdn',
    # write_html starts the animation on page load by default when the figure
    # has frames; switch that off so playback only begins when Play is pressed.
    auto_play=False,
    config={'responsive': True, 'displayModeBar': True, 'modeBarButtonsToRemove': ['lasso2d', 'select2d']}
)
print(f"Final animated plot saved: {output_filename}")

# Output: Final animated plot saved: ../assets/denmark_tourism_bubble_final_animation.html
