In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Make sure the output directory exists before any chart is written.
# The radar chart PNGs generated further down are saved here.
os.makedirs('../assets', exist_ok=True)

# Load the three input tables. The paths are relative to the current working
# directory, so the script expects a sibling 'data' directory one level up.
try:
    socioeconomic = pd.read_csv('../data/socioeconomic.csv')
    consumption = pd.read_csv('../data/Consumption.csv')
    regions = pd.read_csv('../data/Region.csv')
except FileNotFoundError as e:
    print(f"Error loading data file: {e}")
    print("Please ensure 'socioeconomic.csv', 'Consumption.csv', and 'Region.csv' are in the '../data/' directory.")
    # Abort with a non-zero status so callers can detect the failure.
    # The bare exit() helper is injected by the site module for interactive
    # sessions and, called without arguments, reports success (status 0).
    raise SystemExit(1)

# Danish -> English names of the socioeconomic groups.
# The English names are used as axis labels on the radar chart.
socio_translations = {
    "Gennemsnitshusstand": "Average Household",
    "Selvstændig": "Self-employed",
    "Lønmodtager på højeste niveau": "High Income",
    "Lønmodtager på mellemniveau": "Medium Income",
    "Lønmodtager på grundniveau": "Basic Income",
    "Arbejdsløs": "Unemployed",
    "Uddannelsessøgende": "Student",
    "Pensionist, efterlønsmodtager": "Pensioner",
    "Ude af erhverv i øvrigt": "Not in Workforce"
}

# Clean and process socioeconomic data:
# normalise the headers, give the spending columns short names, then add the
# translated group label and the total spending per group.
socio_df = socioeconomic.copy()
socio_df.columns = [col.strip() for col in socio_df.columns]  # Headers may carry stray whitespace
socio_df = socio_df.rename(columns={
    'Socioøkonomisk status': 'Group',
    '09.8 Pakkerejser': 'Packages',
    '11.1 Restaurationstjenester': 'Restaurants',
    '11.2 Overnatningsfaciliteter': 'Accommodation'
})
# A group that has no entry in socio_translations keeps its Danish name;
# a plain .map() would leave NaN there, which shows up as a "nan" axis label.
socio_df['Group_EN'] = socio_df['Group'].map(socio_translations).fillna(socio_df['Group'])
socio_df['Total'] = socio_df['Packages'] + socio_df['Restaurants'] + socio_df['Accommodation']  # Total spending

def _find_fixed_price_column(frame, category, source_name):
    """Return the first column of ``frame`` whose name contains '<category> - Fixed Prices'.

    The match is a substring test because the real headers may carry extra
    text around the category name.

    Parameters:
    frame: DataFrame whose column names are searched.
    category: Category prefix, e.g. 'Package Holidays'.
    source_name: File name mentioned in the error message, e.g. 'Region.csv'.

    Raises:
    SystemExit: with status 1, after printing a diagnostic, when no column matches.
    """
    wanted = f'{category} - Fixed Prices'
    for col in frame.columns:
        if wanted in col:
            return col
    print(f"Error: Could not find expected 'Fixed Prices' columns in {source_name}.")
    print("Please check column names like 'Package Holidays - Fixed Prices'.")
    # Non-zero status: a bare exit() would report success to the caller.
    raise SystemExit(1)


# Process consumption/age data
# Locate the three fixed-price spending columns and give them short names.
age_df = consumption.copy()
package_col_age = _find_fixed_price_column(age_df, 'Package Holidays', 'Consumption.csv')
restaurant_col_age = _find_fixed_price_column(age_df, 'Restaurant Services', 'Consumption.csv')
accommodation_col_age = _find_fixed_price_column(age_df, 'Accommodation Services', 'Consumption.csv')

age_df = age_df.rename(columns={
    package_col_age: 'Packages',
    restaurant_col_age: 'Restaurants',
    accommodation_col_age: 'Accommodation',
    'AGE': 'Age Group'  # Assuming 'AGE' is the column for age groups
})
age_df['Total'] = age_df['Packages'] + age_df['Restaurants'] + age_df['Accommodation']  # Total spending

# Process region data
# Same treatment as the age data, applied to the regional table.
region_df = regions.copy()
package_col_region = _find_fixed_price_column(region_df, 'Package Holidays', 'Region.csv')
restaurant_col_region = _find_fixed_price_column(region_df, 'Restaurant Services', 'Region.csv')
accommodation_col_region = _find_fixed_price_column(region_df, 'Accommodation Services', 'Region.csv')

region_df = region_df.rename(columns={
    package_col_region: 'Packages',
    restaurant_col_region: 'Restaurants',
    accommodation_col_region: 'Accommodation',
    'REGION': 'Region'  # Assuming 'REGION' is the column for regions
})
region_df['Total'] = region_df['Packages'] + region_df['Restaurants'] + region_df['Accommodation']  # Total spending


# Function to create a polished radar chart using Matplotlib
# This function takes a DataFrame and generates a radar plot with improved styling
def create_improved_radar_chart(df, title, filename, group_col='Group', display_col=None,
                                output_dir='../assets'):
    """
    Create a radar chart of travel spending per group and save it as a PNG.

    Every row of ``df`` becomes one spoke of the chart. The 'Packages',
    'Restaurants' and 'Accommodation' columns are drawn as three overlapping
    filled polygons, scaled so that the largest value found in any of the
    three columns of this DataFrame sits at 100%.

    Parameters:
    df: DataFrame with the data. Expected columns: 'Packages', 'Restaurants', 'Accommodation', and a group column.
    title: Title for the chart.
    filename: Output filename (without extension). The chart is saved as '<output_dir>/<filename>.png'.
    group_col: Column containing the primary group identifiers. It is the default for display_col and
        supplies the label for any row whose display_col value is missing.
    display_col: Column containing display names for the groups on the chart axes (if None, uses group_col).
    output_dir: Directory the PNG is written to. It is not created here and must already exist.

    Returns:
    None. When ``df`` has no usable spending values, or the largest value is 0,
    a warning is printed and no figure is created or saved.
    """
    if display_col is None:
        display_col = group_col

    categories = ('Packages', 'Restaurants', 'Accommodation')

    # One shared maximum for the three categories keeps the polygons comparable
    # within this chart (the scale is per chart, not shared between charts).
    # NaN column maxima (empty or all-NaN columns) are ignored so that a single
    # unusable column cannot turn the whole scale into NaN.
    column_peaks = [df[name].max() for name in categories]
    usable_peaks = [peak for peak in column_peaks if pd.notna(peak)]

    if not usable_peaks:
        print(f"Warning: No spending data available for {title}. Cannot create radar chart.")
        return

    category_max = max(usable_peaks)

    # Avoid dividing by zero when normalising
    if category_max == 0:
        print(f"Warning: Maximum spending is 0 for {title}. Cannot create radar chart.")
        return

    # Axis labels: prefer the display column, fall back to the group identifier
    # for rows where the display name is missing.
    groups = df[display_col].tolist()
    if display_col != group_col and group_col in df.columns:
        fallback_names = df[group_col].tolist()
        groups = [
            fallback if pd.isna(label) else label
            for label, fallback in zip(groups, fallback_names)
        ]

    # One angle per group; the first angle is repeated to close the polygon
    angles = np.linspace(0, 2 * np.pi, len(groups), endpoint=False).tolist()
    angles += angles[:1]

    # Normalise every category to 0-100 and close each polygon the same way
    normalised = {}
    for name in categories:
        values = df[name].tolist()
        values += values[:1]
        normalised[name] = [(val / category_max * 100) for val in values]

    # Light grey figure background for better contrast with the vibrant colors
    fig = plt.figure(figsize=(10, 10), facecolor='#f5f5f5')
    ax = fig.add_subplot(111, polar=True)

    # Colors for each spending category
    category_colors = {
        'Packages': '#9b59b6',      # Amethyst (Vibrant Purple)
        'Restaurants': '#2ecc71',   # Emerald (Vibrant Green)
        'Accommodation': '#f1c40f'  # Sunflower (Vibrant Yellow)
    }

    # Danish labels for the legend
    danish_labels = {
        'Packages': 'Rejsepakker',
        'Restaurants': 'Restauranter',
        'Accommodation': 'Overnatning'
    }

    # Gridlines
    ax.grid(True, color='#cccccc', alpha=0.8, linestyle='-', linewidth=0.7)

    # Radial ticks as percentages of the chart maximum
    ax.set_yticks([25, 50, 75, 100])
    ax.set_yticklabels(['25%', '50%', '75%', '100%'], color='#777777', fontsize=8)
    ax.set_ylim(0, 100)

    # Hide the outer circle of the polar axes
    ax.spines['polar'].set_visible(False)

    # Semi-transparent fill so overlapping polygons stay visible
    alpha_level = 0.5

    # Draw order: Accommodation first, Packages last (and therefore on top)
    for name in ('Accommodation', 'Restaurants', 'Packages'):
        color = category_colors[name]
        ax.fill(angles, normalised[name], color=color, alpha=alpha_level,
                edgecolor=color, linewidth=1.5, label=danish_labels[name])
        ax.plot(angles, normalised[name], color=color, linewidth=2.0)  # Solid outline on top of the fill

    # One tick per group; the duplicated closing angle gets no label
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(groups, color='#555555', fontsize=10)

    # Title is set on this axes object rather than through pyplot's current-axes state
    ax.set_title(title, size=16, color='#333333', pad=20, fontweight='bold')

    # Custom legend with one round colored marker per category
    legend_elements = [
        plt.Line2D([0], [0], marker='o', color='w',
                   markerfacecolor=category_colors[name], markersize=10,
                   label=danish_labels[name])
        for name in categories
    ]

    # Legend placed below the plot
    ax.legend(handles=legend_elements, loc='lower center',
              bbox_to_anchor=(0.5, -0.15), ncol=3, frameon=False,
              handletextpad=0.5, fontsize=10)

    # Save and close exactly this figure, independent of pyplot's "current figure"
    output_path = f'{output_dir}/{filename}.png'
    fig.tight_layout()
    fig.savefig(output_path, dpi=300, bbox_inches='tight')
    plt.close(fig)

    print(f"Improved radar visualization created and saved to '{output_path}'")

# Render one radar chart per demographic breakdown.
# Each entry holds the positional arguments of create_improved_radar_chart:
# (data, chart title, output file stem, group column, display column).
chart_specs = [
    # 1. Socioeconomic groups: processed by 'Group', labelled in English
    (socio_df,
     'Travel Spending Categories by Socioeconomic Group',
     'danish_travel_socioeconomic_improved',
     'Group',
     'Group_EN'),
    # 2. Age groups: the same column is used for processing and display
    (age_df,
     'Travel Spending Categories by Age Group',
     'danish_travel_age_improved',
     'Age Group',
     'Age Group'),
    # 3. Regions: the same column is used for processing and display
    (region_df,
     'Travel Spending Categories by Region',
     'danish_travel_region_improved',
     'Region',
     'Region'),
]

for chart_args in chart_specs:
    create_improved_radar_chart(*chart_args)

print("All improved radar visualizations created successfully!")


Final polished radar visualization created and saved to '../assets/danish_travel_socioeconomic_final.png'
Final polished radar visualization created and saved to '../assets/danish_travel_age_final.png'
Final polished radar visualization created and saved to '../assets/danish_travel_region_final.png'
All final polished radar visualizations created successfully!


In [52]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import os

# Make sure the output directory exists.
# Paths are relative to the working directory: 'data' and 'assets' are
# expected one level up.
os.makedirs('../assets', exist_ok=True)
data_file_path = '../data/bubble_plot.csv'

# Load the data
try:
    df = pd.read_csv(data_file_path)
except FileNotFoundError:
    print(f"Error: The data file was not found at {data_file_path}")
    print("Please ensure the path to 'bubble_plot.csv' is correct.")
    # Non-zero status: a bare exit() would report success to the caller.
    raise SystemExit(1)


def _parse_number(column):
    """Convert a column to numbers, tolerating thousands separators.

    Values are stringified first so the comma removal also works when pandas
    has already parsed the column as numeric (the .str accessor is only
    available on string data). Anything that still cannot be parsed becomes NaN.
    """
    without_commas = column.astype(str).str.replace(',', '', regex=False)
    return pd.to_numeric(without_commas, errors='coerce')


# Clean the data: every indicator goes through the same parser so a column
# written with thousands separators is not silently turned into NaN.
df['GDP'] = _parse_number(df['GDP, PPP (current international $) [NY.GDP.MKTP.PP.CD]'])
df['Population'] = _parse_number(df['Population, total [SP.POP.TOTL]'])
df['Departures'] = _parse_number(df['International tourism, number of departures [ST.INT.DPRT]'])
df['PerCapita'] = _parse_number(df['International Tourism Departures per capita'])

# Country -> continent lookup, written per continent and inverted below.
# 'Middle East' is kept as its own group alongside the continents.
# NOTE(review): the keys mix World Bank style names ('Korea, Rep.',
# 'Egypt, Arab Rep.') with short names ('North Korea', 'Yemen') - confirm they
# match the 'Country Name' values in the data file.
_countries_by_continent = {
    'Europe': [
        'Denmark', 'Sweden', 'Norway', 'Finland', 'Iceland',
        'United Kingdom', 'Germany', 'France', 'Italy', 'Spain',
        'Netherlands', 'Belgium', 'Switzerland', 'Austria', 'Poland',
        'Czech Republic', 'Hungary', 'Romania', 'Portugal', 'Greece',
        'Ireland', 'Luxembourg', 'Slovenia', 'Croatia', 'Lithuania',
        'Latvia', 'Estonia', 'Slovakia', 'Bulgaria',
        'Albania', 'Belarus', 'Bosnia and Herzegovina',
    ],
    'North America': [
        'United States', 'Canada', 'Mexico',
    ],
    'Asia': [
        'China', 'Japan', 'Korea, Rep.', 'India', 'Indonesia',
        'Singapore', 'Malaysia', 'Thailand', 'Vietnam', 'Philippines',
        'Hong Kong SAR, China', 'Russian Federation', 'Taiwan, China',
        'Kazakhstan', 'Mongolia', 'North Korea', 'Sri Lanka', 'Pakistan',
        'Bangladesh', 'Myanmar', 'Nepal',
    ],
    'Oceania': [
        'Australia', 'New Zealand',
        'Papua New Guinea', 'Fiji', 'Solomon Islands',
    ],
    'Africa': [
        'South Africa', 'Egypt, Arab Rep.', 'Nigeria', 'Kenya', 'Morocco',
        'Ethiopia', 'Ghana', 'Tanzania', 'Uganda', 'Zambia',
        'Zimbabwe', 'Cameroon', 'Senegal', 'Mali', 'Niger',
        'Chad', 'Sudan', 'Libya',
        'Tunisia', 'Algeria',
    ],
    'South America': [
        'Brazil', 'Argentina', 'Chile', 'Colombia',
        'Peru', 'Venezuela, RB',
        'Ecuador', 'Bolivia', 'Paraguay', 'Uruguay',
    ],
    'Middle East': [
        'Saudi Arabia', 'United Arab Emirates', 'Turkey', 'Israel',
        'Lebanon', 'Jordan', 'Kuwait',
        'Bahrain', 'Qatar', 'Oman', 'Yemen',
    ],
}

# Flatten to the {country: continent} form used for the lookup
continent_mapping = {
    country: continent
    for continent, countries in _countries_by_continent.items()
    for country in countries
}

# Tag every row with its continent; countries missing from the mapping become 'Other'
df['Continent'] = df['Country Name'].map(continent_mapping).fillna('Other')

# Marker color per continent. The key order also fixes the legend/trace order.
continent_colors = {
    'Europe': '#374c80', 'North America': '#7a5195', 'Asia': '#bc5090',
    'Oceania': '#ef5675', 'Africa': '#ff764a', 'South America': '#ffa600',
    'Middle East': '#c1666b'
}

# Keep only rows that can actually be plotted:
#   - all three plotted indicators present,
#   - a known continent (no color exists for 'Other'),
#   - a year inside the animated window.
# 'Continent' needs no NaN check because it was filled with 'Other' above.
df_clean = df.dropna(subset=['GDP', 'PerCapita', 'Population']).copy()
df_clean = df_clean[df_clean['Continent'] != 'Other']
df_clean = df_clean[df_clean['Time'].between(1996, 2019)].copy()

# Derive the animation years from the fully filtered data, so every year has at
# least one plottable row and the axis ranges computed from df_clean only
# reflect years that are shown.
years = sorted(df_clean['Time'].unique())

if df_clean.empty or not years:
    print("Dataframe is empty after cleaning or no valid years found. Cannot proceed.")
    # Non-zero status: a bare exit() would report success to the caller.
    raise SystemExit(1)

df_clean['GDP_Billion'] = df_clean['GDP'] / 1e9                  # x-axis unit: billions
df_clean['IsDenmark'] = df_clean['Country Name'] == 'Denmark'    # Denmark gets its own highlighted trace

fig = go.Figure()

# --- Legend-only traces ---
# One point-less trace per continent plus one for Denmark. They exist purely to
# provide legend entries; the data traces below join their legend groups.
for continent, continent_color in continent_colors.items():
    legend_marker = {'size': 10, 'color': continent_color}
    fig.add_trace(go.Scatter(
        x=[None], y=[None], mode='markers', name=continent,
        marker=legend_marker,
        showlegend=True, legendgroup=continent,
    ))

denmark_outline = {'width': 3, 'color': '#C60C30'}
fig.add_trace(go.Scatter(
    x=[None], y=[None], mode='markers', name='🇩🇰 Denmark',
    marker={'size': 15, 'color': '#003f5c', 'line': denmark_outline, 'symbol': 'star'},
    showlegend=True, legendgroup='Denmark',
))

# Index the next added trace will receive (the figure was empty before the legend traces)
current_trace_idx = len(fig.data)

# --- Data traces, initialised with the first animated year ---
# data_trace_indices and continent_order_for_traces are filled in lockstep:
# position i names the group drawn by trace data_trace_indices[i].
data_trace_indices = []
continent_order_for_traces = []
first_year = years[0]
in_first_year = df_clean['Time'] == first_year
not_denmark = ~df_clean['IsDenmark']
country_hover = '<b>%{text}</b><br>GDP: $%{x:.1f}B<br>Tourism/Capita: %{y:.3f}<br><extra></extra>'

for continent, continent_color in continent_colors.items():
    data_trace_indices.append(current_trace_idx)
    continent_order_for_traces.append(continent)

    continent_data_initial = df_clean[in_first_year & (df_clean['Continent'] == continent) & not_denmark]
    if continent_data_initial.empty:
        bubble_sizes = []
    else:
        # Bubble size grows with the square root of the population
        bubble_sizes = np.sqrt(continent_data_initial['Population'] / 2000000) * 10

    fig.add_trace(go.Scatter(
        x=continent_data_initial['GDP_Billion'],
        y=continent_data_initial['PerCapita'],
        mode='markers',
        name=continent,
        text=continent_data_initial['Country Name'],
        hovertemplate=country_hover,
        marker={
            'size': bubble_sizes,
            'sizemin': 6,
            'color': continent_color,
            'opacity': 0.7,
            'line': {'width': 1, 'color': 'white'},
        },
        visible=True,
        legendgroup=continent,
        showlegend=False,
    ))
    current_trace_idx += 1

# Denmark is the last data trace and is drawn as an outlined star
data_trace_indices.append(current_trace_idx)
continent_order_for_traces.append('Denmark')

denmark_data_initial = df_clean[in_first_year & df_clean['IsDenmark']]
if denmark_data_initial.empty:
    denmark_size = 15
else:
    denmark_size = np.sqrt(denmark_data_initial['Population'] / 1500000) * 15

fig.add_trace(go.Scatter(
    x=denmark_data_initial['GDP_Billion'],
    y=denmark_data_initial['PerCapita'],
    mode='markers',
    name='Denmark',
    text=['Denmark'] * len(denmark_data_initial),
    hovertemplate='<b>Denmark</b><br>GDP: $%{x:.1f}B<br>Tourism/Capita: %{y:.3f}<br><extra></extra>',
    marker={
        'size': denmark_size,
        'color': '#003f5c',
        'line': {'width': 3, 'color': '#C60C30'},
        'symbol': 'star',
    },
    visible=True,
    legendgroup='Denmark',
    showlegend=False,
))

# --- Create Frames for Animation ---
def _frame_trace_update(year, trace_key):
    """Build the per-year update (x, y, marker size, hover text) for one data trace.

    ``trace_key`` is either a continent name or 'Denmark'. Continent traces
    exclude Denmark, which has its own trace with a larger size scale.
    """
    in_year = df_clean['Time'] == year
    if trace_key == 'Denmark':
        rows = df_clean[in_year & df_clean['IsDenmark']]
        population_unit, size_factor = 1500000, 15
        hover_text = ['Denmark'] * len(rows)
    else:
        rows = df_clean[in_year & (df_clean['Continent'] == trace_key) & (~df_clean['IsDenmark'])]
        population_unit, size_factor = 2000000, 10
        hover_text = rows['Country Name'].tolist()

    # Same square-root scaling as the initial traces; an empty selection gives []
    sizes = (np.sqrt(rows['Population'] / population_unit) * size_factor).tolist()
    return {
        'x': rows['GDP_Billion'].tolist(),
        'y': rows['PerCapita'].tolist(),
        'marker': {'size': sizes},
        'text': hover_text,
    }


# One frame per year. The updates are listed in the order of
# continent_order_for_traces and applied to the traces in data_trace_indices.
plotly_frames = [
    go.Frame(
        data=[_frame_trace_update(year, trace_key) for trace_key in continent_order_for_traces],
        name=str(year),
        traces=data_trace_indices,
    )
    for year in years
]
fig.frames = plotly_frames

# Slider steps: one per year, each jumping to the frame named after that year.
# The "animate" method is used with zero frame and transition duration, so
# dragging the slider switches frames immediately.
steps = [
    {
        "method": "animate",
        "args": [
            [str(year)],  # Name of the frame to show
            {
                "frame": {"duration": 0, "redraw": True},
                "mode": "immediate",
                "transition": {"duration": 0},
            },
        ],
        "label": str(year),  # Shown on the slider; matches the frame name
    }
    for year in years
]

# Update layout
# Axis limits are fixed up front from the cleaned data so the axes do not
# rescale while the animation runs.
gdp_low = df_clean['GDP_Billion'].min(skipna=True)
gdp_high = df_clean['GDP_Billion'].max(skipna=True)
per_capita_high = df_clean['PerCapita'].max(skipna=True)

# The x axis is logarithmic, so its range is given as log10 of the limits.
# The floors (0.01 and 1) keep log10 defined for missing or non-positive data.
x_axis_range = [
    np.log10(max(0.01, gdp_low * 0.8 if pd.notna(gdp_low) else 0.01)),
    np.log10(max(1, gdp_high * 1.2 if pd.notna(gdp_high) else 1)),
]
y_axis_top = max(0.1, per_capita_high * 1.15 if pd.notna(per_capita_high) else 0.1)

fig.update_layout(
    title="Denmark's Tourism Journey (1996-2019)",
    xaxis=dict(title="GDP (billions USD PPP)", type='log',
               range=x_axis_range,
               tickmode='array',
               tickvals=[0.1,0.2,0.5,1,2,5,10,20,50,100,200,500,1000,2000,5000,10000,20000],
               ticktext=['0.1B','0.2B','0.5B','1B','2B','5B','10B','20B','50B','100B','200B','500B','1T','2T','5T','10T','20T'],
               showgrid=False),
    yaxis=dict(title="International Tourism Departures per Capita",
               range=[-0.1, y_axis_top],
               showgrid=True, gridcolor='lightgray'),
    showlegend=True,
    legend=dict(orientation="v", yanchor="middle", y=0.5, xanchor="left", x=1.02),
    plot_bgcolor='white', width=None, height=None, autosize=True,
    updatemenus=[{
        'type': 'buttons', 'showactive': False, 'y': -0.15, 'x': 0.5, 'xanchor': 'center',
        'yanchor': 'top', 'direction': 'left', 'pad': {'t': 10,'b':10, 'r': 10},
        'buttons': [
            {'label': '▶️ Play', 'method': 'animate',
             'args': [None,  # None: play through the figure's frames
                      {'frame': {'duration': 1200, 'redraw': True},
                       'fromcurrent': True,
                       'transition': {'duration': 500, 'easing': 'quadratic-in-out'},
                       'mode': 'immediate'}]},
            {'label': '⏸️ Pause', 'method': 'animate',
             'args': [[None],  # [None]: animate to "no frame", which halts playback
                      {'frame': {'duration': 0, 'redraw': False},
                       'mode': 'immediate',
                       'transition': {'duration': 0}}]}
        ]
    }],
    sliders=[{
        'active': 0,
        'currentvalue': {'prefix': 'Year: ', 'visible': True, 'xanchor': 'right'},
        'pad': {'t': 20, 'b': 40},
        'steps': steps
    }]
)
output_filename = '../assets/denmark_tourism_bubble_final_animation.html'
fig.write_html(
    output_filename,
    include_plotlyjs='cdn',
    # Autoplay on page load is an option of the HTML export, not of the Play
    # button's animate arguments; without this the exported page starts
    # animating as soon as it is opened.
    auto_play=False,
    config={'responsive': True, 'displayModeBar': True, 'modeBarButtonsToRemove': ['lasso2d', 'select2d']}
)
print(f"Final animated plot saved: {output_filename}")

Final animated plot saved: ../assets/denmark_tourism_bubble_final_animation.html
