In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Make sure the output directory exists
os.makedirs('../assets', exist_ok=True)

# Load data from CSV files
socioeconomic = pd.read_csv('../data/socioeconomic.csv')
consumption = pd.read_csv('../data/Consumption.csv')
regions = pd.read_csv('../data/Region.csv')

# Define translations for Danish to English
socio_translations = {
    "Gennemsnitshusstand": "Average Household",
    "Selvstændig": "Self-employed",
    "Lønmodtager på højeste niveau": "High Income",
    "Lønmodtager på mellemniveau": "Medium Income",
    "Lønmodtager på grundniveau": "Basic Income",
    "Arbejdsløs": "Unemployed", 
    "Uddannelsessøgende": "Student",
    "Pensionist, efterlønsmodtager": "Pensioner",
    "Ude af erhverv i øvrigt": "Not in Workforce"
}

# Clean and process socioeconomic data
socio_df = socioeconomic.copy()
socio_df.columns = [col.strip() for col in socio_df.columns]
socio_df = socio_df.rename(columns={
    'Socioøkonomisk status': 'Group',
    '09.8 Pakkerejser': 'Packages',
    '11.1 Restaurationstjenester': 'Restaurants',
    '11.2 Overnatningsfaciliteter': 'Accommodation'
})
socio_df['Group_EN'] = socio_df['Group'].map(socio_translations)
socio_df['Total'] = socio_df['Packages'] + socio_df['Restaurants'] + socio_df['Accommodation']

# Process consumption/age data
age_df = consumption.copy()
# Identify the fixed prices columns
package_col = [col for col in age_df.columns if 'Package Holidays - Fixed Prices' in col][0]
restaurant_col = [col for col in age_df.columns if 'Restaurant Services - Fixed Prices' in col][0]
accommodation_col = [col for col in age_df.columns if 'Accommodation Services - Fixed Prices' in col][0]

age_df = age_df.rename(columns={
    package_col: 'Packages',
    restaurant_col: 'Restaurants',
    accommodation_col: 'Accommodation'
})
age_df['Total'] = age_df['Packages'] + age_df['Restaurants'] + age_df['Accommodation']

# Process region data
region_df = regions.copy()
# Identify the fixed prices columns
package_col = [col for col in region_df.columns if 'Package Holidays - Fixed Prices' in col][0]
restaurant_col = [col for col in region_df.columns if 'Restaurant Services - Fixed Prices' in col][0]
accommodation_col = [col for col in region_df.columns if 'Accommodation Services - Fixed Prices' in col][0]

region_df = region_df.rename(columns={
    package_col: 'Packages',
    restaurant_col: 'Restaurants',
    accommodation_col: 'Accommodation'
})
region_df['Total'] = region_df['Packages'] + region_df['Restaurants'] + region_df['Accommodation']

# Function to create more polished radar chart that closely matches the example image
def create_polished_radar_chart(df, title, filename, group_col='Group', display_col=None):
    """
    Create a radar chart where axes are demographic groups and polygons are spending categories,
    styled to closely match the example image.
    
    Parameters:
    df: DataFrame with the data
    title: Title for the chart
    filename: Output filename
    group_col: Column containing group identifiers
    display_col: Column containing display names (if None, uses group_col)
    """
    if display_col is None:
        display_col = group_col
        
    # Prepare the data
    groups = df[display_col].tolist()
    n_groups = len(groups)
    
    # Calculate angles for each group (divide the plot / number of variables)
    angles = np.linspace(0, 2 * np.pi, n_groups, endpoint=False).tolist()
    # Complete the loop
    angles += angles[:1]
    
    # Normalize to a 0-100 scale using the same max value for all categories
    # This helps maintain the proper relative scale between categories
    category_max = max(
        df['Packages'].max(),
        df['Restaurants'].max(),
        df['Accommodation'].max()
    )
    
    # Get values for each category
    packages_values = df['Packages'].tolist()
    packages_values += packages_values[:1]  # Complete the loop
    packages_norm = [(val / category_max * 100) for val in packages_values]
    
    restaurants_values = df['Restaurants'].tolist()
    restaurants_values += restaurants_values[:1]  # Complete the loop
    restaurants_norm = [(val / category_max * 100) for val in restaurants_values]
    
    accommodation_values = df['Accommodation'].tolist()
    accommodation_values += accommodation_values[:1]  # Complete the loop
    accommodation_norm = [(val / category_max * 100) for val in accommodation_values]
    
    # Calculate the position of groups to ensure proper labeling
    groups += groups[:1]  # Complete the loop for the labels
    
    # Create the plot with a white background like in the example
    fig = plt.figure(figsize=(10, 10), facecolor='white')
    ax = fig.add_subplot(111, polar=True)
    
    # Define colors exactly matching the example image
    category_colors = {
        'Packages': '#8c68af',      # Moderate purple
        'Restaurants': '#78c17c',   # Moderate green
        'Accommodation': '#f5d98f'  # Light yellow/orange
    }
    
    # Define Danish labels for the legend
    danish_labels = {
        'Packages': 'Rejsepakker',
        'Restaurants': 'Restauranter',
        'Accommodation': 'Overnatning'
    }
    
    # Set up clean gridlines that match the example image
    # Make gridlines lighter and use n concentric circles
    ax.grid(True, color='#e0e0e0', alpha=0.95, linestyle='-', linewidth=0.5)
    
    # Remove default radial grid lines and add custom ones
    ax.set_yticks([25, 50, 75, 100])
    ax.set_yticklabels([])  # Hide labels
    
    # Change the color of the center and spine
    ax.spines['polar'].set_visible(False)
    
    # Plot each category as in the example image
    # Accommodation (yellow polygon)
    ax.fill(angles, accommodation_norm, color=category_colors['Accommodation'], alpha=0.3, edgecolor=category_colors['Accommodation'], linewidth=1.3)
    
    # Restaurants (green polygon)
    ax.fill(angles, restaurants_norm, color=category_colors['Restaurants'], alpha=0.3, edgecolor=category_colors['Restaurants'], linewidth=1.3)
    
    # Packages (purple polygon)
    ax.fill(angles, packages_norm, color=category_colors['Packages'], alpha=0.3, edgecolor=category_colors['Packages'], linewidth=1.3)
    
    # Set the labels for each demographic group
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(groups[:-1], color='#82817f', fontsize=9)
    
    # Set title (no title text in example image, but we'll add one subtly)
    plt.title(title, size=14, color='#707070', pad=15, fontweight='light')
    
    # Add a legend similar to the example
    legend_elements = [
        plt.Line2D([0], [0], marker='o', color='w', 
                 markerfacecolor=category_colors['Packages'], markersize=10, 
                 label=danish_labels['Packages']),
        plt.Line2D([0], [0], marker='o', color='w', 
                 markerfacecolor=category_colors['Restaurants'], markersize=10, 
                 label=danish_labels['Restaurants']),
        plt.Line2D([0], [0], marker='o', color='w', 
                 markerfacecolor=category_colors['Accommodation'], markersize=10, 
                 label=danish_labels['Accommodation'])
    ]
    
    ax.legend(handles=legend_elements, loc='lower center', 
            bbox_to_anchor=(0.5, -0.1), ncol=3, frameon=False, 
            handletextpad=0.5, fontsize=10)
    
    # Save the figure
    plt.tight_layout()
    plt.savefig(f'../assets/{filename}.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    print(f"Final polished radar visualization created and saved to '../assets/{filename}.png'")

# Create radar charts for each demographic type
# 1. Socioeconomic Groups
create_polished_radar_chart(
    socio_df,
    'Travel Spending Categories by Socioeconomic Group',
    'danish_travel_socioeconomic_final',
    'Group',
    'Group_EN'
)

# 2. Age Groups
create_polished_radar_chart(
    age_df,
    'Travel Spending Categories by Age Group',
    'danish_travel_age_final',
    'Age Group',
    'Age Group'
)

# 3. Regional Groups
create_polished_radar_chart(
    region_df,
    'Travel Spending Categories by Region',
    'danish_travel_region_final',
    'Region',
    'Region'
)

print("All final polished radar visualizations created successfully!")

Final polished radar visualization created and saved to '../assets/danish_travel_socioeconomic_final.png'
Final polished radar visualization created and saved to '../assets/danish_travel_age_final.png'
Final polished radar visualization created and saved to '../assets/danish_travel_region_final.png'
All final polished radar visualizations created successfully!


In [33]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import os

# Make sure the output directory exists
os.makedirs('../assets', exist_ok=True)

# Load the data
df = pd.read_csv('../data/bubble_plot.csv')

# Clean the data
df['GDP'] = pd.to_numeric(df['GDP, PPP (current international $) [NY.GDP.MKTP.PP.CD]'].str.replace(',', ''), errors='coerce')
df['Population'] = pd.to_numeric(df['Population, total [SP.POP.TOTL]'], errors='coerce')
df['Departures'] = pd.to_numeric(df['International tourism, number of departures [ST.INT.DPRT]'].str.replace(',', ''), errors='coerce')
df['PerCapita'] = pd.to_numeric(df['International Tourism Departures per capita'], errors='coerce')

# Create continent mapping (Denmark is part of Europe)
continent_mapping = {
    'Denmark': 'Europe', 'Sweden': 'Europe', 'Norway': 'Europe', 'Finland': 'Europe', 'Iceland': 'Europe',
    'United Kingdom': 'Europe', 'Germany': 'Europe', 'France': 'Europe', 'Italy': 'Europe', 'Spain': 'Europe',
    'Netherlands': 'Europe', 'Belgium': 'Europe', 'Switzerland': 'Europe', 'Austria': 'Europe', 'Poland': 'Europe',
    'Czech Republic': 'Europe', 'Hungary': 'Europe', 'Romania': 'Europe', 'Portugal': 'Europe', 'Greece': 'Europe',
    'Ireland': 'Europe', 'Luxembourg': 'Europe', 'Slovenia': 'Europe', 'Croatia': 'Europe', 'Lithuania': 'Europe',
    'Latvia': 'Europe', 'Estonia': 'Europe', 'Slovakia': 'Europe', 'Bulgaria': 'Europe',
    'United States': 'North America', 'Canada': 'North America', 'Mexico': 'North America',
    'China': 'Asia', 'Japan': 'Asia', 'Korea, Rep.': 'Asia', 'India': 'Asia', 'Indonesia': 'Asia',
    'Singapore': 'Asia', 'Malaysia': 'Asia', 'Thailand': 'Asia', 'Vietnam': 'Asia', 'Philippines': 'Asia',
    'Hong Kong SAR, China': 'Asia', 'Russian Federation': 'Asia', 'Taiwan, China': 'Asia',
    'Australia': 'Oceania', 'New Zealand': 'Oceania',
    'South Africa': 'Africa', 'Egypt, Arab Rep.': 'Africa', 'Nigeria': 'Africa', 'Kenya': 'Africa', 'Morocco': 'Africa',
    'Brazil': 'South America', 'Argentina': 'South America', 'Chile': 'South America', 'Colombia': 'South America',
    'Peru': 'South America', 'Venezuela, RB': 'South America',
    'Saudi Arabia': 'Middle East', 'United Arab Emirates': 'Middle East', 'Turkey': 'Middle East', 'Israel': 'Middle East',
    
    # Add more countries that might be causing gray bubbles
    'Albania': 'Europe', 'Belarus': 'Europe', 'Bosnia and Herzegovina': 'Europe', 'Kazakhstan': 'Asia',
    'Mongolia': 'Asia', 'North Korea': 'Asia', 'Sri Lanka': 'Asia', 'Pakistan': 'Asia', 'Bangladesh': 'Asia',
    'Myanmar': 'Asia', 'Nepal': 'Asia', 'Lebanon': 'Middle East', 'Jordan': 'Middle East', 'Kuwait': 'Middle East',
    'Bahrain': 'Middle East', 'Qatar': 'Middle East', 'Oman': 'Middle East', 'Yemen': 'Middle East',
    'Ethiopia': 'Africa', 'Ghana': 'Africa', 'Tanzania': 'Africa', 'Uganda': 'Africa', 'Zambia': 'Africa',
    'Zimbabwe': 'Africa', 'Cameroon': 'Africa', 'Senegal': 'Africa', 'Mali': 'Africa', 'Niger': 'Africa',
    'Chad': 'Africa', 'Sudan': 'Africa', 'Libya': 'Africa', 'Egypt, Arab Rep.': 'Africa',
    'Tunisia': 'Africa', 'Algeria': 'Africa',
    'Ecuador': 'South America', 'Bolivia': 'South America', 'Paraguay': 'South America', 'Uruguay': 'South America',
    'Papua New Guinea': 'Oceania', 'Fiji': 'Oceania', 'Solomon Islands': 'Oceania'
}

df['Continent'] = df['Country Name'].map(continent_mapping).fillna('Other')
df_clean = df.dropna(subset=['GDP', 'PerCapita', 'Population']).copy()
years = sorted(df_clean['Time'].unique())
years = [year for year in years if 2000 <= year <= 2019]

# Print countries that weren't mapped (the gray bubbles)
unmapped_countries = df_clean[df_clean['Continent'] == 'Other']['Country Name'].unique()
if len(unmapped_countries) > 0:
    print(f"Countries not mapped to continents (gray bubbles): {list(unmapped_countries)}")

# Color scheme (removing gray "Other" category)
continent_colors = {
    'Europe': '#374c80',
    'North America': '#7a5195',
    'Asia': '#bc5090',
    'Oceania': '#ef5675',
    'Africa': '#ff764a',
    'South America': '#ffa600',
    'Middle East': '#c1666b'
    # Removed 'Other' category to eliminate gray bubbles
}

# Remove 'Other' countries from the dataset
df_clean = df_clean[df_clean['Continent'] != 'Other'].copy()

# Prepare data
df_clean['GDP_Billion'] = df_clean['GDP'] / 1e9
df_clean['Population_Million'] = df_clean['Population'] / 1e6
df_clean['IsDenmark'] = df_clean['Country Name'] == 'Denmark'

# Create figure
fig = go.Figure()

# Create legend traces first (invisible but in legend)
for continent in continent_colors.keys():
    fig.add_trace(go.Scatter(
        x=[None],
        y=[None],
        mode='markers',
        name=continent,
        marker=dict(
            size=10,
            color=continent_colors[continent]
        ),
        showlegend=True,
        legendgroup=continent
    ))

# Add Denmark legend entry
fig.add_trace(go.Scatter(
    x=[None],
    y=[None],
    mode='markers',
    name='🇩🇰 Denmark',
    marker=dict(
        size=15,
        color='#003f5c',
        line=dict(width=3, color='#C60C30'),
        symbol='star'
    ),
    showlegend=True,
    legendgroup='Denmark'
))

# Add traces for each year
trace_year_map = {}

for year in years:
    trace_year_map[year] = []
    year_data = df_clean[df_clean['Time'] == year].copy()
    
    for continent in continent_colors.keys():
        continent_data = year_data[year_data['Continent'] == continent]
        
        if not continent_data.empty:
            # Add regular bubbles (excluding Denmark)
            regular_data = continent_data[~continent_data['IsDenmark']]
            if not regular_data.empty:
                trace_idx = len(fig.data)
                trace_year_map[year].append(trace_idx)
                
                fig.add_trace(go.Scatter(
                    x=regular_data['GDP_Billion'],
                    y=regular_data['PerCapita'],
                    mode='markers',
                    name=continent,
                    text=regular_data['Country Name'],
                    hovertemplate=(
                        '<b>%{text}</b><br>' +
                        'GDP: $%{x:.1f}B<br>' +
                        'Tourism/Capita: %{y:.3f}<br>' +
                        '<extra></extra>'
                    ),
                    marker=dict(
                        size=np.sqrt(regular_data['Population'] / 2000000) * 10,
                        sizemin=6,
                        color=continent_colors[continent],
                        opacity=0.7,
                        line=dict(width=1, color='white')
                    ),
                    visible=bool(year == years[0]),
                    legendgroup=continent,
                    showlegend=False  # Don't show in legend (we already have legend entries)
                ))
            
            # Add Denmark bubble (no text, just star)
            denmark_data = continent_data[continent_data['IsDenmark']]
            if not denmark_data.empty:
                trace_idx = len(fig.data)
                trace_year_map[year].append(trace_idx)
                
                fig.add_trace(go.Scatter(
                    x=denmark_data['GDP_Billion'],
                    y=denmark_data['PerCapita'],
                    mode='markers',  # No text - just markers
                    name='🇩🇰 Denmark',
                    text=['Denmark'],  # For hover only
                    hovertemplate=(
                        '<b>Denmark</b><br>' +
                        'GDP: $%{x:.1f}B<br>' +
                        'Tourism/Capita: %{y:.3f}<br>' +
                        '<extra></extra>'
                    ),
                    marker=dict(
                        size=np.sqrt(denmark_data['Population'] / 1500000) * 15,
                        color='#003f5c',
                        line=dict(width=3, color='#C60C30'),
                        symbol='star'
                    ),
                    visible=bool(year == years[0]),
                    legendgroup='Denmark',
                    showlegend=False  # Don't show in legend (we already have legend entry)
                ))

# Create slider steps
steps = []
for year in years:
    visible_array = [True] * (len(continent_colors) + 1)  # Keep legend traces visible
    
    # Add False for all data traces first
    visible_array.extend([False] * (len(fig.data) - len(visible_array)))
    
    # Make traces for this year visible
    for idx in trace_year_map[year]:
        visible_array[idx] = True
    
    # Create slider step
    step = dict(
        method="restyle",
        args=[{"visible": visible_array}],
        label=str(year)
    )
    steps.append(step)

# Update layout
fig.update_layout(
    title="Denmark's Tourism Journey (2000-2019)",
    xaxis=dict(
        title="GDP per Capita (billions USD PPP)",
        type='log',
        range=[np.log10(df_clean['GDP_Billion'].min() * 0.9), np.log10(df_clean['GDP_Billion'].max() * 1.1)],
        tickmode='array',
        tickvals=[10, 20, 50, 100, 200, 500, 1000, 2000, 5000, 10000],
        ticktext=['10B', '20B', '50B', '100B', '200B', '500B', '1,000B', '2,000B', '5,000B', '10,000B'],
        showgrid=False
    ),
    yaxis=dict(
        title="International Tourism Departures per Capita",
        range=[-0.1, df_clean['PerCapita'].max() * 1.15]
    ),
    showlegend=True,
    legend=dict(
        orientation="v", 
        yanchor="middle", 
        y=0.5, 
        xanchor="left", 
        x=1.02
    ),
    plot_bgcolor='white',
    width=None,  # Make responsive
    height=None,  # Make responsive
    autosize=True,  # Enable autosizing
    sliders=[{
        'active': 0,
        'currentvalue': {'prefix': 'Year: '},
        'pad': {'t': 50},
        'steps': steps
    }]
)

# Save the file
fig.write_html(
    '../assets/denmark_tourism_bubble_interactive.html',
    include_plotlyjs='cdn',
    config={
        'responsive': True,
        'displayModeBar': True,
        'modeBarButtonsToRemove': ['lasso2d', 'select2d']
    }
)

print("Interactive bubble plot created successfully!")
print("File saved: ../assets/denmark_tourism_bubble_interactive.html")
print(f"\nFeatures:")
print("- No more gray bubbles (unmapped countries removed)")
print("- Interactive legend - click continents to show/hide")
print("- Legend stays visible across all years")
print("- Denmark as clean star marker")

Countries not mapped to continents (gray bubbles): ['Armenia', 'Azerbaijan', 'Cambodia', 'Costa Rica', 'Czechia', 'Dominican Republic', 'El Salvador', 'Eswatini', 'Guatemala', 'Iran, Islamic Rep.', 'Lao PDR', 'Macao SAR, China', 'Malta', 'Mauritius', 'Moldova', 'Nicaragua', 'Panama', 'Samoa', 'Seychelles', 'Turkiye', 'Ukraine', 'Vanuatu', 'Bhutan', 'Cyprus', 'Georgia', 'Kyrgyz Republic', 'Sierra Leone', 'Slovak Republic', 'Togo', 'Uzbekistan', 'Honduras', 'Tajikistan', 'Puerto Rico', 'Congo, Dem. Rep.', 'Congo, Rep.', 'Tuvalu', 'Central African Republic', 'Trinidad and Tobago', 'Bermuda', 'Maldives', 'Sao Tome and Principe', 'Turkmenistan', 'Gambia, The']
Interactive bubble plot created successfully!
File saved: ../assets/denmark_tourism_bubble_interactive.html

Features:
- No more gray bubbles (unmapped countries removed)
- Interactive legend - click continents to show/hide
- Legend stays visible across all years
- Denmark as clean star marker
