In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Make sure the output directory exists
os.makedirs('../assets', exist_ok=True)

# Load data from CSV files
socioeconomic = pd.read_csv('../data/socioeconomic.csv')
consumption = pd.read_csv('../data/Consumption.csv')
regions = pd.read_csv('../data/Region.csv')

# Define translations for Danish to English
socio_translations = {
    "Gennemsnitshusstand": "Average Household",
    "Selvstændig": "Self-employed",
    "Lønmodtager på højeste niveau": "High Income",
    "Lønmodtager på mellemniveau": "Medium Income",
    "Lønmodtager på grundniveau": "Basic Income",
    "Arbejdsløs": "Unemployed", 
    "Uddannelsessøgende": "Student",
    "Pensionist, efterlønsmodtager": "Pensioner",
    "Ude af erhverv i øvrigt": "Not in Workforce"
}

# Clean and process socioeconomic data
socio_df = socioeconomic.copy()
socio_df.columns = [col.strip() for col in socio_df.columns]
socio_df = socio_df.rename(columns={
    'Socioøkonomisk status': 'Group',
    '09.8 Pakkerejser': 'Packages',
    '11.1 Restaurationstjenester': 'Restaurants',
    '11.2 Overnatningsfaciliteter': 'Accommodation'
})
socio_df['Group_EN'] = socio_df['Group'].map(socio_translations)
socio_df['Total'] = socio_df['Packages'] + socio_df['Restaurants'] + socio_df['Accommodation']

# Process consumption/age data
age_df = consumption.copy()
# Identify the fixed prices columns
package_col = [col for col in age_df.columns if 'Package Holidays - Fixed Prices' in col][0]
restaurant_col = [col for col in age_df.columns if 'Restaurant Services - Fixed Prices' in col][0]
accommodation_col = [col for col in age_df.columns if 'Accommodation Services - Fixed Prices' in col][0]

age_df = age_df.rename(columns={
    package_col: 'Packages',
    restaurant_col: 'Restaurants',
    accommodation_col: 'Accommodation'
})
age_df['Total'] = age_df['Packages'] + age_df['Restaurants'] + age_df['Accommodation']

# Process region data
region_df = regions.copy()
# Identify the fixed prices columns
package_col = [col for col in region_df.columns if 'Package Holidays - Fixed Prices' in col][0]
restaurant_col = [col for col in region_df.columns if 'Restaurant Services - Fixed Prices' in col][0]
accommodation_col = [col for col in region_df.columns if 'Accommodation Services - Fixed Prices' in col][0]

region_df = region_df.rename(columns={
    package_col: 'Packages',
    restaurant_col: 'Restaurants',
    accommodation_col: 'Accommodation'
})
region_df['Total'] = region_df['Packages'] + region_df['Restaurants'] + region_df['Accommodation']

# Function to create more polished radar chart that closely matches the example image
def create_polished_radar_chart(df, title, filename, group_col='Group', display_col=None):
    """
    Create a radar chart where axes are demographic groups and polygons are spending categories,
    styled to closely match the example image.
    
    Parameters:
    df: DataFrame with the data
    title: Title for the chart
    filename: Output filename
    group_col: Column containing group identifiers
    display_col: Column containing display names (if None, uses group_col)
    """
    if display_col is None:
        display_col = group_col
        
    # Prepare the data
    groups = df[display_col].tolist()
    n_groups = len(groups)
    
    # Calculate angles for each group (divide the plot / number of variables)
    angles = np.linspace(0, 2 * np.pi, n_groups, endpoint=False).tolist()
    # Complete the loop
    angles += angles[:1]
    
    # Normalize to a 0-100 scale using the same max value for all categories
    # This helps maintain the proper relative scale between categories
    category_max = max(
        df['Packages'].max(),
        df['Restaurants'].max(),
        df['Accommodation'].max()
    )
    
    # Get values for each category
    packages_values = df['Packages'].tolist()
    packages_values += packages_values[:1]  # Complete the loop
    packages_norm = [(val / category_max * 100) for val in packages_values]
    
    restaurants_values = df['Restaurants'].tolist()
    restaurants_values += restaurants_values[:1]  # Complete the loop
    restaurants_norm = [(val / category_max * 100) for val in restaurants_values]
    
    accommodation_values = df['Accommodation'].tolist()
    accommodation_values += accommodation_values[:1]  # Complete the loop
    accommodation_norm = [(val / category_max * 100) for val in accommodation_values]
    
    # Calculate the position of groups to ensure proper labeling
    groups += groups[:1]  # Complete the loop for the labels
    
    # Create the plot with a white background like in the example
    fig = plt.figure(figsize=(10, 10), facecolor='white')
    ax = fig.add_subplot(111, polar=True)
    
    # Define colors exactly matching the example image
    category_colors = {
        'Packages': '#8c68af',      # Moderate purple
        'Restaurants': '#78c17c',   # Moderate green
        'Accommodation': '#f5d98f'  # Light yellow/orange
    }
    
    # Define Danish labels for the legend
    danish_labels = {
        'Packages': 'Rejsepakker',
        'Restaurants': 'Restauranter',
        'Accommodation': 'Overnatning'
    }
    
    # Set up clean gridlines that match the example image
    # Make gridlines lighter and use n concentric circles
    ax.grid(True, color='#e0e0e0', alpha=0.95, linestyle='-', linewidth=0.5)
    
    # Remove default radial grid lines and add custom ones
    ax.set_yticks([25, 50, 75, 100])
    ax.set_yticklabels([])  # Hide labels
    
    # Change the color of the center and spine
    ax.spines['polar'].set_visible(False)
    
    # Plot each category as in the example image
    # Accommodation (yellow polygon)
    ax.fill(angles, accommodation_norm, color=category_colors['Accommodation'], alpha=0.3, edgecolor=category_colors['Accommodation'], linewidth=1.3)
    
    # Restaurants (green polygon)
    ax.fill(angles, restaurants_norm, color=category_colors['Restaurants'], alpha=0.3, edgecolor=category_colors['Restaurants'], linewidth=1.3)
    
    # Packages (purple polygon)
    ax.fill(angles, packages_norm, color=category_colors['Packages'], alpha=0.3, edgecolor=category_colors['Packages'], linewidth=1.3)
    
    # Set the labels for each demographic group
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(groups[:-1], color='#82817f', fontsize=9)
    
    # Set title (no title text in example image, but we'll add one subtly)
    plt.title(title, size=14, color='#707070', pad=15, fontweight='light')
    
    # Add a legend similar to the example
    legend_elements = [
        plt.Line2D([0], [0], marker='o', color='w', 
                 markerfacecolor=category_colors['Packages'], markersize=10, 
                 label=danish_labels['Packages']),
        plt.Line2D([0], [0], marker='o', color='w', 
                 markerfacecolor=category_colors['Restaurants'], markersize=10, 
                 label=danish_labels['Restaurants']),
        plt.Line2D([0], [0], marker='o', color='w', 
                 markerfacecolor=category_colors['Accommodation'], markersize=10, 
                 label=danish_labels['Accommodation'])
    ]
    
    ax.legend(handles=legend_elements, loc='lower center', 
            bbox_to_anchor=(0.5, -0.1), ncol=3, frameon=False, 
            handletextpad=0.5, fontsize=10)
    
    # Save the figure
    plt.tight_layout()
    plt.savefig(f'../assets/{filename}.png', dpi=300, bbox_inches='tight')
    plt.close()
    
    print(f"Final polished radar visualization created and saved to '../assets/{filename}.png'")

# Create radar charts for each demographic type
# 1. Socioeconomic Groups
create_polished_radar_chart(
    socio_df,
    'Travel Spending Categories by Socioeconomic Group',
    'danish_travel_socioeconomic_final',
    'Group',
    'Group_EN'
)

# 2. Age Groups
create_polished_radar_chart(
    age_df,
    'Travel Spending Categories by Age Group',
    'danish_travel_age_final',
    'Age Group',
    'Age Group'
)

# 3. Regional Groups
create_polished_radar_chart(
    region_df,
    'Travel Spending Categories by Region',
    'danish_travel_region_final',
    'Region',
    'Region'
)

print("All final polished radar visualizations created successfully!")

Final polished radar visualization created and saved to '../assets/danish_travel_socioeconomic_final.png'
Final polished radar visualization created and saved to '../assets/danish_travel_age_final.png'
Final polished radar visualization created and saved to '../assets/danish_travel_region_final.png'
All final polished radar visualizations created successfully!


In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import os

# Make sure the output directory exists
os.makedirs('../assets', exist_ok=True)

# Load the data
df = pd.read_csv('../data/bubble_plot.csv')

# Clean the data
# Convert string numbers to float, handling missing values and NaN properly
df['GDP'] = pd.to_numeric(df['GDP, PPP (current international $) [NY.GDP.MKTP.PP.CD]'].str.replace(',', ''), errors='coerce')
df['Population'] = pd.to_numeric(df['Population, total [SP.POP.TOTL]'], errors='coerce')
df['Departures'] = pd.to_numeric(df['International tourism, number of departures [ST.INT.DPRT]'].str.replace(',', ''), errors='coerce')
df['PerCapita'] = pd.to_numeric(df['International Tourism Departures per capita'], errors='coerce')

# Create a mapping of countries to continents
continent_mapping = {
    # Nordic Countries (Europe)
    'Denmark': 'Europe', 'Sweden': 'Europe', 'Norway': 'Europe', 'Finland': 'Europe', 'Iceland': 'Europe',
    
    # Major European Countries
    'United Kingdom': 'Europe', 'Germany': 'Europe', 'France': 'Europe', 'Italy': 'Europe', 'Spain': 'Europe',
    'Netherlands': 'Europe', 'Belgium': 'Europe', 'Switzerland': 'Europe', 'Austria': 'Europe', 'Poland': 'Europe',
    'Czech Republic': 'Europe', 'Hungary': 'Europe', 'Romania': 'Europe', 'Portugal': 'Europe', 'Greece': 'Europe',
    'Ireland': 'Europe', 'Luxembourg': 'Europe', 'Slovenia': 'Europe', 'Croatia': 'Europe', 'Lithuania': 'Europe',
    'Latvia': 'Europe', 'Estonia': 'Europe', 'Slovakia': 'Europe', 'Bulgaria': 'Europe',
    
    # North America
    'United States': 'North America', 'Canada': 'North America', 'Mexico': 'North America',
    
    # Asia
    'China': 'Asia', 'Japan': 'Asia', 'Korea, Rep.': 'Asia', 'India': 'Asia', 'Indonesia': 'Asia',
    'Singapore': 'Asia', 'Malaysia': 'Asia', 'Thailand': 'Asia', 'Vietnam': 'Asia', 'Philippines': 'Asia',
    'Hong Kong SAR, China': 'Asia', 'Russian Federation': 'Asia', 'Taiwan, China': 'Asia',
    
    # Oceania
    'Australia': 'Oceania', 'New Zealand': 'Oceania',
    
    # Africa
    'South Africa': 'Africa', 'Egypt, Arab Rep.': 'Africa', 'Nigeria': 'Africa', 'Kenya': 'Africa', 'Morocco': 'Africa',
    
    # South America
    'Brazil': 'South America', 'Argentina': 'South America', 'Chile': 'South America', 'Colombia': 'South America',
    'Peru': 'South America', 'Venezuela, RB': 'South America',
    
    # Middle East
    'Saudi Arabia': 'Middle East', 'United Arab Emirates': 'Middle East', 'Turkey': 'Middle East', 'Israel': 'Middle East',
}

# Apply the continent mapping
df['Continent'] = df['Country Name'].map(continent_mapping).fillna('Other')

# Remove rows with missing data we need
df_clean = df.dropna(subset=['GDP', 'PerCapita', 'Population']).copy()

# Filter to a reasonable year range
years = sorted(df_clean['Time'].unique())
years = [year for year in years if 2000 <= year <= 2019]  # End at 2019 to avoid COVID effects

# Create continent colors (matching your Danish travel theme)
continent_colors = {
    'Europe': '#3498db',        # Blue (to highlight Denmark and Europe)
    'North America': '#2ecc71', # Green
    'Asia': '#e67e22',         # Orange
    'Oceania': '#9b59b6',      # Purple
    'Africa': '#f1c40f',       # Yellow
    'South America': '#1abc9c', # Teal
    'Middle East': '#e74c3c',  # Red
    'Other': '#95a5a6'         # Gray
}

# Create a special marker for Denmark
df_clean['IsDenmark'] = df_clean['Country Name'] == 'Denmark'
df_clean['GDP_Billion'] = df_clean['GDP'] / 1e9  # Convert to billions

# Prepare data for frames
frames = []
for year in years:
    year_data = df_clean[df_clean['Time'] == year].copy()
    frame_data = []
    
    for continent in continent_colors.keys():
        continent_data = year_data[year_data['Continent'] == continent]
        if not continent_data.empty:
            # Create a trace for this continent
            trace = go.Scatter(
                x=continent_data['GDP_Billion'],
                y=continent_data['PerCapita'],
                mode='markers',
                name=continent,
                text=continent_data['Country Name'],
                hovertemplate='<b>%{text}</b><br>' +
                             'GDP: $%{x:.1f}B<br>' +
                             'Tourism/Capita: %{y:.2f}<br>' +
                             '<extra></extra>',  # Removes the secondary box
                marker=dict(
                    size=continent_data['Population'] / 5000000,  # Scale population for bubble size
                    sizemin=4,  # Minimum bubble size
                    color=continent_colors[continent],
                    opacity=0.7,
                    line=dict(width=1, color='white')
                ),
                showlegend=True if year == years[0] else False
            )
            frame_data.append(trace)
    
    # Add special trace for Denmark
    denmark_data = year_data[year_data['Country Name'] == 'Denmark']
    if not denmark_data.empty:
        denmark_trace = go.Scatter(
            x=denmark_data['GDP_Billion'],
            y=denmark_data['PerCapita'],
            mode='markers+text',
            name='Denmark (Highlighted)',
            text=['DENMARK'],
            textposition='top center',
            textfont=dict(size=14, color='#2c3e50', family='Arial Black'),
            marker=dict(
                size=denmark_data['Population'] / 3000000,  # Slightly larger for Denmark
                color='#3498db',
                line=dict(width=3, color='#2c3e50'),
                opacity=1.0
            ),
            hovertemplate='<b>Denmark</b><br>' +
                         'GDP: $%{x:.1f}B<br>' +
                         'Tourism/Capita: %{y:.2f}<br>' +
                         '<extra></extra>',
            showlegend=True if year == years[0] else False
        )
        frame_data.append(denmark_trace)
    
    # Create the frame
    frame = go.Frame(
        data=frame_data,
        name=str(year),
        traces=list(range(len(frame_data)))
    )
    frames.append(frame)

# Create the initial figure with the first year's data
initial_data = frames[0].data
fig = go.Figure(data=initial_data)
fig.frames = frames

# Update layout
fig.update_layout(
    title={
        'text': 'Tourism Departures vs GDP: Denmark\'s Global Journey (2000-2019)',
        'x': 0.5,
        'xanchor': 'center',
        'font': {'size': 20}
    },
    xaxis=dict(
        title="GDP (billions USD PPP)",
        gridcolor='lightgray',
        showgrid=True,
        zeroline=False,
        range=[0, df_clean['GDP_Billion'].max() * 1.05]
    ),
    yaxis=dict(
        title="International Tourism Departures per Capita",
        gridcolor='lightgray',
        showgrid=True,
        zeroline=True,
        range=[-0.1, df_clean['PerCapita'].max() * 1.05]
    ),
    showlegend=True,
    legend=dict(
        orientation="v",
        yanchor="middle",
        y=0.5,
        xanchor="left",
        x=1.02,
        title="Country/Region"
    ),
    plot_bgcolor='white',
    width=1000,
    height=700,
    annotations=[
        # Note about Denmark
        dict(
            text="Denmark is highlighted with bold text and border throughout the animation",
            x=0.02,
            y=0.98,
            xref="paper",
            yref="paper",
            showarrow=False,
            font=dict(size=12, color="#333"),
            align="left",
            bgcolor="rgba(255, 255, 255, 0.8)",
            bordercolor="#dddddd",
            borderwidth=1
        ),
        # Year display
        dict(
            text=str(years[0]),
            x=0.98,
            y=0.02,
            xref="paper",
            yref="paper",
            showarrow=False,
            font=dict(size=40, color="lightgray"),
            align="right"
        ),
        # Note about bubble size
        dict(
            text="Bubble size represents population",
            x=0.02,
            y=0.94,
            xref="paper",
            yref="paper",
            showarrow=False,
            font=dict(size=10, color="gray"),
            align="left"
        )
    ]
)

# Add slider
fig.update_layout(
    sliders=[
        dict(
            active=0,
            yanchor="top",
            xanchor="left",
            currentvalue={
                "font": {"size": 20},
                "prefix": "Year: ",
                "visible": True,
                "xanchor": "center"
            },
            transition={"duration": 500, "easing": "cubic-in-out"},
            pad={"b": 10, "t": 50},
            len=0.9,
            x=0.05,
            y=0,
            steps=[
                dict(
                    args=[
                        [str(year)],
                        {"frame": {"duration": 300, "redraw": True},
                         "mode": "immediate",
                         "transition": {"duration": 300},
                         "title": f"Tourism Departures vs GDP: Denmark's Global Journey ({year})",
                         "annotations": [
                             # Update year annotation
                             dict(
                                 text=str(year),
                                 x=0.98,
                                 y=0.02,
                                 xref="paper",
                                 yref="paper",
                                 showarrow=False,
                                 font=dict(size=40, color="lightgray"),
                                 align="right"
                             ),
                             # Keep other annotations
                             dict(
                                 text="Denmark is highlighted with bold text and border throughout the animation",
                                 x=0.02,
                                 y=0.98,
                                 xref="paper",
                                 yref="paper",
                                 showarrow=False,
                                 font=dict(size=12, color="#333"),
                                 align="left",
                                 bgcolor="rgba(255, 255, 255, 0.8)",
                                 bordercolor="#dddddd",
                                 borderwidth=1
                             ),
                             dict(
                                 text="Bubble size represents population",
                                 x=0.02,
                                 y=0.94,
                                 xref="paper",
                                 yref="paper",
                                 showarrow=False,
                                 font=dict(size=10, color="gray"),
                                 align="left"
                             )
                         ]}
                    ],
                    label=str(year),
                    method="animate"
                )
                for year in years
            ]
        )
    ]
)

# Add update menus for play/pause
fig.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            showactive=False,
            x=0.1,
            xanchor="right",
            y=0,
            yanchor="top",
            buttons=[
                dict(
                    label="Play",
                    method="animate",
                    args=[None, {
                        "frame": {"duration": 1500, "redraw": True},
                        "fromcurrent": True,
                        "transition": {"duration": 500, "easing": "quadratic-in-out"},
                        "mode": "immediate"
                    }]
                ),
                dict(
                    label="Pause",
                    method="animate",
                    args=[[None], {
                        "frame": {"duration": 0, "redraw": True},
                        "mode": "immediate",
                        "transition": {"duration": 0}
                    }]
                )
            ]
        )
    ]
)

# Create auto-play HTML wrapper
auto_play_html = f"""
<!DOCTYPE html>
<html>
<head>
    <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
    <style>
        body {{
            margin: 0;
            padding: 10px;
            font-family: Arial, sans-serif;
            background-color: #f8f9fa;
        }}
        #plot {{
            width: 100%;
            height: 700px;
            background-color: white;
            border-radius: 8px;
            box-shadow: 0 4px 12px rgba(0,0,0,0.1);
        }}
        .info-box {{
            text-align: center;
            margin-bottom: 10px;
            color: #555;
            font-size: 14px;
        }}
    </style>
</head>
<body>
    <div class="info-box">
        Denmark's journey through global tourism patterns (2000-2019)
    </div>
    <div id="plot"></div>
    <script>
        // Plot data
        var plotData = {fig.to_json()};
        Plotly.newPlot('plot', plotData.data, plotData.layout, {{responsive: true}});
        
        // Auto-start the animation after a short delay
        setTimeout(function() {{
            Plotly.animate('plot', null, {{
                frame: {{duration: 1500, redraw: true}},
                transition: {{duration: 500, easing: 'quadratic-in-out'}},
                mode: 'immediate'
            }});
        }}, 1000);
        
        // Restart animation when it ends
        var restarting = false;
        document.getElementById('plot').on('plotly_animated', function() {{
            if (restarting) return;
            
            try {{
                var currentFrame = document.getElementById('plot')._fullLayout._currentFrame;
                var totalFrames = document.getElementById('plot')._fullLayout.sliders[0].steps.length;
                
                if (currentFrame === totalFrames - 1) {{
                    restarting = true;
                    setTimeout(function() {{
                        // Reset to first frame and restart
                        Plotly.animate('plot', [{{
                            name: '{years[0]}',
                            frame: {{duration: 0}},
                            transition: {{duration: 0}}
                        }}]).then(function() {{
                            setTimeout(function() {{
                                Plotly.animate('plot', null, {{
                                    frame: {{duration: 1500, redraw: true}},
                                    transition: {{duration: 500, easing: 'quadratic-in-out'}},
                                    mode: 'immediate'
                                }});
                                restarting = false;
                            }}, 500);
                        }});
                    }}, 3000); // Wait 3 seconds before restarting
                }}
            }} catch(e) {{
                console.log("Animation restart error:", e);
                restarting = false;
            }}
        }});
    </script>
</body>
</html>
"""

# Save the auto-play version
with open('../assets/danish_travel_bubble_enhanced.html', 'w') as f:
    f.write(auto_play_html)

# Also save the plotly version
fig.write_html(
    '../assets/danish_travel_bubble_plotly.html', 
    include_plotlyjs='cdn',
    config={
        'displayModeBar': True, 
        'responsive': True
    }
)

print("Fixed enhanced bubble plot with auto-play and Denmark highlight created successfully!")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



KeyError: 0