In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Locations are relative to the notebook's working directory
ASSETS_DIR = '../assets'
DATA_DIR = '../data'

# The charts are saved under ASSETS_DIR, so create it up front if it is missing
os.makedirs(ASSETS_DIR, exist_ok=True)

# Read the three raw tables: socioeconomic status, consumption and region
socioeconomic = pd.read_csv(f'{DATA_DIR}/socioeconomic.csv')
consumption = pd.read_csv(f'{DATA_DIR}/Consumption.csv')
regions = pd.read_csv(f'{DATA_DIR}/Region.csv')

# Lookup table: Danish socioeconomic group name (as it appears in the
# 'Socioøkonomisk status' column) -> English label shown on the chart axes
socio_translations = {
    'Gennemsnitshusstand': 'Average Household',
    'Selvstændig': 'Self-employed',
    'Lønmodtager på højeste niveau': 'High Income',
    'Lønmodtager på mellemniveau': 'Medium Income',
    'Lønmodtager på grundniveau': 'Basic Income',
    'Arbejdsløs': 'Unemployed',
    'Uddannelsessøgende': 'Student',
    'Pensionist, efterlønsmodtager': 'Pensioner',
    'Ude af erhverv i øvrigt': 'Not in Workforce',
}

# Clean and process socioeconomic data
socio_df = socioeconomic.copy()

# Strip stray whitespace from the headers so the rename below matches them
socio_df.columns = [col.strip() for col in socio_df.columns]
socio_df = socio_df.rename(columns={
    'Socioøkonomisk status': 'Group',
    '09.8 Pakkerejser': 'Packages',
    '11.1 Restaurationstjenester': 'Restaurants',
    '11.2 Overnatningsfaciliteter': 'Accommodation'
})

# English display name for each group. The lookup is done on the
# whitespace-stripped name, and any group missing from socio_translations
# keeps its original Danish name instead of silently becoming NaN (which
# would later be drawn as a "nan" axis label).
socio_df['Group_EN'] = (
    socio_df['Group']
    .str.strip()
    .map(socio_translations)
    .fillna(socio_df['Group'])
)

# Combined spending across the three travel categories
socio_df['Total'] = socio_df['Packages'] + socio_df['Restaurants'] + socio_df['Accommodation']

# Process consumption/age data and region data
#
# Both tables use the same long column labels, so they share one helper
# instead of two copy-pasted stanzas.

# Fragment of the source column label -> short column name used by the charts
_FIXED_PRICE_CATEGORIES = {
    'Package Holidays': 'Packages',
    'Restaurant Services': 'Restaurants',
    'Accommodation Services': 'Accommodation',
}


def _fixed_price_column(df, category):
    """Return the first column of df whose name contains '<category> - Fixed Prices'.

    Raises:
        KeyError: if no column name contains that text. The message lists the
            columns that are present, which is easier to act on than a bare
            "list index out of range".
    """
    marker = f'{category} - Fixed Prices'
    for col in df.columns:
        if marker in str(col):
            return col
    raise KeyError(
        f"No column containing '{marker}' found; available columns: {list(df.columns)}"
    )


def _with_spending_columns(raw):
    """Return a copy of raw with the fixed-price columns renamed and a 'Total' added.

    The three fixed-price columns become 'Packages', 'Restaurants' and
    'Accommodation'; 'Total' is their row-wise sum. The input frame is left
    untouched because DataFrame.rename returns a new object.
    """
    renames = {
        _fixed_price_column(raw, category): short_name
        for category, short_name in _FIXED_PRICE_CATEGORIES.items()
    }
    tidy = raw.rename(columns=renames)
    tidy['Total'] = tidy['Packages'] + tidy['Restaurants'] + tidy['Accommodation']
    return tidy


age_df = _with_spending_columns(consumption)
region_df = _with_spending_columns(regions)

# Function to create more polished radar chart that closely matches the example image
def create_polished_radar_chart(df, title, filename, group_col='Group', display_col=None):
    """
    Draw a radar chart of travel spending per demographic group and save it as a PNG.

    Each spoke of the chart is one row of df (a demographic group) and each
    filled polygon is one spending category. All three categories are divided
    by the same maximum, so they share a single 0-100 radial scale and remain
    comparable with each other.

    Parameters:
    df: DataFrame with 'Packages', 'Restaurants' and 'Accommodation' columns
        plus the label column(s) named below
    title: Title for the chart
    filename: Output filename without extension; the chart is written to
        '../assets/<filename>.png'
    group_col: Column containing group identifiers
    display_col: Column containing display names (if None, uses group_col).
        Rows whose display name is missing fall back to the group_col value.

    Returns:
    None. The figure is saved to disk, closed, and a confirmation is printed.
    """
    if display_col is None:
        display_col = group_col

    # Legend order; the polygons themselves are drawn in the reverse order below
    categories = ('Packages', 'Restaurants', 'Accommodation')

    # Spoke labels. A display column built with Series.map can contain NaN for
    # unmapped groups; use the raw identifier there rather than drawing "nan".
    labels = df[display_col]
    if display_col != group_col and group_col in df.columns:
        labels = labels.where(labels.notna(), df[group_col])
    groups = labels.tolist()

    # One evenly spaced angle per group, then repeat the first to close the loop
    angles = np.linspace(0, 2 * np.pi, len(groups), endpoint=False).tolist()
    angles += angles[:1]

    # Shared maximum across all three categories (NaN-skipping). If the data is
    # all zero or all missing, fall back to 1 so the division below is defined.
    category_max = df[list(categories)].max().max()
    if not np.isfinite(category_max) or category_max == 0:
        category_max = 1

    # Per-category values on the 0-100 scale, with the first point repeated so
    # each polygon closes
    normalised = {}
    for name in categories:
        scaled = (df[name] / category_max * 100).tolist()
        normalised[name] = scaled + scaled[:1]

    # Define colors exactly matching the example image
    category_colors = {
        'Packages': '#8c68af',      # Moderate purple
        'Restaurants': '#78c17c',   # Moderate green
        'Accommodation': '#f5d98f'  # Light yellow/orange
    }

    # Define Danish labels for the legend
    danish_labels = {
        'Packages': 'Rejsepakker',
        'Restaurants': 'Restauranter',
        'Accommodation': 'Overnatning'
    }

    # Create the plot with a white background like in the example
    fig = plt.figure(figsize=(10, 10), facecolor='white')
    try:
        ax = fig.add_subplot(111, polar=True)

        # Light, thin gridlines with four concentric rings and no radial labels
        ax.grid(True, color='#e0e0e0', alpha=0.95, linestyle='-', linewidth=0.5)
        ax.set_yticks([25, 50, 75, 100])
        ax.set_yticklabels([])

        # Hide the outer circular spine
        ax.spines['polar'].set_visible(False)

        # Draw order matters for overlap: Accommodation (yellow) at the bottom,
        # then Restaurants (green), then Packages (purple) on top
        for name in reversed(categories):
            ax.fill(angles, normalised[name], color=category_colors[name], alpha=0.3,
                    edgecolor=category_colors[name], linewidth=1.3)

        # One tick per group; the duplicated closing angle is excluded
        ax.set_xticks(angles[:-1])
        ax.set_xticklabels(groups, color='#82817f', fontsize=9)

        # Subtle title (the example image has none)
        ax.set_title(title, size=14, color='#707070', pad=15, fontweight='light')

        # Legend with one round colored marker per category, below the chart
        legend_elements = [
            plt.Line2D([0], [0], marker='o', color='w',
                       markerfacecolor=category_colors[name], markersize=10,
                       label=danish_labels[name])
            for name in categories
        ]
        ax.legend(handles=legend_elements, loc='lower center',
                  bbox_to_anchor=(0.5, -0.1), ncol=3, frameon=False,
                  handletextpad=0.5, fontsize=10)

        # Save the figure
        fig.tight_layout()
        fig.savefig(f'../assets/{filename}.png', dpi=300, bbox_inches='tight')
    finally:
        # Always release this specific figure, even if drawing or saving failed
        plt.close(fig)

    print(f"Final polished radar visualization created and saved to '../assets/{filename}.png'")

# Render one radar chart per demographic breakdown.
# Each entry: (data, chart title, output file stem, group column, display column)
chart_specs = [
    # 1. Socioeconomic groups (axes labelled with the English translations)
    (socio_df, 'Travel Spending Categories by Socioeconomic Group',
     'danish_travel_socioeconomic_final', 'Group', 'Group_EN'),
    # 2. Age groups
    (age_df, 'Travel Spending Categories by Age Group',
     'danish_travel_age_final', 'Age Group', 'Age Group'),
    # 3. Regions
    (region_df, 'Travel Spending Categories by Region',
     'danish_travel_region_final', 'Region', 'Region'),
]

for frame, chart_title, file_stem, id_col, label_col in chart_specs:
    create_polished_radar_chart(frame, chart_title, file_stem, id_col, label_col)

print("All final polished radar visualizations created successfully!")

Final polished radar visualization created and saved to '../assets/danish_travel_socioeconomic_final.png'
Final polished radar visualization created and saved to '../assets/danish_travel_age_final.png'
Final polished radar visualization created and saved to '../assets/danish_travel_region_final.png'
All final polished radar visualizations created successfully!
