In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Make sure the output directory exists
# This directory will store the generated combined radar chart image
os.makedirs('../assets', exist_ok=True)

# Load data from CSV files
# Assuming these files are located in a 'data' directory one level up from the script
try:
    socioeconomic = pd.read_csv('../data/socioeconomic.csv')
    consumption = pd.read_csv('../data/Consumption.csv')
    regions = pd.read_csv('../data/Region.csv')
except FileNotFoundError as e:
    print(f"Error loading data file: {e}")
    print("Please ensure 'socioeconomic.csv', 'Consumption.csv', and 'Region.csv' are in the '../data/' directory.")
    # In a real script, you might handle this more gracefully, but for this example, we'll exit
    exit()

# Define translations for Danish to English socioeconomic statuses
# This is used for clearer labeling in the radar charts
socio_translations = {
    "Gennemsnitshusstand": "Average Household",
    "Selvstændig": "Self-employed",
    "Lønmodtager på højeste niveau": "High Income",
    "Lønmodtager på mellemniveau": "Medium Income",
    "Lønmodtager på grundniveau": "Basic Income",
    "Arbejdsløs": "Unemployed",
    "Uddannelsessøgende": "Student",
    "Pensionist, efterlønsmodtager": "Pensioner",
    "Ude af erhverv i øvrigt": "Not in Workforce"
}

# Clean and process socioeconomic data
# Renaming columns and adding a translated group column
socio_df = socioeconomic.copy()
socio_df.columns = [col.strip() for col in socio_df.columns] # Remove leading/trailing whitespace from column names
socio_df = socio_df.rename(columns={
    'Socioøkonomisk status': 'Group',
    '09.8 Pakkerejser': 'Packages',
    '11.1 Restaurationstjenester': 'Restaurants',
    '11.2 Overnatningsfaciliteter': 'Accommodation'
})

# Check for unmapped values
print("Original socioeconomic groups in data:")
print(socio_df['Group'].unique())

# Add missing translations if any
missing_groups = [group for group in socio_df['Group'].unique() if group not in socio_translations and pd.notna(group)]
if missing_groups:
    print(f"Warning: These socioeconomic groups are not in the translation dictionary: {missing_groups}")
    # Add default translations for missing groups
    for group in missing_groups:
        socio_translations[group] = group  # Use the same name if no translation available

# Map Danish group names to English
socio_df['Group_EN'] = socio_df['Group'].map(socio_translations)

# Handle any remaining NaN values
socio_df = socio_df.dropna(subset=['Group_EN'])  # Remove rows where Group_EN is NaN

# Ensure we have data to work with
if socio_df.empty:
    print("Error: No valid socioeconomic data after cleaning.")
    exit()

# Calculate total spending
socio_df['Total'] = socio_df['Packages'] + socio_df['Restaurants'] + socio_df['Accommodation'] # Calculate total spending

print(f"Final socioeconomic groups: {socio_df['Group_EN'].unique()}")


# Process consumption/age data
# Identifying and renaming relevant columns for age-based data
age_df = consumption.copy()
# Find the exact column names containing 'Package Holidays - Fixed Prices', etc.
# Using error handling in case column names change slightly
try:
    package_col_age = [col for col in age_df.columns if 'Package Holidays - Fixed Prices' in col][0]
    restaurant_col_age = [col for col in age_df.columns if 'Restaurant Services - Fixed Prices' in col][0]
    accommodation_col_age = [col for col in age_df.columns if 'Accommodation Services - Fixed Prices' in col][0]
except IndexError:
    print("Error: Could not find expected 'Fixed Prices' columns in Consumption.csv.")
    print("Please check column names like 'Package Holidays - Fixed Prices'.")
    exit()


age_df = age_df.rename(columns={
    package_col_age: 'Packages',
    restaurant_col_age: 'Restaurants',
    accommodation_col_age: 'Accommodation',
    'AGE': 'Age Group' # Assuming 'AGE' is the column for age groups
})
age_df['Total'] = age_df['Packages'] + age_df['Restaurants'] + age_df['Accommodation'] # Calculate total spending

# Process region data
# Identifying and renaming relevant columns for region-based data
region_df = regions.copy()
# Find the exact column names containing 'Package Holidays - Fixed Prices', etc.
try:
    package_col_region = [col for col in region_df.columns if 'Package Holidays - Fixed Prices' in col][0]
    restaurant_col_region = [col for col in region_df.columns if 'Restaurant Services - Fixed Prices' in col][0]
    accommodation_col_region = [col for col in region_df.columns if 'Accommodation Services - Fixed Prices' in col][0]
except IndexError:
    print("Error: Could not find expected 'Fixed Prices' columns in Region.csv.")
    print("Please check column names like 'Package Holidays - Fixed Prices'.")
    exit()

region_df = region_df.rename(columns={
    package_col_region: 'Packages',
    restaurant_col_region: 'Restaurants',
    accommodation_col_region: 'Accommodation',
    'REGION': 'Region' # Assuming 'REGION' is the column for regions
})
region_df['Total'] = region_df['Packages'] + region_df['Restaurants'] + region_df['Accommodation'] # Calculate total spending


# Define MORE VIBRANT colors for each spending category
# Using distinct and brighter colors
category_colors = {
    'Packages': '#9b59b6',      # Amethyst (Vibrant Purple)
    'Restaurants': '#2ecc71',   # Emerald (Vibrant Green)
    'Accommodation': '#f1c40f'  # Sunflower (Vibrant Yellow)
}

# Define ENGLISH labels for the legend
# Using English labels as requested to clarify color mapping on the plot
english_labels = {
    'Packages': 'Package Holidays',
    'Restaurants': 'Restaurants',
    'Accommodation': 'Accommodation'
}

# Function to plot a single radar chart on a given axes object
def plot_single_radar_chart(ax, df, chart_title, group_col='Group', display_col=None):
    """
    Plots a single radar chart on a given Matplotlib axes object.

    Parameters:
    ax: The Matplotlib axes object to plot on.
    df: DataFrame with the data. Expected columns: 'Packages', 'Restaurants', 'Accommodation', and a group column.
    chart_title: Title for this specific subplot.
    group_col: Column containing the primary group identifiers (used for data processing).
    display_col: Column containing display names for the groups on the chart axes (if None, uses group_col).
    """
    if display_col is None:
        display_col = group_col

    # Prepare the data for plotting
    groups = df[display_col].tolist()
    n_groups = len(groups)

    # Calculate angles for each group
    angles = np.linspace(0, 2 * np.pi, n_groups, endpoint=False).tolist()
    angles += angles[:1] # Complete the loop

    # Normalize to a 0-100 scale using the same max value for all categories across ALL dataframes
    # This ensures consistent scale across all three subplots for comparison
    # We need to find the global max across all three dataframes
    global_max = max(
        socio_df[['Packages', 'Restaurants', 'Accommodation']].values.max(),
        age_df[['Packages', 'Restaurants', 'Accommodation']].values.max(),
        region_df[['Packages', 'Restaurants', 'Accommodation']].values.max()
    )

    # Check if global_max is zero to avoid division by zero
    if global_max == 0:
        print(f"Warning: Global maximum spending is 0. Cannot create radar charts.")
        return


    # Get values for each category and normalize them
    packages_values = df['Packages'].tolist()
    packages_values += packages_values[:1]
    packages_norm = [(val / global_max * 100) for val in packages_values]

    restaurants_values = df['Restaurants'].tolist()
    restaurants_values += restaurants_values[:1]
    restaurants_norm = [(val / global_max * 100) for val in restaurants_values]

    accommodation_values = df['Accommodation'].tolist()
    accommodation_values += accommodation_values[:1]
    accommodation_norm = [(val / global_max * 100) for val in accommodation_values]

    # Prepare group labels for plotting
    groups += groups[:1]

    # Set up cleaner gridlines on the subplot axes
    ax.grid(True, color='#cccccc', alpha=0.8, linestyle='-', linewidth=0.7) # Slightly darker and more prominent grid

    # Set radial ticks and labels
    ax.set_yticks([25, 50, 75, 100])
    ax.set_yticklabels(['25%', '50%', '75%', '100%'], color='#777777', fontsize=8)
    ax.set_ylim(0, 100) # Ensure the limits are from 0 to 100

    # Remove default radial grid lines and add custom ones
    ax.spines['polar'].set_visible(False) # Keep the outer circle visible if desired, or hide

    # Plot each category as a filled polygon with adjusted alpha and edge color
    alpha_level = 0.5 # Slightly higher alpha for more solid look

    # Accommodation (Vibrant Yellow polygon)
    # Use english_labels here for the legend entry in the figure legend
    ax.fill(angles, accommodation_norm, color=category_colors['Accommodation'], alpha=alpha_level, edgecolor=category_colors['Accommodation'], linewidth=1.5, label=english_labels['Accommodation'])
    ax.plot(angles, accommodation_norm, color=category_colors['Accommodation'], linewidth=2.0) # Add a line on top for clarity

    # Restaurants (Vibrant Green polygon)
    # Use english_labels here for the legend entry in the figure legend
    ax.fill(angles, restaurants_norm, color=category_colors['Restaurants'], alpha=alpha_level, edgecolor=category_colors['Restaurants'], linewidth=1.5, label=english_labels['Restaurants'])
    ax.plot(angles, restaurants_norm, color=category_colors['Restaurants'], linewidth=2.0) # Add a line on top for clarity

    # Packages (Vibrant Purple polygon)
    # Use english_labels here for the legend entry in the figure legend
    ax.fill(angles, packages_norm, color=category_colors['Packages'], alpha=alpha_level, edgecolor=category_colors['Packages'], linewidth=1.5, label=english_labels['Packages'])
    ax.plot(angles, packages_norm, color=category_colors['Packages'], linewidth=2.0) # Add a line on top for clarity

    # Set the labels for each demographic group on the axes
    ax.set_xticks(angles[:-1])
    ax.set_xticklabels(groups[:-1], color='#555555', fontsize=10) # Slightly larger and darker labels

    # Set title for the subplot
    ax.set_title(chart_title, size=14, color='#333333', pad=20, fontweight='bold') # Prominent subplot title


# --- Create the combined figure with subplots ---

# Create a figure with 3 subplots arranged in a row, using polar projection
fig, axes = plt.subplots(3, 1, figsize=(10, 24), subplot_kw=dict(polar=True))

# Add a main title to the entire figure
fig.suptitle('Overview of Travel Spending Patterns', size=20, color='#333333', y=1.05, fontweight='bold') # y adjusts position above subplots

# Plot each radar chart on its respective subplot axes
plot_single_radar_chart(axes[0], socio_df, 'Socioeconomic Group', 'Group', 'Group_EN')
plot_single_radar_chart(axes[1], age_df, 'Age Group', 'Age Group', 'Age Group')
plot_single_radar_chart(axes[2], region_df, 'Region', 'Region', 'Region')

# Create a single legend for the entire figure
# We get the handles and labels from one of the axes plots (they are the same for all)
handles, labels = axes[0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncol=3, bbox_to_anchor=(0.5, -0.05), frameon=False, fontsize=12)

# Adjust layout to prevent titles/labels overlapping and make space for suptitle and legend
plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # rect=[left, bottom, right, top] in normalized figure coordinates

# Save the combined figure
plt.savefig('../assets/combined_radar_charts.png', dpi=300, bbox_inches='tight') # Save with high resolution

# Close the figure to free up memory
plt.close(fig)

print("Combined radar visualization created successfully as '../assets/combined_radar_charts.png'!")

Original socioeconomic groups in data:
['Gennemsnitshusstand' 'Selvstændig' 'Lønmodtager på højeste niveau'
 'Lønmodtager på mellemniveau' 'Lønmodtager på grundniveau' 'Arbejdsløs'
 'Uddannelsessøgende' 'Pensionist' 'Ude af erhverv i øvrigt']
Final socioeconomic groups: ['Average Household' 'Self-employed' 'High Income' 'Medium Income'
 'Basic Income' 'Unemployed' 'Student' 'Pensionist' 'Not in Workforce']
Combined radar visualization created successfully as '../assets/combined_radar_charts.png'!
