In [None]:
import plotly.graph_objects as go
import pandas as pd
import plotly.io as pio

# Load the three raw tables. The paths are relative, so the current working
# directory must contain the data/ folder.
consumption, Region, socioeconomic = map(
    pd.read_csv,
    ("data/Consumption.csv", "data/Region.csv", "data/socioeconomic.csv"),
)

# Danish socioeconomic group labels (expected in the 'Socioøkonomisk status'
# column) mapped to the English names shown in the charts.
translations = {
    "Gennemsnitshusstand": "Average Household",
    "Selvstændig": "Self-employed",
    "Lønmodtager på højeste niveau": "High Income",
    "Lønmodtager på mellemniveau": "Medium Income",
    "Lønmodtager på grundniveau": "Basic Income",
    "Arbejdsløs": "Unemployed",
    "Uddannelsessøgende": "Student",
    "Pensionist, efterlønsmodtager": "Pensioner",
    "Ude af erhverv i øvrigt": "Not in Workforce"
}

# Replace the Danish column headers with the short English names used
# throughout the rest of the script.
socioeconomic = socioeconomic.rename(columns={
    'Socioøkonomisk status': 'Group',
    '09.8 Pakkerejser': 'Travel Packages',
    '11.1 Restaurationstjenester': 'Restaurants',
    '11.2 Overnatningsfaciliteter': 'Accommodation'
})

# Translate the group labels. Series.map returns NaN for any label that is
# not a key of `translations`; fall back to the original label so every group
# keeps a usable name in chart legends and on the x-axes.
socioeconomic['Group_EN'] = (
    socioeconomic['Group'].map(translations).fillna(socioeconomic['Group'])
)

# Total spending per group across the three travel-related categories
socioeconomic['Total'] = (
    socioeconomic['Travel Packages']
    + socioeconomic['Restaurants']
    + socioeconomic['Accommodation']
)

# Largest spenders first, so traces and bars appear in descending order
socioeconomic = socioeconomic.sort_values('Total', ascending=False)

# Radar chart: one filled polygon per socioeconomic group, spanning the three
# spending categories.
categories = ['Travel Packages', 'Restaurants', 'Accommodation']

# Trace colours; reused cyclically if there are more groups than colours.
colors = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A',
          '#19D3F3', '#FF6692', '#B6E880', '#FF97FF']

fig = go.Figure()

# Repeating the first category at the end closes each polygon.
closed_theta = categories + [categories[0]]

for position, (_, record) in enumerate(socioeconomic.iterrows()):
    spending = [record[category] for category in categories]
    fig.add_trace(go.Scatterpolar(
        r=spending + spending[:1],  # first value again to close the loop
        theta=closed_theta,
        fill='toself',
        name=record['Group_EN'],
        line_color=colors[position % len(colors)],
        opacity=0.8,
    ))

# Centred title and a horizontal legend placed below the plot area.
radar_title = dict(
    text='How Danes Budget for Travel by Demographic Group',
    y=0.95,
    x=0.5,
    xanchor='center',
    yanchor='top',
    font=dict(size=24),
)
radar_legend = dict(
    title="Demographic Groups",
    orientation="h",
    yanchor="bottom",
    y=-0.15,
    xanchor="center",
    x=0.5,
)

# NOTE(review): the radial axis is fixed at 0-25000; values above that would
# fall outside the plotted range -- confirm against the data.
fig.update_layout(
    polar=dict(radialaxis=dict(visible=True, range=[0, 25000])),
    showlegend=True,
    title=radar_title,
    margin=dict(l=80, r=80, t=120, b=80),
    height=800,
    width=1000,
    legend=radar_legend,
)

# Export the radar chart as a standalone HTML page
fig.write_html('danish_travel_radar.html')

# Second visualization: grouped bars, one bar per spending category within
# each demographic group.
fig2 = go.Figure()

for category, bar_color in zip(categories, colors):
    amounts = socioeconomic[category]
    fig2.add_trace(go.Bar(
        x=socioeconomic['Group_EN'],
        y=amounts,
        name=category,
        marker_color=bar_color,
        # Label each bar with its thousands-separated amount.
        text=amounts.map(lambda amount: f'{amount:,} DKK'),
        textposition='auto',
    ))

# Centred title and a horizontal legend below the axis; the large bottom
# margin leaves room for the rotated tick labels.
grouped_title = dict(
    text='Travel Spending Breakdown by Danish Demographic Groups',
    y=0.95,
    x=0.5,
    xanchor='center',
    yanchor='top',
    font=dict(size=24),
)
grouped_legend = dict(
    title="Spending Category",
    orientation="h",
    yanchor="bottom",
    y=-0.3,
    xanchor="center",
    x=0.5,
)

fig2.update_layout(
    title=grouped_title,
    xaxis_title="Demographic Group",
    yaxis_title="Spending in DKK",
    barmode='group',
    height=600,
    width=1000,
    margin=dict(l=50, r=50, t=120, b=150),
    legend=grouped_legend,
)

# Slant the group names so they stay readable
fig2.update_xaxes(tickangle=45)

# Export the grouped bar chart as a standalone HTML page
fig2.write_html('danish_travel_bars.html')

# Third visualization: the same category values stacked into a single bar per
# demographic group.
# NOTE(review): the title says "Proportional", but the bars stack the raw
# category values; nothing here normalises them to shares.
fig3 = go.Figure()

for category, bar_color in zip(categories, colors):
    fig3.add_trace(go.Bar(
        x=socioeconomic['Group_EN'],
        y=socioeconomic[category],
        name=category,
        marker_color=bar_color,
    ))

# Centred title and a horizontal legend below the axis; the large bottom
# margin leaves room for the rotated tick labels.
stacked_title = dict(
    text='Proportional Travel Spending by Danish Demographic Groups',
    y=0.95,
    x=0.5,
    xanchor='center',
    yanchor='top',
    font=dict(size=24),
)
stacked_legend = dict(
    title="Spending Category",
    orientation="h",
    yanchor="bottom",
    y=-0.3,
    xanchor="center",
    x=0.5,
)

fig3.update_layout(
    title=stacked_title,
    xaxis_title="Demographic Group",
    yaxis_title="Spending in DKK",
    barmode='stack',
    height=600,
    width=1000,
    margin=dict(l=50, r=50, t=120, b=150),
    legend=stacked_legend,
)

# Slant the group names so they stay readable
fig3.update_xaxes(tickangle=45)

# Export the stacked bar chart as a standalone HTML page
fig3.write_html('danish_travel_stacked.html')

# Bonus chart: compare the same three spending categories across regions.

# Raw Region.csv headers mapped to the category labels used by the other
# charts.
region_column_labels = {
    '09.8 Package Holidays - Fixed Prices': 'Travel Packages',
    '11.1 Restaurant Services - Fixed Prices': 'Restaurants',
    '11.2 Accommodation Services - Fixed Prices': 'Accommodation',
}
required_region_columns = ['Region', *region_column_labels]
missing_region_columns = [
    column for column in required_region_columns
    if column not in Region.columns
]

if not missing_region_columns:
    # Select by the raw header names first and rename afterwards. Renaming
    # before checking for the raw names would make every check fail, so the
    # chart would never be produced.
    region_data = Region[required_region_columns].rename(
        columns=region_column_labels
    )

    # Grouped bars: one bar per spending category within each region
    fig4 = go.Figure()
    for i, category in enumerate(region_column_labels.values()):
        fig4.add_trace(go.Bar(
            x=region_data['Region'],
            y=region_data[category],
            name=category,
            marker_color=colors[i]
        ))

    # Centred title and a horizontal legend below the axis
    fig4.update_layout(
        title={
            'text': 'Travel Spending by Danish Regions',
            'y': 0.95,
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top',
            'font': {'size': 24}
        },
        xaxis_title="Region",
        yaxis_title="Spending in DKK",
        barmode='group',
        height=600,
        width=1000,
        margin=dict(l=50, r=50, t=120, b=150),
        legend=dict(
            title="Spending Category",
            orientation="h",
            yanchor="bottom",
            y=-0.3,
            xanchor="center",
            x=0.5
        ),
    )

    # Export the regional comparison chart as a standalone HTML page
    fig4.write_html('danish_travel_regions.html')
else:
    # Say why the chart is absent instead of skipping it silently.
    print(
        "Skipping regional chart; Region.csv lacks columns: "
        f"{missing_region_columns}"
    )

print("Visualizations created and saved as HTML files!")


invalid escape sequence '\C'


invalid escape sequence '\C'


invalid escape sequence '\C'



FileNotFoundError: [Errno 2] No such file or directory: 'data\\Consumption.csv'