In [6]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as plt
import plotly.express as px
import json

# Load the canteen export; fields in this file are separated by '@'.
df = pd.read_csv('german-canteens(filtered)(2).csv', sep='@', engine='python')

# Turn the date column into real datetimes.
df['date'] = pd.to_datetime(df['date'])

# Coerce every price column to numeric; values that cannot be parsed become NaN.
for price_column in ('student_price', 'employee_price', 'guest_price'):
    df[price_column] = pd.to_numeric(df[price_column], errors='coerce')

def remove_outliers_bi_weekly(df, columns=None, whisker=1.5):
    """Drop rows whose values fall outside the IQR fences of the given columns.

    For each column the fences are ``Q1 - whisker * IQR`` and
    ``Q3 + whisker * IQR`` (both inclusive). Columns are processed one after
    another: the quartiles of a column are computed on the rows that survived
    the filters of the columns before it. Rows holding NaN in a processed
    column are dropped, because NaN fails the range comparison.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to filter. It is not modified.
    columns : sequence of str, optional
        Columns to filter on. Defaults to ``student_price``,
        ``employee_price`` and ``guest_price``.
    whisker : float, optional
        Multiple of the IQR that defines the fences. Defaults to 1.5.

    Returns
    -------
    pandas.DataFrame
        The rows of ``df`` that lie within the fences of every column
        (``df`` itself when ``columns`` is empty).
    """
    if columns is None:
        columns = ['student_price', 'employee_price', 'guest_price']

    filtered = df
    for column in columns:
        lower_quartile = filtered[column].quantile(0.25)
        upper_quartile = filtered[column].quantile(0.75)
        margin = whisker * (upper_quartile - lower_quartile)
        # `between` is inclusive on both ends and evaluates to False for NaN.
        in_range = filtered[column].between(lower_quartile - margin,
                                            upper_quartile + margin)
        filtered = filtered[in_range]
    return filtered

# Price columns mapped to the label used in the legend entries.
price_labels = {
    'student_price': 'Student',
    'employee_price': 'Employee',
    'guest_price': 'Guest',
}
price_cols = list(price_labels)

# Keep only rows where all three prices are present and strictly positive.
df_cleaned = df.dropna(subset=price_cols)
df_cleaned = df_cleaned[(df_cleaned[price_cols] > 0).all(axis=1)]

# Average the prices over two-week bins. The columns are selected with a list:
# the implicit-tuple form (obj['a', 'b', 'c']) is rejected by pandas groupby
# objects since pandas 2.0, and the list form is the documented one.
bi_weekly_averages = (
    df_cleaned.set_index('date')
    .resample('2W')[price_cols]
    .mean()
    .reset_index()
)
bi_weekly_averages_no_outliers = remove_outliers_bi_weekly(bi_weekly_averages)

fig = go.Figure()

# One line per price category showing the bi-weekly averages.
for column, label in price_labels.items():
    fig.add_trace(go.Scatter(
        x=bi_weekly_averages_no_outliers['date'],
        y=bi_weekly_averages_no_outliers[column],
        mode='lines+markers',
        name=f'Average {label} Price',
    ))

# The dotted "trend" lines simply join the first and the last remaining point
# of each series. They are skipped when no rows are left, because .iloc[0]
# would raise an IndexError on an empty frame.
if not bi_weekly_averages_no_outliers.empty:
    endpoints = bi_weekly_averages_no_outliers.iloc[[0, -1]]
    for column, label in price_labels.items():
        fig.add_trace(go.Scatter(
            x=endpoints['date'].tolist(),
            y=endpoints[column].tolist(),
            mode='lines',
            line=dict(dash='dot'),
            name=f'Trend - {label}',
        ))

fig.update_layout(title='Bi-Weekly Average Meal Prices in German Canteens Over Time (Outliers Removed)',
                  xaxis_title='Date',
                  yaxis_title='Price (EUR)',
                  legend_title='Category')

fig.show()

In [8]:
csv_path = 'german-canteens(filtered)(2).csv'
json_path = 'further_updated_german_canteens.json'
price_columns = ['student_price', 'employee_price', 'guest_price']

# Reload the canteen data and the per-canteen state information.
df = pd.read_csv(csv_path, sep='@', encoding='utf8')
with open(json_path, 'r', encoding='utf-8') as file:
    state_info = json.load(file)
# The JSON must yield 'id' and 'state' columns; both are selected below.
state_df = pd.DataFrame(state_info)

# Attach the state to every row by matching mensa_id against the JSON id.
# pd.merge defaults to an inner join, so rows without a match are dropped.
merged_df = pd.merge(df, state_df[['id', 'state']], left_on='mensa_id', right_on='id')

# Coerce prices to numeric (unparseable values become NaN) and drop rows
# that are missing any of the three prices.
for column in price_columns:
    merged_df[column] = pd.to_numeric(merged_df[column], errors='coerce')
cleaned_df = merged_df.dropna(subset=price_columns)

# Mean price per state for each price category.
grouped = cleaned_df.groupby('state')[price_columns].mean().reset_index()

# Build every chart title from one expression so the initial title and the
# titles set by the dropdown use the same casing ("Student Price"); the
# original used .capitalize() for the buttons, which gave "Student price".
chart_titles = {
    price_type: f"Average {price_type.replace('_', ' ').title()} in German States"
    for price_type in price_columns
}
# The first price column is the one shown initially ("active": 0 below).
default_price = price_columns[0]

fig = go.Figure()
for price_type in price_columns:
    fig.add_trace(
        go.Bar(
            x=grouped['state'],
            y=grouped[price_type],
            name=price_type,
            # Only the default category is visible until a button is used.
            visible=(price_type == default_price),
        )
    )

# One dropdown entry per price category: show only that category's bars
# and update the chart title accordingly.
buttons = [
    dict(
        label=price_type,
        method="update",
        args=[{"visible": [price == price_type for price in price_columns]},
              {"title": chart_titles[price_type]}],
    )
    for price_type in price_columns
]

fig.update_layout(
    updatemenus=[{
        "buttons": buttons,
        "direction": "down",
        "active": 0,
    }],
    title=chart_titles[default_price],
    xaxis=dict(title="State"),
    yaxis=dict(title="Average Price (EUR)"),
    barmode="group"
)

fig.show()