# Alter



## Import

In [10]:
from pathlib import Path

import pandas as pd
import plotly.express as px
import plotly.io as pio

## Template

In [11]:
def _axis_defaults():
    """Return a fresh dict holding the styling that both axes share."""
    return {
        "zerolinecolor": "lightgrey",
        "tickfont": {"color": "grey", "size": 12},
        "title_font": {"color": "grey", "weight": "bold", "size": 13},
        "title_standoff": 15,
        "ticklabelstandoff": 10,
    }


# Custom plotly template: bold black title with a grey subtitle, grey axis
# labels, and grid lines along the y axis only.
infoviz_template = {
    "layout": {
        # NOTE(review): a template nested inside a template's layout -- confirm
        # this has the intended effect; plotly's documented way to combine
        # themes is a "+"-joined name such as "plotly_white+infoviz".
        "template": "plotly_white",
        "title": {
            "font": {
                "size": 20,
                "family": "Arial",
                "weight": "bold",
                "color": "black",
            },
            # x=0 in paper coordinates with a left anchor.
            "xanchor": "left",
            "xref": "paper",
            "x": 0,
            "subtitle": {
                "text": "",
                "font": {"color": "gray", "size": 13},
            },
        },
        "xaxis": {
            "showgrid": False,
            **_axis_defaults(),
            "ticklabelposition": "outside bottom",
        },
        "yaxis": {
            "showgrid": True,
            "gridcolor": "lightgrey",
            **_axis_defaults(),
            "ticklabelposition": "outside left",
        },
    }
}

# Register the template under the name used by later `template="infoviz"` calls.
pio.templates["infoviz"] = infoviz_template

## Import Data

In [15]:
# Read every worksheet of the workbook; `sheet_name=None` yields a dict that
# maps each sheet name to its DataFrame.
file_path = "data.xlsx"
sheets = pd.read_excel(file_path, sheet_name=None, engine="openpyxl")

# The first sheet is irrelevant for the analysis, so remove it from the dict.
first_sheet_name = next(iter(sheets))
del sheets[first_sheet_name]

# Age-group labels that are looked up in the "Category" column further below.
altersgruppen = [
    "unter 18 Jahre (%)",
    "18 bis 20 Jahre (%)",
    "21 bis 23 Jahre (%)",
    "24 bis 26 Jahre (%)",
    "27 bis 29 Jahre (%)",
    "30 bis 32 Jahre (%)",
    "33 bis 35 Jahre (%)",
    "älter als 35 Jahre (%)",
]

# Work on the sheet whose name matches the selected year.
jahr = "2024"
df = sheets[jahr]

# Column labels containing "Bachelor" / "Master".
bachelor_studiengaenge = df.filter(like="Bachelor").columns
master_studiengaenge = df.filter(like="Master").columns

# Column-oriented container for the plot data; one list entry per age group.
pyramiden_data = {
    spalte: [] for spalte in ("Altersgruppe", "Bachelor (%)", "Master (%)")
}

# Aggregate the per-program age shares into one weighted Bachelor and one
# weighted Master percentage per age group.

# The "Alter" filter and the "Anzahl" (total number) rows are the same for
# every age group, so they are computed once instead of on every iteration.
alter_mask = df["Variable"] == "Alter"
anzahl_rows = df[alter_mask & (df["Category"] == "Anzahl")]
anzahl_bachelor = anzahl_rows[bachelor_studiengaenge]
anzahl_master = anzahl_rows[master_studiengaenge]
total_bachelor_students = anzahl_bachelor.sum().sum()
total_master_students = anzahl_master.sum().sum()

# Iterate over age groups
for category in altersgruppen:
    row = df[
        alter_mask
        & df["Category"].str.contains(category, na=False, regex=False)
    ]

    if row.empty:
        # No row for this age group in the sheet: leave it out of the result.
        continue

    # Absolute numbers by multiplying percentage with total number
    abs_bachelor = (row[bachelor_studiengaenge] / 100) * anzahl_bachelor.iloc[0]
    abs_master = (row[master_studiengaenge] / 100) * anzahl_master.iloc[0]

    # Sum over all study programs
    total_abs_bachelor = abs_bachelor.sum().sum()
    total_abs_master = abs_master.sum().sum()

    # Compute weighted percentage (0 when there is nothing to weight by)
    weighted_bachelor = (
        total_abs_bachelor / total_bachelor_students * 100
        if total_bachelor_students > 0 else 0
    )
    weighted_master = (
        total_abs_master / total_master_students * 100
        if total_master_students > 0 else 0
    )

    # Fill result structure
    pyramiden_data["Altersgruppe"].append(category.replace(" (%)", ""))
    pyramiden_data["Bachelor (%)"].append(-weighted_bachelor)  # negative for left side
    pyramiden_data["Master (%)"].append(weighted_master)       # positive for right side

# Create DataFrame for plotting.
# The rows were appended in the order of `altersgruppen`, which already is the
# age order. Sorting the labels alphabetically would break it: "unter 18 Jahre"
# would end up after "33 bis 35 Jahre", so the frame is deliberately not sorted.
df_pyramide = pd.DataFrame(pyramiden_data)

# Altersverteilung

In [16]:
# Diverging horizontal bar chart: the (negated) Bachelor shares extend to the
# left of zero, the Master shares to the right. The coloured words in the title
# take the place of a legend.
fig = px.bar(
    df_pyramide,
    x=["Bachelor (%)", "Master (%)"],
    y="Altersgruppe",
    orientation="h",
    title="Altersverteilung: <span style='color:#73c6e9'>Bachelorstudierende</span> jünger, "
          "<span style='color:#cc5b6e;'>Masterstudierende</span> älter",
    subtitle="Ergebnisse der Studierendenbefragung 2024 im FB09",
    color_discrete_map={
        "Bachelor (%)": px.colors.qualitative.Safe[0],
        "Master (%)": px.colors.qualitative.Safe[1]
    },
)

# Layout configuration
fig.update_layout(
    margin=dict(l=180),
    height=500,
    width=1400,
    template="infoviz",
    xaxis_title="Anteil der Studierenden (Bachelor/Master)",
    yaxis_title="",
    xaxis=dict(
        # Label both directions with the magnitude, since the negative sign
        # only encodes "left side".
        tickmode="array",
        tickvals=[-40, -30, -20, -10, 0, 10, 20, 30, 40],
        ticktext=["40%", "30%", "20%", "10%", "0%", "10%", "20%", "30%", "40%"]
    ),
    showlegend=False
)


def _show_magnitude_on_hover(trace):
    """Make the hover label show the absolute share instead of the signed x value."""
    trace.update(
        customdata=[abs(value) for value in trace.x],
        hovertemplate="%{y}: %{customdata:.1f}%<extra>" + trace.name + "</extra>",
    )


# Without this, hovering a Bachelor bar would display a negative percentage.
fig.for_each_trace(_show_magnitude_on_hover)

fig.show()

# write_image does not create missing directories, so make sure the target
# folder exists before exporting.
output_path = Path("Plots/pdf/alter1.pdf")
output_path.parent.mkdir(parents=True, exist_ok=True)
fig.write_image(str(output_path))