# Gender

## Import

In [27]:
import pandas as pd
import plotly.io as pio
import plotly.graph_objects as go
import plotly.express as px

## Template

In [28]:
# Shared look for every chart in this notebook. Figures opt in with
# template="infoviz" once the template is registered below.
infoviz_template = {
    "layout": {
        # NOTE(review): a template nested inside a template's layout -- confirm
        # plotly actually applies "plotly_white" here.
        "template": "plotly_white",
        # Title anchored at the left edge (x=0 in paper coordinates), with a
        # smaller grey subtitle underneath.
        "title": {
            "font": {"size": 20, "family": "Arial", "weight": "bold", "color": "black"},
            "xanchor": "left",
            "xref": "paper",
            "x": 0,
            "subtitle": {
                "text": "Absolute Geschlechterverteilung von 2013 bis 2024",
                "font": {"color": "gray", "size": 13},
            },
        },
        # x axis: no grid lines, grey tick labels, bold grey axis title.
        "xaxis": {
            "showgrid": False,
            "zerolinecolor": "lightgrey",
            "tickfont": {"color": "grey", "size": 12},
            "title_font": {"color": "grey", "weight": "bold", "size": 13},
            "title_standoff": 15,
            "ticklabelstandoff": 10,
            "ticklabelposition": "outside bottom",
        },
        # y axis: same styling as the x axis, but with light grey grid lines.
        "yaxis": {
            "showgrid": True,
            "gridcolor": "lightgrey",
            "zerolinecolor": "lightgrey",
            "tickfont": {"color": "grey", "size": 12},
            "title_font": {"color": "grey", "weight": "bold", "size": 13},
            "title_standoff": 15,
            "ticklabelstandoff": 10,
            "ticklabelposition": "outside left",
        },
    }
}

# Register under the name the figure cells refer to.
pio.templates["infoviz"] = infoviz_template

In [29]:
# The workbook has one sheet per survey year; sheet names are the years as strings.
years = list(map(str, range(2013, 2025)))

# Passing a list of sheet names yields a dict mapping sheet name -> DataFrame.
df = pd.read_excel("data.xlsx", sheet_name=years)

# Tag every sheet (in place) with the year it belongs to.
for year in years:
    sheet_df = df[year]
    sheet_df["Jahr"] = year

# Collect the tagged sheets in year order.
data_list = list(df.values())

# Stack all years into one long DataFrame with a fresh 0..n-1 index.
final_df = pd.concat(data_list, ignore_index=True)

In [30]:
# Drop every column whose header contains a literal '*'.
starred_columns = final_df.columns.str.contains("*", regex=False)
df = final_df.loc[:, ~starred_columns]

# Keep only the rows needed to reconstruct absolute gender counts:
# the two percentage rows and the number of survey participants.
gender_categories = [
    "Männlich (%)",
    "Weiblich (%)",
    "Anzahl Teilnahmen an Studierendenbefragung",
]
df = (
    df[df["Category"].isin(gender_categories)]
    # Convert object-dtype columns that now hold only numbers/NaN to a numeric
    # dtype *before* filling. Calling fillna directly on object columns relies
    # on pandas' deprecated silent downcasting and emits a FutureWarning.
    .infer_objects()
    # Missing values are treated as 0.
    .fillna(0)
)

# One row per (year, category); values of duplicate rows are summed.
df = df.groupby(by=["Jahr", "Category"]).sum().reset_index()

# Just to be sure: remove '*' from any remaining column names.
df.columns = df.columns.str.replace("*", "", regex=False)


Downcasting object dtype arrays on .fillna, .ffill, .bfill is deprecated and will change in a future version. Call result.infer_objects(copy=False) instead. To opt-in to the future behavior, set `pd.set_option('future.no_silent_downcasting', True)`



In [31]:
def _absolute_count(total, percent):
    """Turn a percentage share of ``total`` into a whole head count.

    Parameters
    ----------
    total : number
        Number of survey participants.
    percent : number
        Share of ``total`` in percent (0-100).

    Returns
    -------
    int
        ``total * percent / 100`` rounded to the nearest integer, or 0 if
        either input is missing (NaN).
    """
    if pd.isna(total) or pd.isna(percent):
        return 0
    # Round instead of truncating: the product is generally not an exact
    # integer (e.g. 3.997), and int() alone would bias every count downward.
    return int(round(float(total) * float(percent) / 100))


CATEGORY_TOTAL = "Anzahl Teilnahmen an Studierendenbefragung"
CATEGORY_MEN = "Männlich (%)"
CATEGORY_WOMEN = "Weiblich (%)"

absolute_zahlen = []
# Columns 0 and 1 are "Jahr" and "Category".
# NOTE(review): [3:] also skips the first value column (index 2) -- confirm
# that column is meant to be excluded.
studiengaenge = df.columns[3:]

# Iterate over all survey years
for jahr in df["Jahr"].unique():
    df_jahr = df[df["Jahr"] == jahr]

    rows = {
        category: df_jahr[df_jahr["Category"] == category]
        for category in (CATEGORY_TOTAL, CATEGORY_MEN, CATEGORY_WOMEN)
    }

    # A year is skipped unless all three categories are present.
    if any(category_rows.empty for category_rows in rows.values()):
        continue

    for studiengang in studiengaenge:
        # Only the first matching row per category is used.
        anzahl = rows[CATEGORY_TOTAL][studiengang].iloc[0]
        maenner_abs = _absolute_count(anzahl, rows[CATEGORY_MEN][studiengang].iloc[0])
        frauen_abs = _absolute_count(anzahl, rows[CATEGORY_WOMEN][studiengang].iloc[0])

        absolute_zahlen.append([jahr, studiengang, maenner_abs, frauen_abs])

# Create final DataFrame with absolute gender counts
df_absolute = pd.DataFrame(
    absolute_zahlen,
    columns=["Jahr", "Studiengang", "Männer", "Frauen"]
)

In [32]:
# Total men / women per year across all study programmes. Selecting the two
# numeric columns up front avoids aggregating the text column "Studiengang"
# (which would concatenate every programme name) just to drop it again.
df = df_absolute.groupby("Jahr")[["Männer", "Frauen"]].sum()

In [None]:
# Pyramid layout: women extend to the left (negative x), men to the right.
women_bins = df['Frauen'] * -1
men_bins = df['Männer']

# Take the years from the aggregated data (its index is "Jahr") so the bars
# always line up with their labels.
y = [int(jahr) for jahr in df.index]

color1, color2 = px.colors.qualitative.Safe[8], px.colors.qualitative.Safe[2]

# Define layout
layout = go.Layout(
    # The coloured words double as the legend, hence showlegend=False below.
    title=(
        f"<b>Verteilung von </b><span style='color:{color2};'>weiblichen</span>"
        f" <b> und </b><span style='color:{color1};'>männlichen</span>"
        f" <b> Studierenden im FB09</b>"
    ),
    yaxis=go.layout.YAxis(title='Jahr'),
    xaxis=go.layout.XAxis(
        range=[-700, 700],
        # Tick labels show magnitudes, so the left (women) side reads as
        # positive counts as well.
        tickvals=[-700, -525, -350, -175, 0, 175, 350, 525, 700],
        ticktext=[700, 525, 350, 175, 0, 175, 350, 525, 700],
        title='Anzahl Studierende',
    ),
    width=1400,
    height=500,
    barmode='overlay',
    bargap=0.1
)

# Define bar chart. Both traces label and hover with the positive count, so
# the mirrored (negative) x values of the women trace are never displayed.
data = [
    go.Bar(
        y=y,
        x=women_bins,
        orientation='h',
        name='Frauen',
        text=df['Frauen'].astype('int'),
        hoverinfo='text',
        marker=dict(color=color2),
    ),
    go.Bar(
        y=y,
        x=men_bins,
        orientation='h',
        name='Männer',
        text=men_bins.astype('int'),
        hoverinfo='text',
        marker=dict(color=color1),
    ),
]

fig = go.Figure(data=data, layout=layout)
fig.update_layout(template='infoviz', showlegend=False)
fig.show()
fig.write_image("Plots/pdf/gender1.pdf")


In [35]:
# Years come from the aggregated data (its index is "Jahr") so the bars always
# match their x positions. A separate name is used so the `years` list of
# sheet names from the loading cell is not overwritten.
jahre = [int(jahr) for jahr in df.index]
men_counts = df["Männer"]
women_counts = df["Frauen"]

# Define colors
color1 = px.colors.qualitative.Safe[8]  # Männer
color2 = px.colors.qualitative.Safe[2]  # Frauen

# Layout configuration
layout = go.Layout(
    xaxis=dict(title="Jahr"),
    yaxis=dict(title="Anzahl Studierende"),
    barmode="group",
    width=1400,
    height=500,
    template="infoviz",
    # The coloured words double as the legend, hence showlegend=False.
    title=(
        f"<b>Verteilung von </b><span style='color:{color1};'>männlichen</span> "
        f"<b> und </b><span style='color:{color2};'>weiblichen</span> <b> Studierenden im FB09</b>"
    ),
    showlegend=False
)

# Define bar chart data: one bar per gender, side by side for each year.
data = [
    go.Bar(
        x=jahre,
        y=men_counts,
        name="Männer",
        marker=dict(color=color1)
    ),
    go.Bar(
        x=jahre,
        y=women_counts,
        name="Frauen",
        marker=dict(color=color2)
    )
]

# Create and show figure
fig = go.Figure(data=data, layout=layout)
fig.show()
fig.write_image("Plots/pdf/gender2.pdf")