## Import

In [None]:
import pandas as pd
import plotly.io as pio
import plotly.express as px

In [None]:
# House style for the figures in this notebook. It is registered with
# plotly.io under the name "infoviz" so figures can request it via
# template="infoviz".
def _axis_style(**overrides):
    """Return the axis settings shared by x and y, plus axis-specific overrides."""
    style = dict(
        zerolinecolor="lightgrey",
        tickfont=dict(color="grey", size=12),
        title_font=dict(color="grey", weight="bold", size=13),
        title_standoff=15,
    )
    style.update(overrides)
    return style


_title_style = dict(
    font=dict(size=20, family="Arial", weight="bold", color="black"),
    y=0.91,
    x=0.05,
    xanchor="left",
)

infoviz_template = dict(
    layout=dict(
        template="plotly_white",
        title=_title_style,
        # x axis: no grid lines, tick labels below the axis.
        xaxis=_axis_style(
            showgrid=False,
            ticklabelposition="outside bottom",
        ),
        # y axis: light grey grid lines, tick labels left of the axis.
        yaxis=_axis_style(
            showgrid=True,
            gridcolor="lightgrey",
            ticklabelposition="outside left",
        ),
    )
)
pio.templates["infoviz"] = infoviz_template

In [None]:
# Load one worksheet per year and stack them into a single long frame.
# The worksheets are addressed by name, and the names are the years as strings.
years = [str(year) for year in range(2013, 2025)]

# Passing a list of sheet names yields a mapping {sheet name: DataFrame}.
# It gets its own name so the loop variable below cannot shadow it.
sheets = pd.read_excel("data.xlsx", sheet_name=years)

# Tag every sheet with the year it came from. `assign` returns a new frame,
# so the frames held in `sheets` are left untouched.
data_list = [sheet_df.assign(Jahr=year) for year, sheet_df in sheets.items()]

final_df = pd.concat(data_list, ignore_index=True)


In [None]:
# Notebook preview: show only the rows whose 'Variable' is 'Fachsemester'.
final_df.loc[final_df['Variable'] == 'Fachsemester']

In [313]:
# Column clean-up: drop the columns whose header contains '*', normalise the
# remaining headers, and merge columns that end up with the same header.

# Known header misspellings -> corrected spelling.
header_fixes = {'Master Getränketechnologi': 'Master Getränketechnologie'}

# Keep only the columns whose header does not contain an asterisk.
df = final_df.loc[:, ~final_df.columns.str.contains('*', regex=False)]

# Group column POSITIONS by their cleaned header (whitespace stripped, known
# typos corrected). Positions are used instead of labels on purpose: once two
# headers normalise to the same text, selecting by label would pick up every
# matching column once per list entry and the row sum would be double-counted.
column_positions = {}
for position, header in enumerate(df.columns):
    clean_name = header.strip()  # get rid of stray whitespace in headers
    clean_name = header_fixes.get(clean_name, clean_name)
    column_positions.setdefault(clean_name, []).append(position)

# Build the merged frame: headers that occur once are copied, headers that
# occur several times are summed row-wise into one column.
merged_df = df[['Jahr']].copy()
for clean_name, positions in column_positions.items():
    if len(positions) > 1:
        merged_df[clean_name] = df.iloc[:, positions].sum(axis=1)
    else:
        merged_df[clean_name] = df.iloc[:, positions[0]]

# Missing values become 0 so the columns can be plotted directly.
df = merged_df.fillna(value=0)

In [314]:
# Keep only the 'Fachsemester' rows. The 'Variable' column carries no
# information afterwards, so it is dropped.
df = df[df['Variable'] == 'Fachsemester'].drop(columns='Variable')

# One row per (year, category): sum the remaining columns within each group.
df = df.groupby(['Jahr', 'Category']).sum().reset_index()

# Remove the 'Gesamt (%)' rows. Take an explicit copy so that later column
# assignments on `df` operate on an independent frame rather than on a
# filtered slice (which would trigger pandas' SettingWithCopyWarning).
df = df[df['Category'] != 'Gesamt (%)'].copy()

In [None]:
# Cast two of the Bachelor columns to float.
oeko_col = 'Bachelor Ökotrophologie'
agrar_col = 'Bachelor Agrarwissenschaften'

df[oeko_col] = df[oeko_col].astype(float)

# Cells holding a single space are replaced by 0 before this column is cast.
df[agrar_col] = df[agrar_col].replace(' ', 0).astype(float)

In [None]:
# Reshape from wide (one column per programme) to long, separately for the
# Bachelor and the Master programmes, then stack both results.
bachelor_columns = [name for name in df.columns if 'Bachelor' in name]
master_columns = [name for name in df.columns if 'Master' in name]


def _melt_programmes(value_columns, value_name, typ):
    """Melt the given programme columns of `df` and tag the rows with `typ`."""
    long_df = df.melt(
        id_vars=['Jahr', 'Category'],
        value_vars=value_columns,
        var_name='Studiengang',
        value_name=value_name,
    )
    long_df['Typ'] = typ
    return long_df


# Each degree type gets its own value column; after the concat below a row
# therefore has a value in exactly one of the two value columns.
df_bachelor = _melt_programmes(bachelor_columns, 'Bachelorg Wert', 'Bachelor')
df_master = _melt_programmes(master_columns, 'Master Wert', 'Master')

df_combined = pd.concat([df_bachelor, df_master]).sort_values(
    by=['Jahr', 'Category', 'Typ']
)


In [None]:
# Collapse all programmes into one Bachelor total and one Master total per
# (year, category). Only the two value columns are aggregated; the text
# columns 'Studiengang' and 'Typ' are left out up front instead of being
# string-concatenated by sum() and dropped afterwards.
value_columns = ['Bachelorg Wert', 'Master Wert']
df_combined = (
    df_combined.groupby(['Jahr', 'Category'])[value_columns].sum().reset_index()
)

# Drop the 'Anzahl' rows. The explicit copy makes `df` an independent frame,
# so adding columns to it later does not raise SettingWithCopyWarning.
df = df_combined[df_combined['Category'] != 'Anzahl'].copy()

In [None]:
# Turn category labels such as '1 (%)', '2 (%)', ... into numbers by pulling
# out the first run of digits, so the rows can be sorted numerically
# (a plain string sort would put '10 (%)' before '2 (%)').
# The pattern is a raw string: '\d' in a normal string literal is an invalid
# escape sequence.
category_num = df['Category'].str.extract(r'(\d+)', expand=False).astype(int)

# `assign` builds a new frame instead of writing into `df` in place.
df = df.assign(Category_num=category_num)

# The numeric helper column is only needed for sorting.
df_sorted = df.sort_values(by=['Jahr', 'Category_num']).drop(columns=['Category_num'])

In [None]:
# Stack the Bachelor and Master totals on top of each other: one row per
# (year, category, degree) with a single shared value column 'Wert'.
key_columns = ["Jahr", "Category"]

bachelor_df = (
    df_sorted[key_columns + ["Bachelorg Wert"]]
    .rename(columns={"Bachelorg Wert": "Wert"})
    .assign(Studiengang="Bachelor")
)

master_df = (
    df_sorted[key_columns + ["Master Wert"]]
    .rename(columns={"Master Wert": "Wert"})
    .assign(Studiengang="Master")
)

# Bachelor rows first, then Master rows, with a fixed column order.
result = pd.concat([bachelor_df, master_df])
result = result[key_columns + ["Studiengang", "Wert"]]

df_sorted = result.sort_values(by=["Jahr", "Category"]).reset_index(drop=True)


In [None]:
# Category labels in ascending order: "1 (%)" ... "13 (%)", followed by the
# open-ended "14 und mehr (%)" bucket.
category_order = [f"{semester} (%)" for semester in range(1, 14)] + ["14 und mehr (%)"]

In [None]:
# Bubble chart over all years: x = year, y = category, bubble size = 'Wert',
# one colour per degree.
pio.templates["infoviz"] = infoviz_template

# Work on a copy so that stripping the label suffix below stays local to this
# figure and does not alter df_sorted.
df = df_sorted.copy()

# Remove the " (%)" suffix from the category labels shown on the axis.
df['Category'] = df['Category'].str.replace(' (%)', '', regex=False)

# Explicit category order for the y axis. The entries must be spelled exactly
# like the values in df['Category'] AFTER the suffix has been removed;
# entries that match no value have no ordering effect.
category_order = ["14 und mehr", "13", "12", "11", "10", "9", "8", "7", "6", "5", "4", "3", "2", "1"]

fig = px.scatter(df,
                 x="Jahr",
                 y="Category",
                 color="Studiengang",
                 size="Wert",
                 hover_name="Studiengang",
                 title="Verteilung von </b><span style='color:#73c6e9;'>Bachelor</span> <b> und </b><span style='color:#cc5b6e;'>Master</span> <b> über die Jahre",
                 labels={"Jahr": "Jahr", "Category": "Fachsemester", "Wert": "Wert"},
                 size_max=60,
                 category_orders={"Category": category_order},
                 # The legend is hidden and the title acts as colour key, so
                 # the marker colours must be the ones used in the title.
                 color_discrete_map={"Bachelor": "#73c6e9", "Master": "#cc5b6e"})

fig.update_layout(
    template="infoviz",
    height=1200,
    width=1400,
    showlegend=False,
)
# Ideally Master and Bachelor would be drawn as two half circles, but Plotly
# reached its limits there.
fig.show()

In [None]:
# Keep only the rows of 2024 (the year is stored as a string).
# NOTE: this rebinds df_sorted, so the cells below only see the 2024 rows.
is_2024 = df_sorted['Jahr'] == '2024'
df_sorted = df_sorted.loc[is_2024]

In [None]:
# Second version of the "infoviz" template: the title is left-aligned on the
# plotting area and carries a subtitle, and the tick labels get extra standoff.
# Registering it replaces the template stored under the same name earlier.
_tick_font = {"color": "grey", "size": 12}
_axis_title_font = {"color": "grey", "weight": "bold", "size": 13}

infoviz_template = {
    "layout": {
        "template": "plotly_white",
        "title": {
            "font": {"size": 20, "family": "Arial", "weight": "bold", "color": "black"},
            "xanchor": "left",
            "xref": "paper",
            "x": 0,
            "subtitle": {
                "text": "Vergleich der Absoluten Anzahl der Personen nach Abschluss",
                "font": {"color": "gray", "size": 13},
            },
        },
        "xaxis": {
            "showgrid": False,
            "zerolinecolor": "lightgrey",
            "tickfont": dict(_tick_font),
            "title_font": dict(_axis_title_font),
            "title_standoff": 15,
            "ticklabelstandoff": 10,
            "ticklabelposition": "outside bottom",
        },
        "yaxis": {
            "showgrid": True,
            "gridcolor": "lightgrey",
            "zerolinecolor": "lightgrey",
            "tickfont": dict(_tick_font),
            "title_font": dict(_axis_title_font),
            "title_standoff": 15,
            "ticklabelstandoff": 10,
            "ticklabelposition": "outside left",
        },
    }
}
pio.templates["infoviz"] = infoviz_template

# Data for the figure: df_sorted with the " (%)" suffix removed from the labels.
df = pd.DataFrame(df_sorted)
df['Category'] = df['Category'].str.replace(' (%)', '', regex=False)

# Category labels "1" ... "13" followed by the open-ended bucket.
category_order = [str(semester) for semester in range(1, 14)] + ["14 und mehr"]

# Put the two degrees on separate horizontal lines: Bachelor at y = 1,
# Master at y = -1.
degree_to_y = {'Bachelor': 1, 'Master': -1}
df['Y-Wert'] = df['Studiengang'].map(degree_to_y)

scatter_options = {
    "x": "Category",
    "y": "Y-Wert",
    "color": "Studiengang",
    "size": "Wert",
    "hover_name": "Studiengang",
    "title": "Verhältnis der Fachsemester in </b><span style='color:#73c6e9;'>Bachelor</span> <b> und </b><span style='color:#cc5b6e;'>Master</span> <b> für das Jahr 2024",
    "labels": {"Jahr": "Jahr", "Category": "Kategorie", "Wert": "Wert"},
    "size_max": 60,
    "category_orders": {"Category": category_order},
    "color_discrete_map": {"Bachelor": "#73c6e9", "Master": "#cc5b6e"},
}
fig = px.scatter(df, **scatter_options)

# Replace the numeric y ticks (1 / -1) by the degree names and hide the legend.
degree_axis = {
    "tickvals": [1, -1],
    "ticktext": ["Bachelor", "Master"],
    "showgrid": True,
    "zerolinecolor": "lightgrey",
}
fig.update_layout(
    template="infoviz",
    height=400,
    width=1400,
    yaxis_title="Abschluss",
    xaxis_title="Fachsemester",
    yaxis=degree_axis,
    showlegend=False,
)

# Fully opaque markers.
fig.update_traces(marker={"opacity": 1})

fig.show()


In [None]:
# Grouped bar chart of the same data: one pair of bars (Bachelor / Master)
# per category.
df = pd.DataFrame(df_sorted)
df['Category'] = df['Category'].str.replace(' (%)', '', regex=False)

# Category labels "1" ... "13" followed by the open-ended bucket.
category_order = [str(semester) for semester in range(1, 14)] + ["14 und mehr"]

# Helper column with 1 for Bachelor and -1 for Master; the px.bar call below
# does not reference it.
df['Y-Wert'] = df['Studiengang'].map({'Bachelor': 1, 'Master': -1})

bar_options = {
    "x": "Category",
    "y": "Wert",
    "color": "Studiengang",
    "barmode": "group",
    "title": "Verhältnis der Fachsemester in </b><span style='color:#73c6e9;'>Bachelor</span> <b> und </b><span style='color:#cc5b6e;'>Master</span> <b> für das Jahr 2024",
    "labels": {"Category": "Fachsemester", "Wert": "Anzahl der Personen"},
    "category_orders": {"Category": category_order},
    "color_discrete_map": {"Bachelor": "#73c6e9", "Master": "#cc5b6e"},
}
fig = px.bar(df, **bar_options)

# Apply the notebook template, fix the figure size and axis titles, and hide
# the legend.
x_axis_ticks = {
    "ticklabelstandoff": 10,
    "ticklabelposition": "outside bottom",
}
fig.update_layout(
    template="infoviz",
    height=500,
    width=1200,
    yaxis_title="Anzahl der Personen",
    xaxis_title="Fachsemester",
    xaxis=x_axis_ticks,
    showlegend=False,
)

fig.show()