# Abschluss

- Line bzw Barchart Master vs Bachelor u4ber die Jahre aggregiert für FB
- Treemap für alle Studiengänge

In [6]:
import plotly.io as pio

# Benutzerdefiniertes Template definieren
infoviz_template = dict(
    layout=dict(
        template="plotly_white",
        title=dict(
            font=dict(size=20, family="Arial", weight="bold", color="black"),
            y=0.91,  # Titel weiter nach oben
            x=0.05,  # Links ausgerichtet
            xanchor="left",  # Linksbündig
        ),
        xaxis=dict(
            showgrid=False,
            zerolinecolor="lightgrey",
            tickfont=dict(color="grey", size=12),
            title_font=dict(color="grey", weight="bold", size=13),
            title_standoff=15,
            ticklabelposition="outside bottom"
        ),
        yaxis=dict(
            showgrid=True, gridcolor="lightgrey",
            zerolinecolor="lightgrey",
            tickfont=dict(color="grey", size=12),
            title_font=dict(color="grey", weight="bold", size=13),
            title_standoff=15,
            ticklabelposition="outside left"
        ),
    )
)
pio.templates["infoviz"] = infoviz_template

In [1]:
import pandas as pd

# Datei einlesen (ersetze 'data.xlsx' durch den tatsächlichen Dateinamen)
file_path = "data.xlsx"

# Alle Sheets einlesen, aber das erste Blatt ignorieren
sheets = pd.read_excel(file_path, sheet_name=None, engine="openpyxl")

# Erstes Blatt entfernen
sheets.pop(next(iter(sheets)))

# Container für die bereinigten DataFrames
cleaned_sheets = {}

# Durch alle Jahre iterieren und DataFrames bereinigen
for jahr, df in sheets.items():
    df.columns = df.columns.astype(str).str.strip()  # Spaltennamen bereinigen
    cleaned_sheets[jahr] = df  # Speichern der bereinigten Daten

In [3]:
abschluss_anteile = {}

for jahr, df in cleaned_sheets.items():
    # Studiengänge automatisch erkennen (Bachelor und Master)
    bachelor_studiengaenge = [col for col in df.columns if "Bachelor" in col]
    master_studiengaenge = [col for col in df.columns if "Master" in col]

    # Extrahiere die Gesamtanzahl der Studierenden pro Abschluss
    total_bachelor = df[df["Category"] == "Anzahl Studierende"][bachelor_studiengaenge].sum().sum()
    total_master = df[df["Category"] == "Anzahl Studierende"][master_studiengaenge].sum().sum()

    total_students = total_bachelor + total_master

    if total_students > 0:
        abschluss_anteile[jahr] = {
            "Bachelor": (total_bachelor / total_students) * 100,
            "Master": (total_master / total_students) * 100
        }
    else:
        abschluss_anteile[jahr] = {"Bachelor": 0, "Master": 0}

# DataFrame erstellen
abschluss_df = pd.DataFrame.from_dict(abschluss_anteile, orient="index").reset_index()
abschluss_df.rename(columns={"index": "Jahr"}, inplace=True)
abschluss_df = abschluss_df.sort_values(by="Jahr")

In [4]:
abschluss_df

Unnamed: 0,Jahr,Bachelor,Master
0,2013,63.706389,36.293611
1,2014,67.319983,32.680017
2,2015,69.59707,30.40293
3,2016,64.989138,35.010862
4,2017,59.922179,40.077821
5,2018,50.026525,49.973475
6,2019,58.325833,41.674167
7,2020,59.710316,40.289684
8,2021,57.366589,42.633411
9,2022,55.807028,44.192972


In [7]:
import plotly.express as px

# DataFrame für Plot umstrukturieren
abschluss_df_melted = abschluss_df.melt(id_vars="Jahr", var_name="Abschluss", value_name="Anteil (%)")

fig = px.line(
    abschluss_df_melted,
    x="Jahr",
    y="Anteil (%)",
    color="Abschluss",
    markers=True,
    title="Entwicklung der Abschlussarten (Bachelor vs. Master)",
    color_discrete_sequence=px.colors.qualitative.Safe,
    template="infoviz"
)

fig.update_traces(line=dict(width=3))  # Linie dicker machen

fig.update_layout(
    yaxis=dict(range=[0, 100]),  # Prozentwerte zwischen 0 und 100
    xaxis=dict(range=[2013, 2025])
)

fig.show()

In [34]:
import plotly.express as px


df_2024 = cleaned_sheets["2024"]

# Studiengänge automatisch erkennen (Bachelor und Master)
studiengaenge = [col for col in df_2024.columns if "Bachelor" in col or "Master" in col]

# Extrahiere die Anzahl der Studierenden pro Studiengang
studiengang_verteilung = df_2024[df_2024["Category"] == "Anzahl Studierende"][studiengaenge].T

studiengang_verteilung

Unnamed: 0,66
Bachelor Agrarwissenschaften,293.0
Bachelor Ernährungswissenschaften,493.0
Bachelor Nachwachsende Rohstoffe und Bioressourcen,69.0
Bachelor Ökotrophologie,429.0
Bachelor Umwelt und globaler Wandel,116.0
Master Agrar- und Ressourcenökonomie,53.0
Master Agrobiotechnologie,139.0
Master Ernährungswissenschaften,338.0
Master Getränketechnologie*,26.0
Master Informationstechnologie in den Agrar- und Umweltwissenschaften*,13.0


In [35]:
studiengang_verteilung.columns = ["Anzahl Studierende"]
studiengang_verteilung["Studiengang"] = studiengang_verteilung.index
studiengang_verteilung.index
studiengang_verteilung

Unnamed: 0,Anzahl Studierende,Studiengang
Bachelor Agrarwissenschaften,293.0,Bachelor Agrarwissenschaften
Bachelor Ernährungswissenschaften,493.0,Bachelor Ernährungswissenschaften
Bachelor Nachwachsende Rohstoffe und Bioressourcen,69.0,Bachelor Nachwachsende Rohstoffe und Bioressou...
Bachelor Ökotrophologie,429.0,Bachelor Ökotrophologie
Bachelor Umwelt und globaler Wandel,116.0,Bachelor Umwelt und globaler Wandel
Master Agrar- und Ressourcenökonomie,53.0,Master Agrar- und Ressourcenökonomie
Master Agrobiotechnologie,139.0,Master Agrobiotechnologie
Master Ernährungswissenschaften,338.0,Master Ernährungswissenschaften
Master Getränketechnologie*,26.0,Master Getränketechnologie*
Master Informationstechnologie in den Agrar- und Umweltwissenschaften*,13.0,Master Informationstechnologie in den Agrar- u...


In [37]:
# Treemap erstellen
fig = px.treemap(
    studiengang_verteilung,
    path=["Studiengang"],
    values="Anzahl Studierende",
    title="Studierendenverteilung nach Studiengang (2024)",
    color_discrete_sequence=px.colors.qualitative.Safe,
    template="infoviz"
)

fig.show()

In [63]:
import plotly.express as px


df_2024 = cleaned_sheets["2024"]

# Studiengänge automatisch erkennen (Bachelor und Master)
bachelor_studiengaenge = [col for col in df_2024.columns if "Bachelor" in col]
master_studiengaenge = [col for col in df_2024.columns if "Master" in col]

# Extrahiere die Anzahl der Studierenden pro Studiengang
studiengang_verteilung = df_2024[df_2024["Category"] == "Anzahl Studierende"][bachelor_studiengaenge + master_studiengaenge].T
studiengang_verteilung.columns = ["Anzahl Studierende"]
studiengang_verteilung["Studiengang"] = studiengang_verteilung.index

# Hinzufügen der übergeordneten Kategorie ("Bachelor" oder "Master")
studiengang_verteilung["Studienabschluss"] = studiengang_verteilung["Studiengang"].apply(
    lambda x: "Bachelor" if "Bachelor" in x else "Master"
)




# Farbkodierung für Studiengänge:  
# Bachelor-Studiengänge erhalten Farben aus "Set1", Master-Studiengänge aus "Set2"
bachelor_colors = px.colors.qualitative.Safe
master_colors = px.colors.qualitative.Safe

# Erstellen einer Farbkodierung für jeden Studiengang
color_map = {}
bachelor_count = 0
master_count = 0

for studiengang in studiengang_verteilung["Studiengang"]:
    if "Bachelor" in studiengang:
        color_map[studiengang] = bachelor_colors[bachelor_count % len(bachelor_colors)]
        bachelor_count += 1
    else:
        color_map[studiengang] = master_colors[master_count % len(master_colors)]
        master_count += 1

# Entferne "Bachelor" und "Master" aus den Studiengangsnamen
studiengang_verteilung["Studiengang"] = studiengang_verteilung["Studiengang"].str.replace("Bachelor ", "", regex=False)
studiengang_verteilung["Studiengang"] = studiengang_verteilung["Studiengang"].str.replace("Master ", "", regex=False)

# Treemap erstellen
fig = px.treemap(
    studiengang_verteilung,
    path=["Studienabschluss", "Studiengang"],  # Erstes Level: Bachelor/Master, Zweites Level: Studiengang
    values="Anzahl Studierende",
    title="Studierendenverteilung nach Studiengang und Abschluss (2024)",
    color="Studiengang",  # Jeder Studiengang bekommt eine eigene Farbe
    color_discrete_map=color_map,
    template="infoviz"
)

# Schriftgrößen anpassen
fig.update_layout(
    font=dict(size=18),  # Generelle Schriftgröße anpassen
)

# Schriftgröße für Labels innerhalb der Treemap anpassen
fig.update_traces(
    textinfo="label+value",  # Zeigt den Namen und die Anzahl an
    textfont=dict(size=18, color="black", weight="bold"),  # Größe der Texte innerhalb der Treemap
)
fig.update_layout(
    width=1200,  # Breite in Pixeln
    height=800   # Höhe in Pixeln
)

fig.show()

In [45]:
# Treemap erstellen
fig = px.bar(
    studiengang_verteilung.sort_values(by="Anzahl Studierende", ascending=False),
    x="Studiengang",
    y="Anzahl Studierende",
    title="Studierendenverteilung nach Studiengang (2024)",
    color_discrete_sequence=px.colors.qualitative.Safe,
    template="infoviz"
)

fig.show()

In [41]:
fig = px.treemap(
    studiengang_verteilung,
    path=["Studiengang"],
    values="Anzahl Studierende",
    title="Studierendenverteilung nach Studiengang (2024)",
    color="Studiengang",
    color_discrete_sequence=px.colors.qualitative.Safe,
    template="infoviz"
)

# Anpassungen für Form & Layout
fig.update_layout(
    shapes="remainder",
    margin=dict(t=50, l=25, r=25, b=25)
)

fig.update_traces(
    tiling=dict(packing="squarify"),
    marker=dict(pad=5, line=dict(width=2, color="white"))
)

fig.show()

ValueError: 
    Invalid value of type 'builtins.str' received for the 'shapes' property of layout
        Received value: 'remainder'

    The 'shapes' property is a tuple of instances of
    Shape that may be specified as:
      - A list or tuple of instances of plotly.graph_objs.layout.Shape
      - A list or tuple of dicts of string/value properties that
        will be passed to the Shape constructor

        Supported dict properties:
            
            editable
                Determines whether the shape could be activated
                for edit or not. Has no effect when the older
                editable shapes mode is enabled via
                `config.editable` or
                `config.edits.shapePosition`.
            fillcolor
                Sets the color filling the shape's interior.
                Only applies to closed shapes.
            fillrule
                Determines which regions of complex paths
                constitute the interior. For more info please
                visit https://developer.mozilla.org/en-
                US/docs/Web/SVG/Attribute/fill-rule
            label
                :class:`plotly.graph_objects.layout.shape.Label
                ` instance or dict with compatible properties
            layer
                Specifies whether shapes are drawn below
                gridlines ("below"), between gridlines and
                traces ("between") or above traces ("above").
            legend
                Sets the reference to a legend to show this
                shape in. References to these legends are
                "legend", "legend2", "legend3", etc. Settings
                for these legends are set in the layout, under
                `layout.legend`, `layout.legend2`, etc.
            legendgroup
                Sets the legend group for this shape. Traces
                and shapes part of the same legend group
                hide/show at the same time when toggling legend
                items.
            legendgrouptitle
                :class:`plotly.graph_objects.layout.shape.Legen
                dgrouptitle` instance or dict with compatible
                properties
            legendrank
                Sets the legend rank for this shape. Items and
                groups with smaller ranks are presented on
                top/left side while with "reversed"
                `legend.traceorder` they are on bottom/right
                side. The default legendrank is 1000, so that
                you can use ranks less than 1000 to place
                certain items before all unranked items, and
                ranks greater than 1000 to go after all
                unranked items. When having unranked or equal
                rank items shapes would be displayed after
                traces i.e. according to their order in data
                and layout.
            legendwidth
                Sets the width (in px or fraction) of the
                legend for this shape.
            line
                :class:`plotly.graph_objects.layout.shape.Line`
                instance or dict with compatible properties
            name
                When used in a template, named items are
                created in the output figure in addition to any
                items the figure already has in this array. You
                can modify these items in the output figure by
                making your own item with `templateitemname`
                matching this `name` alongside your
                modifications (including `visible: false` or
                `enabled: false` to hide it). Has no effect
                outside of a template.
            opacity
                Sets the opacity of the shape.
            path
                For `type` "path" - a valid SVG path with the
                pixel values replaced by data values in
                `xsizemode`/`ysizemode` being "scaled" and
                taken unmodified as pixels relative to
                `xanchor` and `yanchor` in case of "pixel" size
                mode. There are a few restrictions / quirks
                only absolute instructions, not relative. So
                the allowed segments are: M, L, H, V, Q, C, T,
                S, and Z arcs (A) are not allowed because
                radius rx and ry are relative. In the future we
                could consider supporting relative commands,
                but we would have to decide on how to handle
                date and log axes. Note that even as is, Q and
                C Bezier paths that are smooth on linear axes
                may not be smooth on log, and vice versa. no
                chained "polybezier" commands - specify the
                segment type for each one. On category axes,
                values are numbers scaled to the serial numbers
                of categories because using the categories
                themselves there would be no way to describe
                fractional positions On data axes: because
                space and T are both normal components of path
                strings, we can't use either to separate date
                from time parts. Therefore we'll use underscore
                for this purpose: 2015-02-21_13:45:56.789
            showlegend
                Determines whether or not this shape is shown
                in the legend.
            templateitemname
                Used to refer to a named item in this array in
                the template. Named items from the template
                will be created even without a matching item in
                the input figure, but you can modify one by
                making an item with `templateitemname` matching
                its `name`, alongside your modifications
                (including `visible: false` or `enabled: false`
                to hide it). If there is no template or no
                matching item, this item will be hidden unless
                you explicitly show it with `visible: true`.
            type
                Specifies the shape type to be drawn. If
                "line", a line is drawn from (`x0`,`y0`) to
                (`x1`,`y1`) with respect to the axes' sizing
                mode. If "circle", a circle is drawn from
                ((`x0`+`x1`)/2, (`y0`+`y1`)/2)) with radius
                (|(`x0`+`x1`)/2 - `x0`|, |(`y0`+`y1`)/2
                -`y0`)|) with respect to the axes' sizing mode.
                If "rect", a rectangle is drawn linking
                (`x0`,`y0`), (`x1`,`y0`), (`x1`,`y1`),
                (`x0`,`y1`), (`x0`,`y0`) with respect to the
                axes' sizing mode. If "path", draw a custom SVG
                path using `path`. with respect to the axes'
                sizing mode.
            visible
                Determines whether or not this shape is
                visible. If "legendonly", the shape is not
                drawn, but can appear as a legend item
                (provided that the legend itself is visible).
            x0
                Sets the shape's starting x position. See
                `type` and `xsizemode` for more info.
            x0shift
                Shifts `x0` away from the center of the
                category when `xref` is a "category" or
                "multicategory" axis. -0.5 corresponds to the
                start of the category and 0.5 corresponds to
                the end of the category.
            x1
                Sets the shape's end x position. See `type` and
                `xsizemode` for more info.
            x1shift
                Shifts `x1` away from the center of the
                category when `xref` is a "category" or
                "multicategory" axis. -0.5 corresponds to the
                start of the category and 0.5 corresponds to
                the end of the category.
            xanchor
                Only relevant in conjunction with `xsizemode`
                set to "pixel". Specifies the anchor point on
                the x axis to which `x0`, `x1` and x
                coordinates within `path` are relative to. E.g.
                useful to attach a pixel sized shape to a
                certain data value. No effect when `xsizemode`
                not set to "pixel".
            xref
                Sets the shape's x coordinate axis. If set to a
                x axis id (e.g. "x" or "x2"), the `x` position
                refers to a x coordinate. If set to "paper",
                the `x` position refers to the distance from
                the left of the plotting area in normalized
                coordinates where 0 (1) corresponds to the left
                (right). If set to a x axis ID followed by
                "domain" (separated by a space), the position
                behaves like for "paper", but refers to the
                distance in fractions of the domain length from
                the left of the domain of that axis: e.g., *x2
                domain* refers to the domain of the second x
                axis and a x position of 0.5 refers to the
                point between the left and the right of the
                domain of the second x axis.
            xsizemode
                Sets the shapes's sizing mode along the x axis.
                If set to "scaled", `x0`, `x1` and x
                coordinates within `path` refer to data values
                on the x axis or a fraction of the plot area's
                width (`xref` set to "paper"). If set to
                "pixel", `xanchor` specifies the x position in
                terms of data or plot fraction but `x0`, `x1`
                and x coordinates within `path` are pixels
                relative to `xanchor`. This way, the shape can
                have a fixed width while maintaining a position
                relative to data or plot fraction.
            y0
                Sets the shape's starting y position. See
                `type` and `ysizemode` for more info.
            y0shift
                Shifts `y0` away from the center of the
                category when `yref` is a "category" or
                "multicategory" axis. -0.5 corresponds to the
                start of the category and 0.5 corresponds to
                the end of the category.
            y1
                Sets the shape's end y position. See `type` and
                `ysizemode` for more info.
            y1shift
                Shifts `y1` away from the center of the
                category when `yref` is a "category" or
                "multicategory" axis. -0.5 corresponds to the
                start of the category and 0.5 corresponds to
                the end of the category.
            yanchor
                Only relevant in conjunction with `ysizemode`
                set to "pixel". Specifies the anchor point on
                the y axis to which `y0`, `y1` and y
                coordinates within `path` are relative to. E.g.
                useful to attach a pixel sized shape to a
                certain data value. No effect when `ysizemode`
                not set to "pixel".
            yref
                Sets the shape's y coordinate axis. If set to a
                y axis id (e.g. "y" or "y2"), the `y` position
                refers to a y coordinate. If set to "paper",
                the `y` position refers to the distance from
                the bottom of the plotting area in normalized
                coordinates where 0 (1) corresponds to the
                bottom (top). If set to a y axis ID followed by
                "domain" (separated by a space), the position
                behaves like for "paper", but refers to the
                distance in fractions of the domain length from
                the bottom of the domain of that axis: e.g.,
                *y2 domain* refers to the domain of the second
                y  axis and a y position of 0.5 refers to the
                point between the bottom and the top of the
                domain of the second y axis.
            ysizemode
                Sets the shapes's sizing mode along the y axis.
                If set to "scaled", `y0`, `y1` and y
                coordinates within `path` refer to data values
                on the y axis or a fraction of the plot area's
                height (`yref` set to "paper"). If set to
                "pixel", `yanchor` specifies the y position in
                terms of data or plot fraction but `y0`, `y1`
                and y coordinates within `path` are pixels
                relative to `yanchor`. This way, the shape can
                have a fixed height while maintaining a
                position relative to data or plot fraction.
