In [132]:
import pandas as pd
import plotly.io as pio
import plotly.express as px

# Benutzerdefiniertes Template definieren
infoviz_template = dict(
    layout=dict(
        template="plotly_white",
        title=dict(
            font=dict(size=20, family="Arial", weight="bold", color="black"),
            y=0.91,  # Titel weiter nach oben
            x=0.05,  # Links ausgerichtet
            xanchor="left",  # Linksbündig
        ),
        xaxis=dict(
            showgrid=False,
            zerolinecolor="lightgrey",
            tickfont=dict(color="grey", size=12),
            title_font=dict(color="grey", weight="bold", size=13),
            title_standoff=15,
            ticklabelposition="outside bottom"
        ),
        yaxis=dict(
            showgrid=True, gridcolor="lightgrey",
            zerolinecolor="lightgrey",
            tickfont=dict(color="grey", size=12),
            title_font=dict(color="grey", weight="bold", size=13),
            title_standoff=15,
            ticklabelposition="outside left"
        ),
    )
)
pio.templates["infoviz"] = infoviz_template

In [133]:
years = [str(year) for year in range(2013, 2025)]

df = pd.read_excel("data.xlsx", sheet_name=years)

data_list = []

for year, df in df.items():
    # Füge das Jahr als neue Spalte hinzu
    df['Jahr'] = year
    
    # Füge den DataFrame der Liste hinzu
    data_list.append(df)

# Alle DataFrames in der Liste zu einem einzigen DataFrame zusammenführen
final_df = pd.concat(data_list, ignore_index=True)


In [134]:
final_df[final_df['Variable'] == 'Fachsemester'] 

Unnamed: 0,Variable,Category,Bachelor Agrarwissenschaften,Bachelor Ernährungswissenschaften,Bachelor Nachwachsende Rohstoffe und Bioressourcen,Bachelor Ökotrophologie,Bachelor Umwelt und globaler Wandel,Master Ernährungswissenschaften,Master Ernährungsökonomie*,Master Agrarökonomie und Betriebsmanagement,...,Master Nutzpflanzenwissenschaften,Master Oenologie/Weinwirtschaft,Master Transition Management,Master Sustainable Transition*,Master Agrar- und Ressourcenökonomie*,Master Getränketechnologie*,Master Informationstechnologie in den Agrar- und Umweltwissenschaften*,Master Nachhaltige Ernährungswirtschaft,Master Sustainable Transition,Master Nutztierwissenschaften*
22,Fachsemester,1 (%),36.0,36.0,,31.0,32.0,21.0,19.0,40.0,...,,,,,,,,,,
23,Fachsemester,2 (%),2.0,,,,,9.0,19.0,20.0,...,,,,,,,,,,
24,Fachsemester,3 (%),33.0,28.0,,30.0,35.0,34.0,26.0,20.0,...,,,,,,,,,,
25,Fachsemester,4 (%),,1.0,,1.0,,13.0,10.0,10.0,...,,,,,,,,,,
26,Fachsemester,5 (%),20.0,24.0,,25.0,26.0,16.0,16.0,10.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
792,Fachsemester,12 (%),,,,,,,,,...,,,,,,,,,,
793,Fachsemester,13 (%),3,,,,,,,,...,,,,,,,,,,
794,Fachsemester,14 und mehr (%),,,,,,,,,...,,,,,,,,,,
795,Fachsemester,Gesamt (%),100,100.0,100.0,100.0,100.0,100.0,,,...,100.0,,100.0,,,,,100.0,100.0,


In [135]:
df = final_df[final_df.columns[~final_df.columns.str.contains(r'\*')]] # drop * in the data
df.columns = df.columns.str.replace('*', '', regex=False)# just to be sure
df = df.rename(columns={'Master Getränketechnologi':'Master Getränketechnologie'})# fix a typo
clean_columns = {}
for col in df.columns:
    clean_name = col.strip()  # get rid of other human typos
    if clean_name in clean_columns:
        clean_columns[clean_name].append(col)
    else:
        clean_columns[clean_name] = [col]

# smack together the similar col names 
merged_df = df[['Jahr']].copy()
for clean_name, original_cols in clean_columns.items():
    if len(original_cols) > 1:
        merged_df[clean_name] = df[original_cols].sum(axis=1)
    else:
        merged_df[clean_name] = df[original_cols[0]]
df = merged_df.fillna(value=0) # use if needed for plotting

In [136]:
df = df[df['Variable'].isin(['Fachsemester'])]
df = df[df['Variable'] == 'Fachsemester'].drop(columns='Variable')
df = df.groupby(['Jahr', 'Category']).sum().reset_index()
df = df[df['Category'] != 'Gesamt (%)']

In [137]:
df['Bachelor Ökotrophologie'] = df['Bachelor Ökotrophologie'].astype(float)
df['Bachelor Agrarwissenschaften'] = df['Bachelor Agrarwissenschaften'].replace(' ',0)
df['Bachelor Agrarwissenschaften'] = df['Bachelor Agrarwissenschaften'].astype(float)

  df['Bachelor Agrarwissenschaften'] = df['Bachelor Agrarwissenschaften'].replace(' ',0)


In [138]:
df

Unnamed: 0,Jahr,Category,Bachelor Agrarwissenschaften,Bachelor Ernährungswissenschaften,Bachelor Nachwachsende Rohstoffe und Bioressourcen,Bachelor Ökotrophologie,Bachelor Umwelt und globaler Wandel,Master Ernährungswissenschaften,Master Agrarökonomie und Betriebsmanagement,Master Nutztierwissenschaften,...,Master Transition Management,Master Ökotrophologie,Master Agrobiotechnologie,Master Insect Biotechnology and Bioresources,Bachelor Umweltmanagement,Master Ernährungsökonomie,Master Nutzpflanzenwissenschaften,Master Oenologie/Weinwirtschaft,Master Nachhaltige Ernährungswirtschaft,Master Sustainable Transition
0,2013,1 (%),36.0,36.0,0.0,31.0,32.0,21.0,40.0,25.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2013,10 (%),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2013,11 (%),1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2013,12 (%),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2013,13 (%),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186,2024,6 (%),15.0,7.0,6.0,2.0,6.0,3.0,0.0,0.0,...,0.0,17.0,7.0,10.0,0.0,0.0,0.0,0.0,17.0,0.0
187,2024,7 (%),12.0,9.0,17.0,11.0,29.0,0.0,0.0,0.0,...,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0
188,2024,8 (%),3.0,1.0,6.0,4.0,6.0,0.0,0.0,0.0,...,0.0,0.0,7.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0
189,2024,9 (%),6.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0


In [139]:
# Spalten für Bachelor und Master extrahieren
bachelor_columns = [col for col in df.columns if 'Bachelor' in col]
master_columns = [col for col in df.columns if 'Master' in col]

# Melde Bachelor und Master in separaten DataFrames an
df_bachelor = pd.melt(df, id_vars=['Jahr', 'Category'], value_vars=bachelor_columns,
                      var_name='Studiengang', value_name='Bachelorg Wert')
df_bachelor['Typ'] = 'Bachelor'

df_master = pd.melt(df, id_vars=['Jahr', 'Category'], value_vars=master_columns,
                    var_name='Studiengang', value_name='Master Wert')
df_master['Typ'] = 'Master'

# Kombinieren der DataFrames für Bachelor und Master
df_combined = pd.concat([df_bachelor, df_master])

# Sortieren nach Jahr und Category
df_combined = df_combined.sort_values(by=['Jahr', 'Category', 'Typ'])


In [140]:
df_combined = df_combined.groupby(['Jahr', 'Category']).sum().reset_index().drop(columns=['Studiengang', 'Typ'])
df = df_combined[df_combined['Category'] != 'Anzahl']
df

Unnamed: 0,Jahr,Category,Bachelorg Wert,Master Wert
0,2013,1 (%),135.0,380.0
1,2013,10 (%),0.0,0.0
2,2013,11 (%),1.0,0.0
3,2013,12 (%),0.0,0.0
4,2013,13 (%),0.0,0.0
...,...,...,...,...
174,2024,5 (%),118.0,180.0
175,2024,6 (%),36.0,59.0
176,2024,7 (%),78.0,17.0
177,2024,8 (%),20.0,17.0


In [141]:


# Umwandeln der Kategorie von '1 (%)', '2 (%)', etc. in numerische Werte (Entfernen von " (%)")
df['Category_num'] = df['Category'].str.extract('(\d+)').astype(int)

# Sortieren nach Jahr und numerischer Kategorie
df_sorted = df.sort_values(by=['Jahr', 'Category_num'])

# Entfernen der Hilfsspalte 'Category_num'
df_sorted = df_sorted.drop(columns=['Category_num'])



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Category_num'] = df['Category'].str.extract('(\d+)').astype(int)


In [142]:
df_sorted

Unnamed: 0,Jahr,Category,Bachelorg Wert,Master Wert
0,2013,1 (%),135.0,380.0
6,2013,2 (%),2.0,78.0
7,2013,3 (%),126.0,191.0
8,2013,4 (%),2.0,72.0
9,2013,5 (%),95.0,62.0
...,...,...,...,...
166,2024,10 (%),7.0,0.0
167,2024,11 (%),9.0,15.0
168,2024,12 (%),0.0,0.0
169,2024,13 (%),3.0,0.0


In [147]:
import plotly.graph_objects as go
import pandas as pd

# Beispiel-Daten (ersetze dies mit deinem DataFrame)
data = {
    'Jahr': [2013, 2013, 2013, 2013, 2013, 2024, 2024, 2024, 2024, 2024],
    'Category': ['1 (%)', '2 (%)', '3 (%)', '4 (%)', '5 (%)', '10 (%)', '11 (%)', '12 (%)', '13 (%)', '14 und mehr (%)'],
    'Bachelorg Wert': [135.0, 2.0, 126.0, 2.0, 95.0, 7.0, 9.0, 0.0, 3.0, 0.0],
    'Master Wert': [380.0, 78.0, 191.0, 72.0, 62.0, 0.0, 15.0, 0.0, 0.0, 0.0],
}

# Erstelle den DataFrame
df = pd.DataFrame(data)

# Definiere Kategorien und Jahre für die Y-Achse und X-Achse
categories = df['Category'].unique()
years = df['Jahr'].unique()

# Daten für das gestapelte Balkendiagramm
bachelor_bin = df['Bachelorg Wert']  # Bachelor-Werte
master_bin = df['Master Wert']  # Master-Werte

# Erstelle die Daten für den gestapelten Balkenplot
trace_bachelor = go.Bar(
    y=categories,
    x=bachelor_bin,
    name='Bachelor Wert',
    orientation='h',
    marker=dict(color='green'),
    text=bachelor_bin,  # Anzeige des Textes (Wert)
    hoverinfo='x+text'
)

trace_master = go.Bar(
    y=categories,
    x=master_bin,
    name='Master Wert',
    orientation='h',
    marker=dict(color='orange'),
    text=master_bin,  # Anzeige des Textes (Wert)
    hoverinfo='x+text'
)

# Layout des Diagramms
layout = go.Layout(
    title='Bachelor und Master Werte pro Kategorie',
    barmode='stack',  # Balken übereinander stapeln
    yaxis=dict(title='Category'),
    xaxis=dict(title='Summe der Werte'),
    showlegend=True
)

# Erstelle das Diagramm und zeige es an
fig = go.Figure(data=[trace_bachelor, trace_master], layout=layout)
fig.show()


In [148]:
import plotly.graph_objects as go
import pandas as pd

# Beispiel-Daten (ersetze dies mit deinem DataFrame)
data = {
    'Jahr': [2013, 2013, 2013, 2013, 2013, 2024, 2024, 2024, 2024, 2024],
    'Category': ['1 (%)', '2 (%)', '3 (%)', '4 (%)', '5 (%)', '10 (%)', '11 (%)', '12 (%)', '13 (%)', '14 und mehr (%)'],
    'Bachelorg Wert': [135.0, 2.0, 126.0, 2.0, 95.0, 7.0, 9.0, 0.0, 3.0, 0.0],
    'Master Wert': [380.0, 78.0, 191.0, 72.0, 62.0, 0.0, 15.0, 0.0, 0.0, 0.0],
}

# Erstelle den DataFrame
df = pd.DataFrame(data)

# Alle Kategorien (wir setzen die Reihenfolge manuell auf Category)
categories = sorted(df['Category'].unique(), key=lambda x: int(x.split()[0]))

# Data Preparation for Stacked Bars
bachelor_bin = df.pivot(index='Jahr', columns='Category', values='Bachelorg Wert').fillna(0)
master_bin = df.pivot(index='Jahr', columns='Category', values='Master Wert').fillna(0)

# Farben für jede Kategorie definieren (optional)
category_colors = {
    '1 (%)': 'green', '2 (%)': 'blue', '3 (%)': 'red', '4 (%)': 'purple', 
    '5 (%)': 'orange', '10 (%)': 'pink', '11 (%)': 'brown', '12 (%)': 'cyan', 
    '13 (%)': 'magenta', '14 und mehr (%)': 'yellow'
}

# Layout des Diagramms
layout = go.Layout(
    title='Bachelor und Master Werte pro Jahr und Kategorie',
    barmode='stack',  # Balken übereinander stapeln
    yaxis=dict(title='Jahr'),
    xaxis=dict(title='Werte'),
    showlegend=True
)

# Erstelle die Daten für das gestapelte Balkendiagramm
data = []
for category in categories:
    data.append(
        go.Bar(
            y=bachelor_bin.index,
            x=bachelor_bin[category],
            name=f'Bachelor - {category}',
            orientation='h',
            marker=dict(color=category_colors.get(category, 'gray')),
            hoverinfo='x+name'
        )
    )
    data.append(
        go.Bar(
            y=master_bin.index,
            x=master_bin[category],
            name=f'Master - {category}',
            orientation='h',
            marker=dict(color=category_colors.get(category, 'gray')),
            hoverinfo='x+name'
        )
    )

# Erstelle das Diagramm und zeige es an
fig = go.Figure(data=data, layout=layout)
fig.show()


ModuleNotFoundError: No module named 'matplotlib'

In [150]:
import numpy as np
import matplotlib.pyplot as plt

# Beispiel-Daten als numpy Arrays
jahre = np.array([2021, 2022, 2023])
bachelor = np.array([150, 180, 200])
master = np.array([100, 120, 130])

# Gesamtzahl der Teilnehmer pro Jahr berechnen
gesamt = bachelor + master

# Erstellen des Diagramms
fig, ax = plt.subplots(figsize=(10, 6))

# Balken für Bachelor und Master (mit negativen Werten für Bachelor)
ax.barh(jahre, -bachelor, height=0.8, label="Bachelor", color='skyblue')
ax.barh(jahre, master, height=0.8, label="Master", color='salmon')

# Achsenanpassungen
ax.set_xlabel('Anzahl der Teilnehmer')
ax.set_ylabel('Jahr')
ax.set_title('Teilnehmer nach Studiengang und Jahr')

# Null in der Mitte der X-Achse
ax.axvline(x=0, color='black', linewidth=1)

# Legende
ax.legend()

# Anzeige
plt.tight_layout()
plt.show()


ModuleNotFoundError: No module named 'matplotlib'