In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import pickle

# Survey responses, read straight from the Excel export.
df = pd.read_excel('MINT-Woche 2023 (SuS).xlsx')

# Columns that prepare_df removes before any analysis.
cols_to_drop = ['ID', 'Startzeit', 'Fertigstellungszeit', 'E-Mail']

# Load the dictionary with alternative names for the modules.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles from a trusted source.
with open('modules.pkl', 'rb') as modules_file:
    modules = pickle.load(modules_file)

# Each entry is indexed positionally: element 0 feeds the "short" lookup,
# element 1 feeds the "code" lookup.
modules_short = {name: aliases[0] for name, aliases in modules.items()}
modules_code = {name: aliases[1] for name, aliases in modules.items()}

# Load the dictionary with short column names (used by prepare_df to rename columns).
with open('short_names.pkl', 'rb') as short_names_file:
    short_names = pickle.load(short_names_file)

def prepare_df(df_):
    """Return a cleaned copy of the survey frame; the input is not modified.

    Steps, in order:
      1. drop the columns listed in the module-level ``cols_to_drop``;
      2. rename columns through the module-level ``short_names`` mapping;
      3. add 1 to every score column;
         NOTE(review): presumably this shifts a 0-based scale to a 1-based one - confirm;
      4. for each module column add ``<name>_short`` and ``<name>_code``,
         looked up in ``modules_short`` / ``modules_code``. Values missing from
         a lookup become NaN, as with any ``Series.map``.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Frame containing the columns in ``cols_to_drop`` and, after renaming,
        the score and module columns named below.

    Returns
    -------
    pandas.DataFrame
        New frame with the adjusted scores and ten appended lookup columns.
    """
    score_columns = (
        'score', 'company_score', 'uni1_score', 'uni2_score',
        'uni3_score', 'workshop_score', 'gender_score',
    )
    module_columns = ('company', 'uni1', 'uni2', 'uni3', 'workshop')

    renamed = df_.drop(columns=cols_to_drop).rename(columns=short_names)

    # Existing columns are overwritten in place, so their position is unchanged.
    shifted = renamed.assign(
        **{name: renamed[name] + 1 for name in score_columns}
    )

    # Insertion order of this dict fixes the order of the appended columns:
    # <module>_short followed by <module>_code, module by module.
    lookups = {}
    for name in module_columns:
        lookups[name + '_short'] = shifted[name].map(modules_short)
        lookups[name + '_code'] = shifted[name].map(modules_code)

    return shifted.assign(**lookups)

# Replace the frame loaded from Excel with its prepared version and display it.
# NOTE(review): `df` is overwritten, so re-running this cell passes the already
# prepared frame back into prepare_df - presumably failing on the columns that
# were dropped the first time; confirm, and consider keeping the raw frame
# under its own name.
df = prepare_df(df)
df

In [None]:
# Histogram helper with a figure-level heading plus an axes title.
def plot_hist(df, column, bins, suptitle, title, color):
    """Draw one small histogram of ``df[column]``, show it, then close it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the data to plot.
    column : str
        Name of the column to histogram.
    bins : sequence or int
        Passed through unchanged to ``DataFrame.hist``.
    suptitle : str
        Figure-level heading, placed slightly above the axes (y=1.05).
    title : str
        Axes title shown under the heading.
    color : str
        Bar colour.
    """
    fig, ax = plt.subplots(figsize=(3, 3), dpi=150)
    df.hist(column=column, bins=bins, rwidth=0.8, color=color, ax=ax)
    fig.suptitle(suptitle, y=1.05)
    # DataFrame.hist titles the axes with the column name; replace it.
    ax.set_title(title)
    # The x axis always carries the ticks 1..6, independent of `bins`.
    ax.set_xticks(range(1, 7))
    ax.grid(False)
    plt.show()
    plt.close(fig)

In [None]:
# Group the prepared frame by the short company label.
company_grp = df.groupby('company_short')

# Mean company_score per company.
# NOTE(review): the result is neither stored nor the last expression of the
# cell, so it is computed and then discarded.
(company_grp
    .company_score.mean()
)

# One histogram panel of company_score per company_short value; whatever
# Series.hist returns is kept in `hist` and read by the following cell.
hist=df.company_score.hist(by=df.company_short)
# NOTE(review): the commented-out loop below iterates over `grouped`, which is
# not defined in the visible part of the notebook, and calls plot_hist with six
# arguments.
# for group_name, group in grouped:
#     mean = group['company_score'].mean()
#     std = group['company_score'].std()
#     count = group['company_score'].count()

#     suptitle = f'{group_name}'
#     title = f'N = {count}, {mean:.1f} ± {std:.1f}'

#     plot_hist(group, 'company_score', np.arange(2.5, 7), suptitle, title, 'black')


In [None]:
# `hist` holds what Series.hist(by=...) returned: matplotlib Axes (an array of
# them when there are several panels), not a pandas object, so `.iloc` is not
# available on it. Flatten first and pick the second panel; np.ravel accepts
# an array of any shape.
np.ravel(hist)[1]

In [None]:
# Column-name prefixes of the module types, and the derived names of their
# "<prefix>_short" label columns and "<prefix>_score" rating columns.
types = ['company', 'uni1', 'uni2', 'uni3', 'workshop']
types_short = [kind+'_short' for kind in types]
types_score = [kind+'_score' for kind in types]

# For every module type, print the label/score pairs one group at a time.
for short, score in zip(types_short, types_score):
    groups = df.groupby(short)
    # Iterating a GroupBy yields (key, sub-frame) pairs, so no get_group
    # lookup is needed; the key is already present in the `short` column.
    for _, group in groups:
        print(group[[short, score]])


In [None]:
# Show which row labels of `df` fall into each company_short group.
df.groupby('company_short').groups

In [None]:
# Sub-frames of the rows whose `sex` column equals 'weiblich' / 'männlich';
# rows with any other value appear in neither.
df_f = df.loc[df['sex'].eq('weiblich')]
df_m = df.loc[df['sex'].eq('männlich')]

In [None]:
# Unicode symbols used in the plot titles.
female = '\u2640'
male = '\u2642'
mean = '\u2300'

# Output location (relative to the working directory) and the column to plot.
path = 'general/'
score = 'score'


def _score_stats(frame, column):
    """Return (mean, standard deviation, number of rows) for ``frame[column]``."""
    return frame[column].mean(), frame[column].std(), len(frame.index)


def _save_score_hist(frame, column, title, color, filename):
    """Draw a 3x3 inch histogram of ``frame[column]``, save it to ``filename``
    and return the figure (left open so the notebook can still display it)."""
    fig, ax = plt.subplots(figsize=(3, 3), dpi=150)
    # NOTE(review): the bin edges run from 2.5 to 6.5 while the ticks start at
    # 1, so values below 2.5 are not drawn - confirm this is intended.
    frame.hist(column=column, bins=np.arange(2.5, 7, 1), rwidth=0.8, color=color, ax=ax)
    # DataFrame.hist titles the axes with the column name; replace it.
    ax.set_title(title)
    ax.set_xticks(range(1, 7))
    ax.grid(False)
    fig.savefig(filename)
    return fig


# Statistics for all respondents, then for the female and male subsets.
m_mean, m_std, m_count = _score_stats(df, score)
m_mean_f, m_std_f, m_count_f = _score_stats(df_f, score)
m_mean_m, m_std_m, m_count_m = _score_stats(df_m, score)

title = f'{mean} = ({m_mean:.1f} ± {m_std:.1f}) von {m_count} {male}+{female}'
title_f = f'{mean} = ({m_mean_f:.1f} ± {m_std_f:.1f}) von {m_count_f} {female}'
title_m = f'{mean} = ({m_mean_m:.1f} ± {m_std_m:.1f}) von {m_count_m} {male}'

# One figure per subset; `fig` ends up referring to the last one created.
fig = _save_score_hist(df, score, title, 'black', path+'score.png')
fig = _save_score_hist(df_f, score, title_f, 'red', path+'score_f.png')
fig = _save_score_hist(df_m, score, title_m, 'blue', path+'score_m.png')

In [None]:
# Column prefix of the module type that the per-module histogram loop further
# down groups by and plots.
# NOTE(review): this assignment shadows the built-in `type` for the rest of the
# notebook session.
type = 'company'

# function to plot histograms
def plot_hist(df, column, bins, title, color):
    """Draw one small histogram of ``df[column]``, show it, then close it.

    This definition replaces the six-argument ``plot_hist`` defined earlier in
    the notebook; it takes no ``suptitle``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the data to plot.
    column : str
        Name of the column to histogram.
    bins : sequence or int
        Passed through unchanged to ``DataFrame.hist``.
    title : str
        Axes title.
    color : str
        Bar colour.
    """
    fig, ax = plt.subplots(figsize=(3, 3), dpi=150)
    df.hist(column=column, bins=bins, rwidth=0.8, color=color, ax=ax)
    # DataFrame.hist titles the axes with the column name; replace it.
    ax.set_title(title)
    # The x axis always carries the ticks 1..6, independent of `bins`.
    ax.set_xticks(range(1, 7))
    ax.grid(False)
    plt.show()
    # The function is called once per group in a loop; close the figure so
    # open figures do not pile up on backends that keep them alive after show().
    plt.close(fig)


In [None]:
# One histogram per distinct value of the `type` column, titled with that
# group's short label and showing its "<type>_score" values.
type_short = type+'_short'
type_score = type+'_score'
score_bins = np.arange(2.5, 7, 1)

for _key, module in df.groupby(type):
    # Take the label from the group's first row.
    title = f'{module[type_short].iloc[0]}'
    plot_hist(module, type_score, score_bins, title, 'black')