In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.lines import Line2D

# Apply seaborn's "darkgrid" style to the figures drawn in this notebook.
sns.set_style("darkgrid")
# Lift pandas' column display limit so wide frames are shown in full.
pd.set_option("display.max_columns", None)

In [None]:
# Display name of the analysed program; used as a tick label on the NPS charts.
PROGRAM_NAME = "SAMPLE PROGRAM NAME"
# Excel workbook that every cell below reads through `df`.
table_path = "../artifacts/processed.xlsx"
df = pd.read_excel(table_path)

In [None]:
# Show the first two rows as a quick check of the loaded columns.
df.head(2)

# Basic plots

### age distribution

In [None]:
# Answer codes of question Q15 and the age bracket each code stands for.
age_groups = {
    1.0: "18-24",
    2.0: "25-34",
    3.0: "35-44",
    4.0: "45-54",
    5.0: "55+",
}
age_column = "Q15 🔴  Укажите ваш  возраст"

# Number of respondents per answer code, relabelled with the bracket names.
age_counts = df[age_column].value_counts()
age_counts.index = age_counts.index.map(age_groups)

# One palette colour per age bracket present in the data.
colors = sns.color_palette("RdYlGn_r", n_colors=len(age_counts))


def make_autopct(values):
    """Build an ``autopct`` callback that labels pie wedges with share and count.

    Args:
        values: Iterable with the absolute counts that are drawn as wedges.

    Returns:
        A function that takes a wedge's percentage and returns a two-line
        label: the percentage with one decimal, then the absolute count
        (recovered from the percentage and the total) in parentheses.
    """
    # The total is the same for every wedge, so it is computed once here
    # rather than on each callback invocation; this also keeps the labels
    # correct when `values` is a one-shot iterable.
    total = sum(values)

    def my_autopct(pct):
        count = int(round(pct * total / 100.0))
        return f"{pct:.1f}%\n({count})"

    return my_autopct


# Pie chart of the age brackets; every wedge is annotated with its share
# and its absolute number of respondents.
fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    age_counts,
    labels=age_counts.index,
    colors=colors,
    autopct=make_autopct(age_counts),
    pctdistance=0.8,
    startangle=90,
)
ax.set_title("Распределение возрастов")
plt.show()

### industries distribution

In [None]:
# Columns of question Q13, with Q13.6 skipped
# (the reason is not visible in this notebook — TODO confirm).
job_cols = [
    col for col in df.columns if col.startswith("Q13") and not col.startswith("Q13.6")
]
# The column sums are used as the number of respondents per option
# (assumes 0/1 indicator columns — verify against the preprocessing step).
jobs_counts = df[job_cols].sum()
jobs_counts = jobs_counts[jobs_counts > 0].sort_values(ascending=False)

# `make_autopct` is defined once, in the age-distribution cell above. It is
# deliberately not redefined here so every pie chart shares one implementation.
colors = sns.color_palette("RdYlGn_r", len(jobs_counts))
fig, ax = plt.subplots(figsize=(6, 6))
# Keep the wedge artists so that the legend entries are bound to them
# explicitly instead of relying on the implicit artist order.
wedges = ax.pie(
    jobs_counts,
    startangle=90,
    autopct=make_autopct(jobs_counts),
    colors=colors,
    pctdistance=0.8,
)[0]
ax.legend(
    handles=wedges,
    labels=list(jobs_counts.index),
    loc="center left",
    bbox_to_anchor=(1, 0.5),
    title="Сфера деятельности",
)
ax.set_title("Распределение сфер деятельности")
plt.show()

In [None]:
# Answer codes of question Q14 and the industry name each code stands for.
industry_codes = {
    1: "Средства массовой информации и развлечения",
    2: "Здравоохранение",
    3: "Образование",
    4: "Некоммерческие организации, неправительственные организации",
    5: "Государственный сектор",
    6: "Консалтинг",
    7: "Недвижимость",
    8: "Финансы",
    9: "Технологии",
    10: "Отели, Рестораны, Кейтеринг",
    11: "Логистика",
    12: "Товары народного потребления",
    13: "Торговля",
    14: "Строительство",
    15: "Энергетика",
    16: "Производство",
    17: "Добыча полезных ископаемых",
    18: "Сельское хозяйство",
    19: "Другое",
}
# Number of respondents per industry code, relabelled with the industry names.
industry_counts = df[
    "Q14 🔴  Укажите в какой отрасли вы ведете деятельность"
].value_counts()
industry_counts.index = industry_counts.index.to_series().map(industry_codes)

# `make_autopct` is defined once, in the age-distribution cell above. It is
# deliberately not redefined here so every pie chart shares one implementation.
colors = sns.color_palette("RdYlGn_r", len(industry_counts))
fig, ax = plt.subplots(figsize=(6, 6))
# Keep the wedge artists so that the legend entries are bound to them
# explicitly instead of relying on the implicit artist order.
wedges = ax.pie(
    industry_counts,
    startangle=90,
    autopct=make_autopct(industry_counts),
    colors=colors,
    pctdistance=0.8,
)[0]
# NOTE(review): the legend title and the chart title below are the same strings
# the Q13 chart uses, although this chart is built from Q14 — confirm wording.
ax.legend(
    handles=wedges,
    labels=list(industry_counts.index),
    loc="center left",
    bbox_to_anchor=(1, 0.5),
    title="Сфера деятельности",
)
ax.set_title("Распределение сфер деятельности")
plt.show()

### CSI and boxplots

In [None]:
# Question numbers whose sub-items hold discrete ratings; for each of them,
# collect every column whose name starts with that question prefix.
rated_questions = [2, 3, 5, 7, 8, 9]
discrete_rate_cols = [
    [col for col in df.columns if col.startswith(f"Q{i}")] for i in rated_questions
]

short_colnames = [
    "Общая оценка\nпрограммы (Q2)",
    "Насколько достигнуты\nцели обучения (Q3)",
    "Дизайн программы (Q5)",
    "Опыт на\nмеждународных модулях (Q7)",
    "Работа команды\nпрограммы (Q8)",
    "Качество группы (Q9)",
]
box_positions = [0.8 * i for i in range(1, len(discrete_rate_cols) + 1)]

# One value per respondent and question: the mean over that question's
# sub-items. Respondents without any answer for a question are dropped,
# because matplotlib's boxplot does not discard NaN values itself.
discrete_plot_data = [df[cols].mean(axis=1).dropna() for cols in discrete_rate_cols]

fig, ax = plt.subplots(figsize=(16, 6))
boxplot = ax.boxplot(
    discrete_plot_data,
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    # Percentile whiskers (0, 100) always span min..max. With `whis=np.inf`
    # the whisker limits turn into NaN when the interquartile range is 0
    # and the whiskers collapse onto the box.
    whis=(0, 100),
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
ax.set_xticks(box_positions)
ax.set_xticklabels(short_colnames)
ax.set_ylabel("Оценка по шкале 1-10")
ax.set_title("Распределение оценок различных составляющих курса")

# Mark the mean of every box with a red diamond. No `label` is passed: the
# legend below is built from explicit proxy handles.
for position, ratings in zip(box_positions, discrete_plot_data):
    ax.plot(position, ratings.mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="Медиана"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="Среднее"
    ),
]
ax.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
knowledge_rate_col = (
    "Q2.1 Удовлетворенность приобретенными на программе знаниями, умениями, навыками"
)
prof_rate_col = "Q2.2 Профессорско-преподавательский состав"
admin_rate_col = "Q2.3 Административная поддержка программы"

# Tick label and data column are declared side by side so they cannot drift
# apart. Previously the label list and the column list were ordered
# differently, which swapped the labels of the faculty and knowledge boxes.
q2_items = [
    ("Административная\nподдержка", admin_rate_col),
    ("ППС", prof_rate_col),
    ("Приобретенные\nзнания", knowledge_rate_col),
]
short_colnames = [label for label, _ in q2_items]
q2_rate_cols = [col for _, col in q2_items]
box_positions = [0.33, 0.66, 0.99]

# Unanswered items are dropped: matplotlib's boxplot does not discard NaN.
q2_plot_data = [df[col].dropna() for col in q2_rate_cols]

fig, ax = plt.subplots(figsize=(9, 6))
boxplot = ax.boxplot(
    q2_plot_data,
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    # Percentile whiskers (0, 100) always span min..max. With `whis=np.inf`
    # the whisker limits turn into NaN when the interquartile range is 0
    # and the whiskers collapse onto the box.
    whis=(0, 100),
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
ax.set_xticks(box_positions)
ax.set_xticklabels(short_colnames)
ax.set_ylabel("Оценка по шкале 1-10")
ax.set_title("Оценка составляющих программы в целом (Q2)")

# Mark the mean of every box with a red diamond. No `label` is passed: the
# legend below is built from explicit proxy handles.
for position, ratings in zip(box_positions, q2_plot_data):
    ax.plot(position, ratings.mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="Медиана"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="Среднее"
    ),
]
ax.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
design_cols = [col for col in df.columns if col.startswith("Q5")]
short_colnames = [
    "Логичность\nсодержания",
    "Баланс теории\nи практики",
    "Применимость\nзнаний",
    "Актуальность\nзнаний",
    "Соотношение\nглобальных\nи региональных\nмодулей",
    "Достаточность\nпроектной\nработы",
    "Качество\nвыступающих",
]
# The tick labels are written by hand while the columns are discovered by
# prefix, so fail early with a clear message if the two do not line up.
if len(short_colnames) != len(design_cols):
    raise ValueError(
        f"Q5: {len(short_colnames)} tick labels for {len(design_cols)} columns"
    )
box_positions = [0.5 * i for i in range(1, len(design_cols) + 1)]

# Unanswered items are dropped: matplotlib's boxplot does not discard NaN.
q5_plot_data = [df[col].dropna() for col in design_cols]

fig, ax = plt.subplots(figsize=(14, 6))
boxplot = ax.boxplot(
    q5_plot_data,
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    # Percentile whiskers (0, 100) always span min..max. With `whis=np.inf`
    # the whisker limits turn into NaN when the interquartile range is 0
    # and the whiskers collapse onto the box.
    whis=(0, 100),
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
ax.set_xticks(box_positions)
ax.set_xticklabels(short_colnames)
ax.set_ylabel("Оценка по шкале 1-10")
ax.set_title("Распределение оценок курса по дизайну программы (Q5)")

# Mark the mean of every box with a red diamond. No `label` is passed: the
# legend below is built from explicit proxy handles.
for position, ratings in zip(box_positions, q5_plot_data):
    ax.plot(position, ratings.mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="Медиана"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="Среднее"
    ),
]
ax.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
international_cols = [col for col in df.columns if col.startswith("Q7")]
short_colnames = [
    "Качество\nкейсов",
    "Применимость\nзнаний",
    "Групповая\nработа",
    "Выбор\nлокаций",
]
# The tick labels are written by hand while the columns are discovered by
# prefix, so fail early with a clear message if the two do not line up.
if len(short_colnames) != len(international_cols):
    raise ValueError(
        f"Q7: {len(short_colnames)} tick labels for {len(international_cols)} columns"
    )
box_positions = [0.5 * i for i in range(1, len(international_cols) + 1)]

# Unanswered items are dropped: matplotlib's boxplot does not discard NaN.
q7_plot_data = [df[col].dropna() for col in international_cols]

fig, ax = plt.subplots(figsize=(9, 6))
boxplot = ax.boxplot(
    q7_plot_data,
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    # Percentile whiskers (0, 100) always span min..max. With `whis=np.inf`
    # the whisker limits turn into NaN when the interquartile range is 0
    # and the whiskers collapse onto the box.
    whis=(0, 100),
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
ax.set_xticks(box_positions)
ax.set_xticklabels(short_colnames)
ax.set_ylabel("Оценка по шкале 1-10")
ax.set_title("Распределение оценок курса на международных модулях (Q7)")

# Mark the mean of every box with a red diamond. No `label` is passed: the
# legend below is built from explicit proxy handles.
for position, ratings in zip(box_positions, q7_plot_data):
    ax.plot(position, ratings.mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="Медиана"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="Среднее"
    ),
]
ax.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
support_cols = [col for col in df.columns if col.startswith("Q8")]
short_colnames = [
    "Отклик\nна потребности",
    "Организация\nобразовательного\nпроцесса",
]
# The tick labels are written by hand while the columns are discovered by
# prefix, so fail early with a clear message if the two do not line up.
if len(short_colnames) != len(support_cols):
    raise ValueError(
        f"Q8: {len(short_colnames)} tick labels for {len(support_cols)} columns"
    )
box_positions = [0.5 * i for i in range(1, len(support_cols) + 1)]

# Unanswered items are dropped: matplotlib's boxplot does not discard NaN.
q8_plot_data = [df[col].dropna() for col in support_cols]

fig, ax = plt.subplots(figsize=(8, 4))
boxplot = ax.boxplot(
    q8_plot_data,
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    # Percentile whiskers (0, 100) always span min..max. With `whis=np.inf`
    # the whisker limits turn into NaN when the interquartile range is 0
    # and the whiskers collapse onto the box.
    whis=(0, 100),
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
ax.set_xticks(box_positions)
ax.set_xticklabels(short_colnames)
ax.set_ylabel("Оценка по шкале 1-10")
ax.set_title("Распределение оценок курса по работе команды (Q8)")

# Mark the mean of every box with a red diamond. No `label` is passed: the
# legend below is built from explicit proxy handles.
for position, ratings in zip(box_positions, q8_plot_data):
    ax.plot(position, ratings.mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="Медиана"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="Среднее"
    ),
]
ax.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
group_cols = [col for col in df.columns if col.startswith("Q9")]
short_colnames = [
    "Поддержка\nи взаимопомощь",
    "Опыт и знания\nодногруппников",
    "Разнообразие\nиндустрий",
    # Spelling of the first word corrected (was misspelled in the tick label).
    "Приобретение\nделовых\nконтактов",
]
# The tick labels are written by hand while the columns are discovered by
# prefix, so fail early with a clear message if the two do not line up.
if len(short_colnames) != len(group_cols):
    raise ValueError(
        f"Q9: {len(short_colnames)} tick labels for {len(group_cols)} columns"
    )
box_positions = [0.5 * i for i in range(1, len(group_cols) + 1)]

# Unanswered items are dropped: matplotlib's boxplot does not discard NaN.
q9_plot_data = [df[col].dropna() for col in group_cols]

fig, ax = plt.subplots(figsize=(9, 6))
boxplot = ax.boxplot(
    q9_plot_data,
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    # Percentile whiskers (0, 100) always span min..max. With `whis=np.inf`
    # the whisker limits turn into NaN when the interquartile range is 0
    # and the whiskers collapse onto the box.
    whis=(0, 100),
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
ax.set_xticks(box_positions)
ax.set_xticklabels(short_colnames)
ax.set_ylabel("Оценка по шкале 1-10")
ax.set_title("Распределение оценок курса по качеству группы (Q9)")

# Mark the mean of every box with a red diamond. No `label` is passed: the
# legend below is built from explicit proxy handles.
for position, ratings in zip(box_positions, q9_plot_data):
    ax.plot(position, ratings.mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="Медиана"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="Среднее"
    ),
]
ax.legend(handles=legend_handles, loc="upper right")
plt.show()

### NPS

In [None]:
nps_colname = (
    "Q12.1  - 🔴  Готовы ли вы порекомендовать программу своим друзьям/коллегам?"
)

num_students = df.shape[0]

# Only respondents who actually answered the question take part in the NPS;
# counting unanswered rows in the denominator would pull the score towards 0.
nps_scores = df[nps_colname].dropna()
num_nps_answers = len(nps_scores)
if num_nps_answers == 0:
    raise ValueError(f"No answers found in column {nps_colname!r}")

# Promoters answered 9 or higher, critics 6 or lower.
num_promoters = int((nps_scores >= 9).sum())
num_critics = int((nps_scores <= 6).sum())

# Multiply before dividing and round to the nearest integer: truncating with
# int() turned e.g. 57/100*100 == 56.99999999999999 into 56.
nps_value = round((num_promoters - num_critics) * 100 / num_nps_answers)

In [None]:
# Reference NPS values next to the program's own score. The three benchmark
# numbers are hard-coded; their source is not visible in this notebook.
labels = [
    "Уровень 'Отлично'\n by Questionstar",
    PROGRAM_NAME,
    "Сфера образования",
    "Сфера высшего\nобразования",
]
values = [30, nps_value, 42, 51]
# Highlight the analysed program in blue, benchmarks stay grey.
colors = ["blue" if label == PROGRAM_NAME else "grey" for label in labels]

# Background bands for the NPS grades: (lower bound, upper bound, colour, grade).
nps_bands = [
    (0, 20, "#b2df8a", "Good"),
    (20, 50, "#66bb6a", "Favorable"),
    (50, 80, "#388e3c", "Excellent"),
    (80, 100, "#1b5e20", "World class"),
]

fig, ax = plt.subplots(figsize=(8, 5))

for low, high, band_color, grade in nps_bands:
    ax.axhspan(
        low, high, color=band_color, alpha=0.4, label=f"{grade} (by Bain & Company)"
    )

# Lollipop per entry: a stem from the bottom of the axis, a dot at the score
# and the score printed just above the dot.
for i, (val, color) in enumerate(zip(values, colors)):
    ax.plot([i, i], [-100, val], color="black", linewidth=1.2)
    ax.plot(i, val, "o", color=color, markersize=10)
    ax.text(
        i,
        val + 2,
        f"{val}%",
        ha="center",
        va="bottom",
        fontsize=10,
        fontweight="bold",
        color=color,
    )

ax.set_xticks(range(len(labels)))
ax.set_xticklabels(labels)
ax.set_ylim(-100, 100)
ax.set_ylabel("NPS (%)")
# Use the configured program name instead of a hard-coded cohort name so the
# title always matches the highlighted tick label.
ax.set_title(f"NPS {PROGRAM_NAME} vs NPS индустрии")
ax.legend(loc="lower right")

plt.tight_layout()
plt.show()

In [None]:
# NPS of earlier cohorts and school-wide references next to the program's own
# score. The reference numbers are hard-coded; their source is not visible in
# this notebook.
labels = [
    "EMBA-31+32",
    "EMBA-33",
    "EMBA-34",
    PROGRAM_NAME,
    "SKOLKOVO DEGREE",
    "SKOLKOVO EMBA average",
]
values = [57, 47, 51, nps_value, 65, 77]
# Highlight the analysed program in blue, references stay grey.
colors = ["blue" if label == PROGRAM_NAME else "grey" for label in labels]

# Background bands for the NPS grades: (lower bound, upper bound, colour, grade).
nps_bands = [
    (0, 20, "#b2df8a", "Good"),
    (20, 50, "#66bb6a", "Favorable"),
    (50, 80, "#388e3c", "Excellent"),
    (80, 100, "#1b5e20", "World class"),
]

fig, ax = plt.subplots(figsize=(16, 6))

for low, high, band_color, grade in nps_bands:
    ax.axhspan(
        low, high, color=band_color, alpha=0.4, label=f"{grade} (by Bain & Company)"
    )

# Lollipop per entry: a stem from the bottom of the axis, a dot at the score
# and the score printed just above the dot.
for i, (val, color) in enumerate(zip(values, colors)):
    ax.plot([i, i], [-100, val], color="black", linewidth=1.2)
    ax.plot(i, val, "o", color=color, markersize=10)
    ax.text(
        i,
        val + 2,
        f"{val}%",
        ha="center",
        va="bottom",
        fontsize=10,
        fontweight="bold",
        color=color,
    )

ax.set_xticks(range(len(labels)))
ax.set_xticklabels(labels)
ax.set_ylim(-100, 100)
ax.set_ylabel("NPS (%)")
ax.set_title("Сравнение NPS")
ax.legend(loc="lower right")

plt.tight_layout()
plt.show()

### PILOs (Q3)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

q3_cols = [col for col in df.columns if col.startswith("Q3")]
short_colnames = [
    "Экспертный уровень\nзнания бизнес-дисциплин",
    "Анализ данных для\nпринятия решений",
    "Определение стратегии\nдля устойчивого развития",
    "Интеграционное\nлидерство",
    "Эффективная\nкоммуникация",
    "Структурирование\nстратегий",
    "Оценка контекста\nи технологий",
    "Внедрение\nERS",
    "Креативность\nноваторство",
    "Предпринимательское\nмышление",
]
# Work on a copy of the Q3 ratings under short display names. The column
# assignment raises if the number of Q3 columns differs from the label count.
PILOs_df = df[q3_cols].copy()
PILOs_df.columns = short_colnames
# Ratings are cast to integers so they can be counted per score; note that
# astype(int) raises if a rating is missing.
PILOs_df = PILOs_df.astype(int)

# total number of scores (1-10) per each PILO
rating_counts = {
    pilo: PILOs_df[pilo].value_counts().reindex(range(1, 11), fill_value=0)
    for pilo in short_colnames
}
rating_counts_df = pd.DataFrame(rating_counts)

# calculate total number of low (below 7) and high (above 8) scores
low_counts = (PILOs_df < 7).sum()
high_counts = (PILOs_df > 8).sum()


cmap = plt.get_cmap("RdYlGn", 10)
colors = [cmap(i) for i in range(10)]
fig, ax = plt.subplots(figsize=(14, 8))
# Running top of each stacked bar. It is kept as a plain ndarray for the whole
# cell: the previous `bottoms += counts` with a Series silently rebound the
# name to a Series, which the later `.iloc` access depended on.
bottoms = np.zeros(len(short_colnames))

# plot stacked bars per PILO for each rating
for rating in range(1, 11):
    counts = rating_counts_df.loc[rating].to_numpy()
    ax.bar(
        short_colnames,
        counts,
        bottom=bottoms,
        color=colors[rating - 1],
        label=f"Оценка {rating}",
    )
    bottoms = bottoms + counts

# annotate every bar with its number of low and high scores
for i, pilo in enumerate(short_colnames):
    annotation = f"↓{low_counts[pilo]} ↑{high_counts[pilo]}"
    ax.text(
        i,
        bottoms[i] + 0.5,
        annotation,
        ha="center",
        va="bottom",
        fontweight="bold",
        fontsize=10,
        bbox=dict(facecolor="white", edgecolor="gray"),
    )

# Extend the per-rating legend with two invisible entries that explain the arrows.
handles = ax.get_legend_handles_labels()[0]
handles.append(Line2D([], [], color="none", label="↓ Кол-во низких оценок"))
handles.append(Line2D([], [], color="none", label="↑ Кол-во высоких оценок"))
ax.legend(handles=handles, loc="best")

ax.set_ylim(0, bottoms.max() + 2)
ax.set_yticks(list(range(1, len(PILOs_df) + 1)))
ax.set_ylabel("Количество оценок")
plt.xticks(rotation=45)
plt.title("Распределение оценок PILOs")
plt.tight_layout()
plt.show()

### Best lectors & events

In [None]:
# Every Q6 column holds lecturer mentions; the value 0 is shown as "Никто".
lectors_cols = list(filter(lambda name: name.startswith("Q6"), df.columns))
all_lectors = df[lectors_cols].replace(0, "Никто").values.flatten()
lectors_counts = pd.Series(all_lectors).value_counts().sort_values(ascending=False)

# Bars with the same number of mentions share a colour; the colours are taken
# in order from the reversed "Greens" map, one step per distinct count.
unique_counts = lectors_counts.unique()
cmap = plt.get_cmap("Greens_r", len(unique_counts) + 1)
cnt_to_color = dict(zip(unique_counts, map(cmap, range(len(unique_counts)))))
colors = [cnt_to_color[mentions] for mentions in lectors_counts]

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=lectors_counts.values,
    y=lectors_counts.index,
    hue=lectors_counts.index,
    palette=colors,
    legend=False,
    ax=ax,
)
# One tick per whole number of mentions.
ax.set_xticks(list(range(0, max(lectors_counts) + 1)))
ax.set_xlabel("Количество упоминаний")
ax.set_ylabel("Фамилия профессора")
ax.set_title("Самые запоминающиеся профессора")
fig.tight_layout()
plt.show()

In [None]:
# Display names for the Q11 columns (in column order) plus, as the last entry,
# the derived "refused everything" column.
short_colnames = [
    "качестве приглашенного спикера",
    "качестве ментора",
    "мероприятиях для выпускников",
    "адмиссии",
    "другое",
    "качестве спикера",
    "в качестве протагониста",
    "отказываюсь",
]
events_cols = [col for col in df.columns if col.startswith("Q11")]
# The labels are written by hand while the columns are discovered by prefix,
# so fail early with a clear message if the two do not line up.
if len(short_colnames) != len(events_cols) + 1:
    raise ValueError(
        f"Q11: {len(short_colnames)} labels for {len(events_cols)} columns "
        "plus the derived refusal column"
    )

# A cell counts as "willing" when it is truthy after "No comments" is mapped
# to False. NaN is truthy under astype(bool), so unanswered cells are masked
# out explicitly instead of being counted as agreement.
answers = df[events_cols].replace("No comments", False)
all_events = answers.notna() & answers.astype(bool)
# A respondent refuses when none of the activities was selected.
all_events["Отказываюсь"] = (~all_events).all(axis=1)
all_events.columns = short_colnames

# Number of respondents per activity, most popular first.
events_counts = all_events.sum().sort_values(ascending=False)
unique_counts = events_counts.unique()

# Bars with the same count share a colour; the colours are taken in order
# from the reversed "Greens" map, one step per distinct count.
cmap = plt.get_cmap("Greens_r", len(unique_counts) + 1)
cnt_to_color = {count: cmap(i) for i, count in enumerate(unique_counts)}
colors = [cnt_to_color[cnt] for cnt in events_counts]

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=events_counts.values,
    y=events_counts.index,
    palette=colors,
    hue=events_counts.index,
    legend=False,
    ax=ax,
)
# One tick per whole number of mentions.
ax.set_xticks(list(range(0, max(events_counts) + 1)))
ax.set_xlabel("Количество упоминаний")
ax.set_ylabel("Готов участвовать в...")
ax.set_title("В каких активностях Вы готовы принимать участие?")
fig.tight_layout()
plt.show()

# Pairplots