In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.lines import Line2D

# Notebook-wide display settings: seaborn "darkgrid" style for every figure,
# and no cap on how many DataFrame columns pandas renders.
sns.set_style("darkgrid")
pd.set_option("display.max_columns", None)

In [None]:
# Display name of the analysed cohort; used as a tick label in the NPS charts.
PROGRAM_NAME = "SAMPLE PROGRAM NAME"
# Location of the input workbook, relative to the notebook's working directory.
table_path = "../artifacts/processed.xlsx"
# Every later cell reads from this frame.
df = pd.read_excel(table_path)

In [None]:
# Quick look at the first two rows to check the column layout.
df.head(2)

# Basic plots

### age distribution

In [None]:
# Answer codes of the age question -> bracket labels shown on the chart.
age_groups = {1.0: "18-24", 2.0: "25-34", 3.0: "35-44", 4.0: "45-54", 5.0: "55+"}
# Respondents per age code (ordered by frequency), then relabelled with the
# readable brackets; a code missing from age_groups becomes NaN.
age_counts = df["Q15 üî¥  –£–∫–∞–∂–∏—Ç–µ –≤–∞—à  –≤–æ–∑—Ä–∞—Å—Ç"].value_counts()
age_counts.index = age_counts.index.map(age_groups)
# One palette colour per wedge.
colors = sns.color_palette("RdYlGn_r", n_colors=len(age_counts))


def make_autopct(values):
    """Build a ``plt.pie`` ``autopct`` callback that shows share and raw count.

    Args:
        values: Iterable with the wedge sizes that are passed to ``plt.pie``.

    Returns:
        A function mapping a wedge percentage to ``"<pct>%\\n(<count>)"``,
        where the count is recovered from the percentage and the total.
    """
    # The total is identical for every wedge: compute it once here rather
    # than on each callback invocation (also safe for one-shot iterables).
    total = sum(values)

    def my_autopct(pct):
        count = int(round(pct * total / 100.0))
        return f"{pct:.1f}%\n({count})"

    return my_autopct


# Pie chart of the age brackets; each wedge is annotated with share and count.
fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    age_counts,
    labels=age_counts.index,
    colors=colors,
    autopct=make_autopct(age_counts),
    pctdistance=0.8,
    startangle=90,
)
ax.set_title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –≤–æ–∑—Ä–∞—Å—Ç–æ–≤")
plt.show()

### industries distribution

In [None]:
# All Q13 option columns except the Q13.6 ones.
job_cols = [
    name
    for name in df.columns
    if name.startswith("Q13") and not name.startswith("Q13.6")
]
# Column totals, keeping only options picked at least once, largest first.
jobs_counts = (
    df[job_cols]
    .sum()
    .loc[lambda totals: totals > 0]
    .sort_values(ascending=False)
)


def make_autopct(values):
    """Build a ``plt.pie`` ``autopct`` callback that shows share and raw count.

    Same helper as in the age-distribution cell; redefined here so that this
    cell can run on its own.

    Args:
        values: Iterable with the wedge sizes that are passed to ``plt.pie``.

    Returns:
        A function mapping a wedge percentage to ``"<pct>%\\n(<count>)"``.
    """
    # Loop-invariant: compute the total once instead of once per wedge.
    total = sum(values)

    def my_autopct(pct):
        count = int(round(pct * total / 100.0))
        return f"{pct:.1f}%\n({count})"

    return my_autopct


# Pie chart of the Q13 totals; option names go into a side legend instead of
# being written on the wedges.
colors = sns.color_palette("RdYlGn_r", n_colors=len(jobs_counts))
fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    jobs_counts,
    colors=colors,
    autopct=make_autopct(jobs_counts),
    pctdistance=0.8,
    startangle=90,
)
ax.legend(
    labels=jobs_counts.index,
    title="–°—Ñ–µ—Ä–∞ –¥–µ—è—Ç–µ–ª—å–Ω–æ—Å—Ç–∏",
    loc="center left",
    bbox_to_anchor=(1, 0.5),
)
ax.set_title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ —Å—Ñ–µ—Ä –¥–µ—è—Ç–µ–ª—å–Ω–æ—Å—Ç–∏")
plt.show()

In [None]:
# Answer codes of the industry question (Q14) -> names shown in the legend.
industry_codes = {
    1: "–°—Ä–µ–¥—Å—Ç–≤–∞ –º–∞—Å—Å–æ–≤–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏ –∏ —Ä–∞–∑–≤–ª–µ—á–µ–Ω–∏—è",
    2: "–ó–¥—Ä–∞–≤–æ–æ—Ö—Ä–∞–Ω–µ–Ω–∏–µ",
    3: "–û–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ",
    4: "–ù–µ–∫–æ–º–º–µ—Ä—á–µ—Å–∫–∏–µ –æ—Ä–≥–∞–Ω–∏–∑–∞—Ü–∏–∏, –Ω–µ–ø—Ä–∞–≤–∏—Ç–µ–ª—å—Å—Ç–≤–µ–Ω–Ω—ã–µ –æ—Ä–≥–∞–Ω–∏–∑–∞—Ü–∏–∏",
    5: "–ì–æ—Å—É–¥–∞—Ä—Å—Ç–≤–µ–Ω–Ω—ã–π —Å–µ–∫—Ç–æ—Ä",
    6: "–ö–æ–Ω—Å–∞–ª—Ç–∏–Ω–≥",
    7: "–ù–µ–¥–≤–∏–∂–∏–º–æ—Å—Ç—å",
    8: "–§–∏–Ω–∞–Ω—Å—ã",
    9: "–¢–µ—Ö–Ω–æ–ª–æ–≥–∏–∏",
    10: "–û—Ç–µ–ª–∏, –†–µ—Å—Ç–æ—Ä–∞–Ω—ã, –ö–µ–π—Ç–µ—Ä–∏–Ω–≥",
    11: "–õ–æ–≥–∏—Å—Ç–∏–∫–∞",
    12: "–¢–æ–≤–∞—Ä—ã –Ω–∞—Ä–æ–¥–Ω–æ–≥–æ –ø–æ—Ç—Ä–µ–±–ª–µ–Ω–∏—è",
    13: "–¢–æ—Ä–≥–æ–≤–ª—è",
    14: "–°—Ç—Ä–æ–∏—Ç–µ–ª—å—Å—Ç–≤–æ",
    15: "–≠–Ω–µ—Ä–≥–µ—Ç–∏–∫–∞",
    16: "–ü—Ä–æ–∏–∑–≤–æ–¥—Å—Ç–≤–æ",
    17: "–î–æ–±—ã—á–∞ –ø–æ–ª–µ–∑–Ω—ã—Ö –∏—Å–∫–æ–ø–∞–µ–º—ã—Ö",
    18: "–°–µ–ª—å—Å–∫–æ–µ —Ö–æ–∑—è–π—Å—Ç–≤–æ",
    19: "–î—Ä—É–≥–æ–µ",
}
# Respondents per industry code (ordered by frequency), relabelled with the
# names above; a code missing from industry_codes becomes NaN.
industry_counts = df[
    "Q14 üî¥  –£–∫–∞–∂–∏—Ç–µ –≤ –∫–∞–∫–æ–π –æ—Ç—Ä–∞—Å–ª–∏ –≤—ã –≤–µ–¥–µ—Ç–µ –¥–µ—è—Ç–µ–ª—å–Ω–æ—Å—Ç—å"
].value_counts()
industry_counts.index = industry_counts.index.map(industry_codes)

# One palette colour per wedge.
colors = sns.color_palette("RdYlGn_r", n_colors=len(industry_counts))


def make_autopct(values):
    """Build a ``plt.pie`` ``autopct`` callback that shows share and raw count.

    Same helper as in the earlier pie-chart cells; redefined here so that
    this cell can run on its own.

    Args:
        values: Iterable with the wedge sizes that are passed to ``plt.pie``.

    Returns:
        A function mapping a wedge percentage to ``"<pct>%\\n(<count>)"``.
    """
    # Loop-invariant: compute the total once instead of once per wedge.
    total = sum(values)

    def my_autopct(pct):
        count = int(round(pct * total / 100.0))
        return f"{pct:.1f}%\n({count})"

    return my_autopct


# Pie chart of the Q14 industry counts with the industry names in a side legend.
fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    industry_counts,
    colors=colors,
    autopct=make_autopct(industry_counts),
    pctdistance=0.8,
    startangle=90,
)
ax.legend(
    labels=industry_counts.index,
    title="–°—Ñ–µ—Ä–∞ –¥–µ—è—Ç–µ–ª—å–Ω–æ—Å—Ç–∏",
    loc="center left",
    bbox_to_anchor=(1, 0.5),
)
ax.set_title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ —Å—Ñ–µ—Ä –¥–µ—è—Ç–µ–ª—å–Ω–æ—Å—Ç–∏")
plt.show()

### CSI and boxplots

In [None]:
# Question numbers whose sub-items are discrete ratings. Columns are selected
# by name prefix, so "Q2" would also match e.g. "Q20" if such a column existed.
discrete_rate_cols = [
    [col for col in df.columns if col.startswith(f"Q{i}")]
    for i in [2, 3, 5, 7, 8, 9]
]

short_colnames = [
    "–û–±—â–∞—è –æ—Ü–µ–Ω–∫–∞\n–ø—Ä–æ–≥—Ä–∞–º–º—ã (Q2)",
    "–ù–∞—Å–∫–æ–ª—å–∫–æ –¥–æ—Å—Ç–∏–≥–Ω—É—Ç—ã\n—Ü–µ–ª–∏ –æ–±—É—á–µ–Ω–∏—è (Q3)",
    "–î–∏–∑–∞–π–Ω –ø—Ä–æ–≥—Ä–∞–º–º—ã (Q5)",
    "–û–ø—ã—Ç –Ω–∞\n–º–µ–∂–¥—É–Ω–∞—Ä–æ–¥–Ω—ã—Ö –º–æ–¥—É–ª—è—Ö (Q7)",
    "–†–∞–±–æ—Ç–∞ –∫–æ–º–∞–Ω–¥—ã\n–ø—Ä–æ–≥—Ä–∞–º–º—ã (Q8)",
    "–ö–∞—á–µ—Å—Ç–≤–æ –≥—Ä—É–ø–ø—ã (Q9)",
]
assert len(short_colnames) == len(discrete_rate_cols), (
    "one tick label is required per question block"
)
# Positions follow the number of boxes instead of a hard-coded range.
box_positions = [0.8 * i for i in range(1, len(short_colnames) + 1)]

# Per-respondent mean over the sub-items of each question (mean skips NaN).
discrete_plot_data = [df[cols].mean(axis=1) for cols in discrete_rate_cols]

plt.figure(figsize=(16, 6))
boxplot = plt.boxplot(
    # A single NaN turns the quartiles of a box into NaN, so respondents
    # without any answer in a block are dropped for the boxes only.
    [series.dropna() for series in discrete_plot_data],
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    whis=np.inf,  # whiskers span the full min..max range
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
plt.xticks(ticks=box_positions, labels=short_colnames)
plt.ylabel("–û—Ü–µ–Ω–∫–∞ –ø–æ —à–∫–∞–ª–µ 1-10")
plt.title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –æ—Ü–µ–Ω–æ–∫ —Ä–∞–∑–ª–∏—á–Ω—ã—Ö —Å–æ—Å—Ç–∞–≤–ª—è—é—â–∏—Ö –∫—É—Ä—Å–∞")

# Overlay the mean of every box as a red diamond. No per-marker label is set:
# the legend below is built from explicit handles, and the previous
# `i == 0.33` check could never match these positions anyway.
for position, series in zip(box_positions, discrete_plot_data):
    plt.plot(position, series.mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="–ú–µ–¥–∏–∞–Ω–∞"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="–°—Ä–µ–¥–Ω–µ–µ"
    ),
]
plt.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
knowledge_rate_col = (
    "Q2.1 –£–¥–æ–≤–ª–µ—Ç–≤–æ—Ä–µ–Ω–Ω–æ—Å—Ç—å –ø—Ä–∏–æ–±—Ä–µ—Ç–µ–Ω–Ω—ã–º–∏ –Ω–∞ –ø—Ä–æ–≥—Ä–∞–º–º–µ –∑–Ω–∞–Ω–∏—è–º–∏, —É–º–µ–Ω–∏—è–º–∏, –Ω–∞–≤—ã–∫–∞–º–∏"
)
prof_rate_col = "Q2.2 –ü—Ä–æ—Ñ–µ—Å—Å–æ—Ä—Å–∫–æ-–ø—Ä–µ–ø–æ–¥–∞–≤–∞—Ç–µ–ª—å—Å–∫–∏–π —Å–æ—Å—Ç–∞–≤"
admin_rate_col = "Q2.3 –ê–¥–º–∏–Ω–∏—Å—Ç—Ä–∞—Ç–∏–≤–Ω–∞—è –ø–æ–¥–¥–µ—Ä–∂–∫–∞ –ø—Ä–æ–≥—Ä–∞–º–º—ã"

# Each column is declared together with its tick label so the two cannot get
# out of step. Previously the data order was (admin, faculty, knowledge) while
# the labels were (admin, knowledge, faculty), which mislabelled two boxes.
q2_items = [
    (admin_rate_col, "–ê–¥–º–∏–Ω–∏—Å—Ç—Ä–∞—Ç–∏–≤–Ω–∞—è\n–ø–æ–¥–¥–µ—Ä–∂–∫–∞"),
    (prof_rate_col, "–ü–ü–°"),
    (knowledge_rate_col, "–ü—Ä–∏–æ–±—Ä–µ—Ç–µ–Ω–Ω—ã–µ\n–∑–Ω–∞–Ω–∏—è"),
]
q2_rate_cols = [col for col, _ in q2_items]
short_colnames = [label for _, label in q2_items]
box_positions = [0.33, 0.66, 0.99]

q2_plot_data = [df[col] for col in q2_rate_cols]

plt.figure(figsize=(9, 6))
boxplot = plt.boxplot(
    # A single NaN turns the quartiles of a box into NaN; drop blanks here.
    [ratings.dropna() for ratings in q2_plot_data],
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    whis=np.inf,  # whiskers span the full min..max range
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
plt.xticks(ticks=box_positions, labels=short_colnames)
plt.ylabel("–û—Ü–µ–Ω–∫–∞ –ø–æ —à–∫–∞–ª–µ 1-10")
plt.title("–û—Ü–µ–Ω–∫–∞ —Å–æ—Å—Ç–∞–≤–ª—è—é—â–∏—Ö –ø—Ä–æ–≥—Ä–∞–º–º—ã –≤ —Ü–µ–ª–æ–º (Q2)")

# Mean of every box as a red diamond; the legend uses explicit handles, so
# the markers themselves need no label.
for position, col in zip(box_positions, q2_rate_cols):
    plt.plot(position, df[col].mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="–ú–µ–¥–∏–∞–Ω–∞"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="–°—Ä–µ–¥–Ω–µ–µ"
    ),
]
plt.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
design_cols = [col for col in df.columns if col.startswith("Q5")]
short_colnames = [
    "–õ–æ–≥–∏—á–Ω–æ—Å—Ç—å\n—Å–æ–¥–µ—Ä–∂–∞–Ω–∏—è",
    "–ë–∞–ª–∞–Ω—Å —Ç–µ–æ—Ä–∏–∏\n–∏ –ø—Ä–∞–∫—Ç–∏–∫–∏",
    "–ü—Ä–∏–º–µ–Ω–∏–º–æ—Å—Ç—å\n–∑–Ω–∞–Ω–∏–π",
    "–ê–∫—Ç—É–∞–ª—å–Ω–æ—Å—Ç—å\n–∑–Ω–∞–Ω–∏–π",
    "–°–æ–æ—Ç–Ω–æ—à–µ–Ω–∏–µ\n–≥–ª–æ–±–∞–ª—å–Ω—ã—Ö\n–∏ —Ä–µ–≥–∏–æ–Ω–∞–ª—å–Ω—ã—Ö\n–º–æ–¥—É–ª–µ–π",
    "–î–æ—Å—Ç–∞—Ç–æ—á–Ω–æ—Å—Ç—å\n–ø—Ä–æ–µ–∫—Ç–Ω–æ–π\n—Ä–∞–±–æ—Ç—ã",
    "–ö–∞—á–µ—Å—Ç–≤–æ\n–≤—ã—Å—Ç—É–ø–∞—é—â–∏—Ö",
]
# Labels are hard-coded while columns are discovered by prefix; fail early
# and clearly if the two ever drift apart.
assert len(short_colnames) == len(design_cols), (
    f"expected {len(short_colnames)} Q5 columns, found {len(design_cols)}"
)
box_positions = [0.5 * i for i in range(1, len(design_cols) + 1)]

q5_plot_data = [df[col] for col in design_cols]

plt.figure(figsize=(14, 6))
boxplot = plt.boxplot(
    # A single NaN turns the quartiles of a box into NaN; drop blanks here.
    [ratings.dropna() for ratings in q5_plot_data],
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    whis=np.inf,  # whiskers span the full min..max range
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
plt.xticks(ticks=box_positions, labels=short_colnames)
plt.ylabel("–û—Ü–µ–Ω–∫–∞ –ø–æ —à–∫–∞–ª–µ 1-10")
plt.title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –æ—Ü–µ–Ω–æ–∫ –∫—É—Ä—Å–∞ –ø–æ –¥–∏–∑–∞–π–Ω—É –ø—Ä–æ–≥—Ä–∞–º–º—ã (Q5)")

# Mean of every box as a red diamond; the legend uses explicit handles, so
# the markers themselves need no label.
for position, col in zip(box_positions, design_cols):
    plt.plot(position, df[col].mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="–ú–µ–¥–∏–∞–Ω–∞"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="–°—Ä–µ–¥–Ω–µ–µ"
    ),
]
plt.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
international_cols = [col for col in df.columns if col.startswith("Q7")]
short_colnames = [
    "–ö–∞—á–µ—Å—Ç–≤–æ\n–∫–µ–π—Å–æ–≤",
    "–ü—Ä–∏–º–µ–Ω–∏–º–æ—Å—Ç—å\n–∑–Ω–∞–Ω–∏–π",
    "–ì—Ä—É–ø–ø–æ–≤–∞—è\n—Ä–∞–±–æ—Ç–∞",
    "–í—ã–±–æ—Ä\n–ª–æ–∫–∞—Ü–∏–π",
]
# Labels are hard-coded while columns are discovered by prefix; fail early
# and clearly if the two ever drift apart.
assert len(short_colnames) == len(international_cols), (
    f"expected {len(short_colnames)} Q7 columns, found {len(international_cols)}"
)
box_positions = [0.5 * i for i in range(1, len(international_cols) + 1)]

q7_plot_data = [df[col] for col in international_cols]

plt.figure(figsize=(9, 6))
boxplot = plt.boxplot(
    # A single NaN turns the quartiles of a box into NaN; drop blanks here.
    [ratings.dropna() for ratings in q7_plot_data],
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    whis=np.inf,  # whiskers span the full min..max range
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
plt.xticks(ticks=box_positions, labels=short_colnames)
plt.ylabel("–û—Ü–µ–Ω–∫–∞ –ø–æ —à–∫–∞–ª–µ 1-10")
plt.title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –æ—Ü–µ–Ω–æ–∫ –∫—É—Ä—Å–∞ –Ω–∞ –º–µ–∂–¥—É–Ω–∞—Ä–æ–¥–Ω—ã—Ö –º–æ–¥—É–ª—è—Ö (Q7)")

# Mean of every box as a red diamond; the legend uses explicit handles, so
# the markers themselves need no label.
for position, col in zip(box_positions, international_cols):
    plt.plot(position, df[col].mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="–ú–µ–¥–∏–∞–Ω–∞"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="–°—Ä–µ–¥–Ω–µ–µ"
    ),
]
plt.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
support_cols = [col for col in df.columns if col.startswith("Q8")]
short_colnames = [
    "–û—Ç–∫–ª–∏–∫\n–Ω–∞ –ø–æ—Ç—Ä–µ–±–Ω–æ—Å—Ç–∏",
    "–û—Ä–≥–∞–Ω–∏–∑–∞—Ü–∏—è\n–æ–±—Ä–∞–∑–æ–≤–∞—Ç–µ–ª—å–Ω–æ–≥–æ\n–ø—Ä–æ—Ü–µ—Å—Å–∞",
]
# Labels are hard-coded while columns are discovered by prefix; fail early
# and clearly if the two ever drift apart.
assert len(short_colnames) == len(support_cols), (
    f"expected {len(short_colnames)} Q8 columns, found {len(support_cols)}"
)
box_positions = [0.5 * i for i in range(1, len(support_cols) + 1)]

q8_plot_data = [df[col] for col in support_cols]

plt.figure(figsize=(8, 4))
boxplot = plt.boxplot(
    # A single NaN turns the quartiles of a box into NaN; drop blanks here.
    [ratings.dropna() for ratings in q8_plot_data],
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    whis=np.inf,  # whiskers span the full min..max range
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
plt.xticks(ticks=box_positions, labels=short_colnames)
plt.ylabel("–û—Ü–µ–Ω–∫–∞ –ø–æ —à–∫–∞–ª–µ 1-10")
plt.title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –æ—Ü–µ–Ω–æ–∫ –∫—É—Ä—Å–∞ –ø–æ —Ä–∞–±–æ—Ç–µ –∫–æ–º–∞–Ω–¥—ã (Q8)")

# Mean of every box as a red diamond; the legend uses explicit handles, so
# the markers themselves need no label.
for position, col in zip(box_positions, support_cols):
    plt.plot(position, df[col].mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="–ú–µ–¥–∏–∞–Ω–∞"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="–°—Ä–µ–¥–Ω–µ–µ"
    ),
]
plt.legend(handles=legend_handles, loc="upper right")
plt.show()

In [None]:
group_cols = [col for col in df.columns if col.startswith("Q9")]
short_colnames = [
    "–ü–æ–¥–¥–µ—Ä–∂–∫–∞\n–∏ –≤–∑–∞–∏–º–æ–ø–æ–º–æ—â—å",
    "–û–ø—ã—Ç –∏ –∑–Ω–∞–Ω–∏—è\n–æ–¥–Ω–æ–≥—Ä—É–ø–ø–Ω–∏–∫–æ–≤",
    "–†–∞–∑–Ω–æ–æ–±—Ä–∞–∑–∏–µ\n–∏–Ω–¥—É—Å—Ç—Ä–∏–π",
    # Spelling of the first word of this label corrected (was misspelled).
    "–ü—Ä–∏–æ–±—Ä–µ—Ç–µ–Ω–∏–µ\n–¥–µ–ª–æ–≤—ã—Ö\n–∫–æ–Ω—Ç–∞–∫—Ç–æ–≤",
]
# Labels are hard-coded while columns are discovered by prefix; fail early
# and clearly if the two ever drift apart.
assert len(short_colnames) == len(group_cols), (
    f"expected {len(short_colnames)} Q9 columns, found {len(group_cols)}"
)
box_positions = [0.5 * i for i in range(1, len(group_cols) + 1)]

q9_plot_data = [df[col] for col in group_cols]

plt.figure(figsize=(9, 6))
boxplot = plt.boxplot(
    # A single NaN turns the quartiles of a box into NaN; drop blanks here.
    [ratings.dropna() for ratings in q9_plot_data],
    patch_artist=True,
    showfliers=False,
    positions=box_positions,
    whis=np.inf,  # whiskers span the full min..max range
)
for patch in boxplot["boxes"]:
    patch.set_facecolor("lightgreen")
for median in boxplot["medians"]:
    median.set_linewidth(2)
    median.set_color("red")
plt.xticks(ticks=box_positions, labels=short_colnames)
plt.ylabel("–û—Ü–µ–Ω–∫–∞ –ø–æ —à–∫–∞–ª–µ 1-10")
plt.title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –æ—Ü–µ–Ω–æ–∫ –∫—É—Ä—Å–∞ –ø–æ –∫–∞—á–µ—Å—Ç–≤—É –≥—Ä—É–ø–ø—ã (Q9)")

# Mean of every box as a red diamond; the legend uses explicit handles, so
# the markers themselves need no label.
for position, col in zip(box_positions, group_cols):
    plt.plot(position, df[col].mean(), marker="D", color="red")

legend_handles = [
    Line2D([0], [0], color="red", lw=2, label="–ú–µ–¥–∏–∞–Ω–∞"),
    Line2D(
        [0], [0], marker="D", color="lightgreen", markerfacecolor="red", label="–°—Ä–µ–¥–Ω–µ–µ"
    ),
]
plt.legend(handles=legend_handles, loc="upper right")
plt.show()

### NPS

In [None]:
nps_colname = (
    "Q12.1  - üî¥  –ì–æ—Ç–æ–≤—ã –ª–∏ –≤—ã –ø–æ—Ä–µ–∫–æ–º–µ–Ω–¥–æ–≤–∞—Ç—å –ø—Ä–æ–≥—Ä–∞–º–º—É —Å–≤–æ–∏–º –¥—Ä—É–∑—å—è–º/–∫–æ–ª–ª–µ–≥–∞–º?"
)

# Only respondents who actually answered count towards NPS; blank cells would
# otherwise sit in the denominator as silent "passives".
nps_scores = df[nps_colname].dropna()

num_students = len(nps_scores)
num_promoters = int((nps_scores >= 9).sum())  # scores 9-10
num_critics = int((nps_scores <= 6).sum())  # scores 0-6

if num_students == 0:
    raise ValueError(f"No answers found in column {nps_colname!r}")

# NPS = %promoters - %detractors, rounded to the nearest integer. int() would
# truncate toward zero (56.99999 -> 56, and asymmetric for negative values).
nps_value = round((num_promoters - num_critics) / num_students * 100)

In [None]:
# Benchmarks to compare the programme against; the numbers next to
# PROGRAM_NAME are hard-coded reference values.
labels = [
    "–£—Ä–æ–≤–µ–Ω—å '–û—Ç–ª–∏—á–Ω–æ'\n by QuestionStar",
    PROGRAM_NAME,
    "–°—Ñ–µ—Ä–∞ –æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è",
    "–°—Ñ–µ—Ä–∞ –≤—ã—Å—à–µ–≥–æ\n–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏—è",
]
values = [30, nps_value, 42, 51]
colors = ["grey", "blue", "gray", "grey"]  # the analysed programme is blue

# Background bands: (lower bound, upper bound, colour, legend entry).
nps_bands = [
    (0, 20, "#b2df8a", "Good (by Bain & Company)"),
    (20, 50, "#66bb6a", "Favorable (by Bain & Company)"),
    (50, 80, "#388e3c", "Excellent (by Bain & Company)"),
    (80, 100, "#1b5e20", "World class (by Bain & Company)"),
]

fig, ax = plt.subplots(figsize=(8, 5))

for low, high, shade, band_label in nps_bands:
    ax.axhspan(low, high, color=shade, alpha=0.4, label=band_label)

# Lollipop per benchmark: a stem from the bottom of the axis up to the value,
# a dot at the value and the number written just above it.
for i, (val, color) in enumerate(zip(values, colors)):
    ax.plot([i, i], [-100, val], color="black", linewidth=1.2)
    ax.plot(i, val, "o", color=color, markersize=10)

    ax.text(
        i,
        val + 2,
        f"{val}%",
        ha="center",
        va="bottom",
        fontsize=10,
        fontweight="bold",
        color=color,
    )

ax.set_xticks(range(len(labels)))
ax.set_xticklabels(labels)
ax.set_ylim(-100, 100)
ax.set_ylabel("NPS (%)")
# Use the configured programme name instead of a hard-coded cohort.
ax.set_title(f"NPS {PROGRAM_NAME} vs NPS –∏–Ω–¥—É—Å—Ç—Ä–∏–∏")
ax.legend(loc="lower right")

plt.tight_layout()
plt.show()

In [None]:
# Earlier cohorts and school-level averages to compare against; the numbers
# next to PROGRAM_NAME are hard-coded reference values.
labels = [
    "EMBA-31+32",
    "EMBA-33",
    "EMBA-34",
    PROGRAM_NAME,
    "SKOLKOVO DEGREE",
    "SKOLKOVO EMBA average",
]
values = [57, 47, 51, nps_value, 65, 77]
colors = ["grey", "gray", "grey", "blue", "gray", "grey"]  # analysed programme is blue

# Background bands: (lower bound, upper bound, colour, legend entry).
nps_bands = [
    (0, 20, "#b2df8a", "Good (by Bain & Company)"),
    (20, 50, "#66bb6a", "Favorable (by Bain & Company)"),
    (50, 80, "#388e3c", "Excellent (by Bain & Company)"),
    (80, 100, "#1b5e20", "World class (by Bain & Company)"),
]

fig, ax = plt.subplots(figsize=(16, 6))

for low, high, shade, band_label in nps_bands:
    ax.axhspan(low, high, color=shade, alpha=0.4, label=band_label)

# Lollipop per entry: a stem from the bottom of the axis up to the value,
# a dot at the value and the number written just above it.
for i, (val, color) in enumerate(zip(values, colors)):
    ax.plot([i, i], [-100, val], color="black", linewidth=1.2)
    ax.plot(i, val, "o", color=color, markersize=10)

    ax.text(
        i,
        val + 2,
        f"{val}%",
        ha="center",
        va="bottom",
        fontsize=10,
        fontweight="bold",
        color=color,
    )

ax.set_xticks(range(len(labels)))
ax.set_xticklabels(labels)
ax.set_ylim(-100, 100)
ax.set_ylabel("NPS (%)")
ax.set_title("–°—Ä–∞–≤–Ω–µ–Ω–∏–µ NPS")
ax.legend(loc="lower right")

plt.tight_layout()
plt.show()

### PILOs (Q3)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

q3_cols = [col for col in df.columns if col.startswith("Q3")]
short_colnames = [
    "–≠–∫—Å–ø–µ—Ä—Ç–Ω—ã–π —É—Ä–æ–≤–µ–Ω—å\n–∑–Ω–∞–Ω–∏—è –±–∏–∑–Ω–µ—Å-–¥–∏—Å—Ü–∏–ø–ª–∏–Ω",
    "–ê–Ω–∞–ª–∏–∑ –¥–∞–Ω–Ω—ã—Ö –¥–ª—è\n–ø—Ä–∏–Ω—è—Ç–∏—è —Ä–µ—à–µ–Ω–∏–π",
    "–û–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ —Å—Ç—Ä–∞—Ç–µ–≥–∏–∏\n–¥–ª—è —É—Å—Ç–æ–π—á–∏–≤–æ–≥–æ —Ä–∞–∑–≤–∏—Ç–∏—è",
    "–ò–Ω—Ç–µ–≥—Ä–∞—Ü–∏–æ–Ω–Ω–æ–µ\n–ª–∏–¥–µ—Ä—Å—Ç–≤–æ",
    "–≠—Ñ—Ñ–µ–∫—Ç–∏–≤–Ω–∞—è\n–∫–æ–º–º—É–Ω–∏–∫–∞—Ü–∏—è",
    "–°—Ç—Ä—É–∫—Ç—É—Ä–∏—Ä–æ–≤–∞–Ω–∏–µ\n—Å—Ç—Ä–∞—Ç–µ–≥–∏–π",
    "–û—Ü–µ–Ω–∫–∞ –∫–æ–Ω—Ç–µ–∫—Å—Ç–∞\n–∏ —Ç–µ—Ö–Ω–æ–ª–æ–≥–∏–π",
    "–í–Ω–µ–¥—Ä–µ–Ω–∏–µ\nERS",
    "–ö—Ä–µ–∞—Ç–∏–≤–Ω–æ—Å—Ç—å\n–Ω–æ–≤–∞—Ç–æ—Ä—Å—Ç–≤–æ",
    "–ü—Ä–µ–¥–ø—Ä–∏–Ω–∏–º–∞—Ç–µ–ª—å—Å–∫–æ–µ\n–º—ã—à–ª–µ–Ω–∏–µ",
]
assert len(short_colnames) == len(q3_cols), (
    f"expected {len(short_colnames)} Q3 columns, found {len(q3_cols)}"
)
# Work on an explicit copy so that renaming the columns never touches (or
# warns about) the source frame. astype(int) needs every Q3 cell to be
# filled in: a NaN raises here.
PILOs_df = df[q3_cols].copy()
PILOs_df.columns = short_colnames
PILOs_df = PILOs_df.astype(int)

# total number of scores (1-10) per each PILO
rating_counts = {
    pilo: PILOs_df[pilo].value_counts().reindex(range(1, 11), fill_value=0)
    for pilo in short_colnames
}
rating_counts_df = pd.DataFrame(rating_counts)

# calculate total number of low (below 7) and high (above 8) scores
low_counts = (PILOs_df < 7).sum()
high_counts = (PILOs_df > 8).sum()


cmap = plt.get_cmap("RdYlGn", 10)
colors = [cmap(i) for i in range(10)]
fig, ax = plt.subplots(figsize=(14, 8))
# Running top of each stack. Kept as a plain ndarray for the whole cell so
# that it can be indexed positionally regardless of how numpy/pandas resolve
# an in-place `ndarray += Series`.
bottoms = np.zeros(len(short_colnames))

# plot stacked bars per PILO for each rating
for rating in range(1, 11):
    # Column order of rating_counts_df equals short_colnames, so the values
    # line up with the bars positionally.
    counts = rating_counts_df.loc[rating].to_numpy()
    ax.bar(
        short_colnames,
        counts,
        bottom=bottoms,
        color=colors[rating - 1],
        label=f"–û—Ü–µ–Ω–∫–∞ {rating}",
    )
    bottoms = bottoms + counts

# annotate every stack with its number of low and high scores
x = np.arange(len(short_colnames))
for i, pilo in enumerate(short_colnames):
    annotation = f"‚Üì{low_counts[pilo]} ‚Üë{high_counts[pilo]}"
    ax.text(
        x[i],
        bottoms[i] + 0.5,
        annotation,
        ha="center",
        va="bottom",
        fontweight="bold",
        fontsize=10,
        bbox=dict(facecolor="white", edgecolor="gray"),
    )

# Extend the automatic legend with two invisible handles that explain the
# arrows used in the annotations.
handles, _ = ax.get_legend_handles_labels()
handles.append(Line2D([], [], color="none", label="‚Üì –ö–æ–ª-–≤–æ –Ω–∏–∑–∫–∏—Ö –æ—Ü–µ–Ω–æ–∫"))
handles.append(Line2D([], [], color="none", label="‚Üë –ö–æ–ª-–≤–æ –≤—ã—Å–æ–∫–∏—Ö –æ—Ü–µ–Ω–æ–∫"))
ax.legend(handles=handles, loc="best")

ax.set_ylim(0, bottoms.max() + 2)
# One tick per respondent (each stack is as tall as the number of rows).
ax.set_yticks(list(range(1, len(PILOs_df) + 1)))
ax.set_ylabel("–ö–æ–ª–∏—á–µ—Å—Ç–≤–æ –æ—Ü–µ–Ω–æ–∫")
plt.xticks(rotation=45)
plt.title("–†–∞—Å–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ –æ—Ü–µ–Ω–æ–∫ PILOs")
plt.tight_layout()
plt.show()

### Best lecturers & events

In [None]:
# Pool every Q6 answer into one flat array; cells equal to 0 are replaced by
# a placeholder name (presumably "nobody mentioned" - TODO confirm).
lectors_cols = [name for name in df.columns if name.startswith("Q6")]
q6_answers = df[lectors_cols].replace(0, "–ù–∏–∫—Ç–æ")
all_lectors = q6_answers.values.flatten()
lectors_counts = pd.Series(all_lectors).value_counts().sort_values(ascending=False)

# Bars with the same number of mentions share one shade of green.
unique_counts = lectors_counts.unique()
cmap = plt.get_cmap("Greens_r", len(unique_counts) + 1)
cnt_to_color = {}
for shade_idx, mentions in enumerate(unique_counts):
    cnt_to_color[mentions] = cmap(shade_idx)
colors = [cnt_to_color[mentions] for mentions in lectors_counts]

fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(
    x=lectors_counts.values,
    y=lectors_counts.index,
    hue=lectors_counts.index,
    palette=colors,
    legend=False,
    ax=ax,
)
# Integer ticks only: mentions are whole numbers.
ax.set_xticks(list(range(0, max(lectors_counts) + 1)))
ax.set_xlabel("–ö–æ–ª–∏—á–µ—Å—Ç–≤–æ —É–ø–æ–º–∏–Ω–∞–Ω–∏–π")
ax.set_ylabel("–§–∞–º–∏–ª–∏—è –ø—Ä–æ—Ñ–µ—Å—Å–æ—Ä–∞")
ax.set_title("–°–∞–º—ã–µ –∑–∞–ø–æ–º–∏–Ω–∞—é—â–∏–µ—Å—è –ø—Ä–æ—Ñ–µ—Å—Å–æ—Ä–∞")
plt.tight_layout()
plt.show()

In [None]:
# Display names for the Q11 option columns (in column order) plus, last, the
# synthetic "declined everything" column added below.
short_colnames = [
    "–∫–∞—á–µ—Å—Ç–≤–µ –ø—Ä–∏–≥–ª–∞—à–µ–Ω–Ω–æ–≥–æ —Å–ø–∏–∫–µ—Ä–∞",
    "–∫–∞—á–µ—Å—Ç–≤–µ –º–µ–Ω—Ç–æ—Ä–∞",
    "–º–µ—Ä–æ–ø—Ä–∏—è—Ç–∏—è—Ö –¥–ª—è –≤—ã–ø—É—Å–∫–Ω–∏–∫–æ–≤",
    "–∞–¥–º–∏—Å—Å–∏–∏",
    "–¥—Ä—É–≥–æ–µ",
    "–∫–∞—á–µ—Å—Ç–≤–µ —Å–ø–∏–∫–µ—Ä–∞",
    "–≤ –∫–∞—á–µ—Å—Ç–≤–µ –ø—Ä–æ—Ç–∞–≥–æ–Ω–∏—Å—Ç–∞",
    "–æ—Ç–∫–∞–∑—ã–≤–∞—é—Å—å",
]
events_cols = [col for col in df.columns if col.startswith("Q11")]
raw_events = df[events_cols]
# Truthy cell = option chosen. astype(bool) would turn NaN into True and
# count a blank cell as a "yes", so blanks are masked out explicitly.
all_events = (
    raw_events.replace("No comments", False).astype(bool) & raw_events.notna()
)
# Respondents who picked none of the options.
all_events["–û—Ç–∫–∞–∑—ã–≤–∞—é—Å—å"] = (~all_events).all(axis=1)
all_events.columns = short_colnames

# Number of respondents per option, most popular first.
events_counts = all_events.sum().sort_values(ascending=False)
unique_counts = events_counts.unique()

# Bars with the same count share one shade of green.
cmap = plt.get_cmap("Greens_r", len(unique_counts) + 1)
cnt_to_color = {count: cmap(i) for i, count in enumerate(unique_counts)}
colors = [cnt_to_color[cnt] for cnt in events_counts]

plt.figure(figsize=(8, 5))
sns.barplot(
    x=events_counts.values,
    y=events_counts.index,
    palette=colors,
    hue=events_counts.index,
    legend=False,
)
# Integer ticks only: counts are whole numbers.
plt.xticks(list(range(0, max(events_counts) + 1)))
plt.xlabel("–ö–æ–ª–∏—á–µ—Å—Ç–≤–æ —É–ø–æ–º–∏–Ω–∞–Ω–∏–π")
plt.ylabel("–ì–æ—Ç–æ–≤ —É—á–∞—Å—Ç–≤–æ–≤–∞—Ç—å –≤...")
plt.title("–í –∫–∞–∫–∏—Ö –∞–∫—Ç–∏–≤–Ω–æ—Å—Ç—è—Ö –í—ã –≥–æ—Ç–æ–≤—ã –ø—Ä–∏–Ω–∏–º–∞—Ç—å —É—á–∞—Å—Ç–∏–µ?")
plt.tight_layout()
plt.show()

# Pairplots