In [None]:
# Render inline matplotlib figures as SVG.
%config InlineBackend.figure_formats = ['svg']
%matplotlib inline
# Reload imported project modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

**Varování**: Tato nástěnka zobrazuje odpovědi z textových a kvízových modulů dohromady. V textových odpovědích se neukazují odpovědi, které zadal pouze jeden uživatel.

## Jak tabulky interpretovat

Každá potenciální odpověď má jeden řádek.

### Correct/All evals ratio

- by ideálně mělo být 100 %
- indikuje to, v kolika procentech odevzdání byla daná možnost správně (ne)zaškrtnutá
    - i.e. 100 % by znamenalo, že pokud je to
        - správná odpověď, tak byla ve všech odevzdáních zaškrtnuta
        - špatná odpověď, tak nebyla v žádném odevzdání zaškrtnuta
- potenciální problém: jeden řešitel, který neví a tipuje, má větší efekt na procento než řešitel, který to dá na první pokus správně

**Důsledky pro seminář**:

- odpovědi, které mají nízké procento (třeba méně než 80 %), jsou "chytáky", nebo potenciálně špatně vysvětlené


### Count evals

- absolutní počet odevzdání, která měla danou odpověď zaškrtnutou
- pro správné odpovědi je podkreslení zelené
- pro špatné odpovědi je podkreslení červené
- potenciální problém stejný jako pro předchozí sloupec: jeden řešitel, který neví a tipuje, má větší efekt na procento než řešitel, který to dá na první pokus správně


### Ticked at least once/All users ratio

- Procento řešitelů, kteří odpověď zaškrtli alespoň jednou, ze všech řešitelů, kteří modul řešili.
- Pro špatné odpovědi bychom ideálně chtěli 0 %, pro správné odpovědi ideálně 100 %.
- 100 % u správné odpovědi je špatně dosažitelných: stačí jeden řešitel, jenž úlohu odevzdal neúspěšně a pak ji nikdy úspěšně nedokončil.

**Důsledky pro seminář**:

Špatné odpovědi by měly mít malé procento, pokud nemají, tak jsou to chytáky, nebo špatně vysvětlené.

Správné odpovědi by měly mít velké procento, pokud nemají, tak jsme nejspíš nedostatečně vysvětlili, proč je tvrzení pravdivé.

Opakovaně neúspěšní/tipující řešitelé nemají takový vliv na výsledná procenta.


In [None]:
import sys
sys.path.append('..')

import html
import json
import re
from collections import Counter
from collections import OrderedDict, namedtuple
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display, HTML, Markdown
from sqlalchemy import func, distinct, text, and_

import util
from util.year import year as current_year
from db import session
import model

# Show floats with two decimals in plain DataFrame output.
pd.options.display.float_format = '{:.2f}'.format
# Default figure size for matplotlib plots.
plt.rcParams['figure.figsize'] = [8, 6]
# Print the time at which this cell was executed.
print(datetime.now())

In [None]:
# Evaluations of QUIZ and TEXT modules for the current year.
# The ordering by (task_id, module_id) keeps all rows of one module adjacent,
# which partition_on_module_id() relies on when grouping consecutive answers.
evaluations = session.query(
    model.EvaluationParticipantsWithContext
)\
.filter(model.EvaluationParticipantsWithContext.year_id == current_year.id)\
.filter(model.EvaluationParticipantsWithContext.type.in_([model.ModuleType.QUIZ, model.ModuleType.TEXT]))\
.order_by(model.EvaluationParticipantsWithContext.task_id, model.EvaluationParticipantsWithContext.module_id)

In [None]:
# Execute the query and materialise all matching rows.
out = evaluations.all()

In [ ]:
# One parsed text-module submission: the tuple of submitted sub-answers and
# whether the evaluation as a whole was accepted (taken from evaluation.ok).
TextAnswer = namedtuple("TextAnswer", ["user_answers", "is_correct"])
# One parsed quiz submission: per question, the tuple of ticked option indices
# and the tuple of correct option indices.
QuizAnswer = namedtuple("QuizAnswer", ["user_answers", "correct_answers"])
# A parsed answer together with its module type and the source evaluation row.
Answer = namedtuple("Answer", ["type", "evaluation", "answer"])

# Aggregates for one text module; the first three fields are lists with one
# entry per sub-answer position.
TextStats = namedtuple("TextStats", ["correct_answers", "answer_counts", "answer_users", "total_evaluations", "users"])
# Aggregates for one quiz module; item_* fields are keyed by (question index, option index).
QuizStats = namedtuple("QuizStats", ["correct_answer", "combination_counts", "item_counts", "item_users", "total_evaluations", "users"])
# Per-module statistics together with the module type and a representative evaluation row.
Stats = namedtuple("Stats", ["type", "evaluation", "stats"])

# Report line of a text module: captures the content between the brackets after "Raw data: ".
RE_TEXT_ANSWER_LINE = re.compile(r'Raw data: \[(.*)\]')
# Report line of a quiz question: captures the user's answer list and the correct answer list.
RE_QUIZ_ANSWER_LINE = re.compile(r"^\s*\[[yn]\] Question \d+ -- user answers: \[(.*)\], correct answers: \[(.*)\]")


def quiz_answer_to_tuple(answer):
    """Parse a ", "-separated list of integers into a tuple of ints.

    An empty (falsy) input yields an empty tuple.
    """
    if not answer:
        return tuple()
    return tuple(map(int, answer.split(", ")))


def text_answer_to_tuple(answer):
    """Split a list of double-quoted strings into a tuple of their contents.

    The input is split on ", ". A fragment whose quote balance (count of `"`
    minus count of `\\"` plus count of `\\\\"`) equals one is treated as an
    unterminated string and is glued back together with the following
    fragment(s), so that ", " inside a quoted answer survives.

    Returns the strings without their surrounding quotes, stripped of
    leading/trailing whitespace. Raises AssertionError for empty input, for
    an unterminated trailing string, or when an item is not wrapped in quotes.
    """
    assert answer

    def quote_balance(fragment):
        return fragment.count('"') - fragment.count(r'\"') + fragment.count(r'\\"')

    items = []
    pending = None  # accumulated text of a string that is still open
    for fragment in answer.split(", "):
        candidate = fragment if pending is None else pending + ", " + fragment
        if quote_balance(candidate) == 1:
            pending = candidate
        else:
            items.append(candidate)
            pending = None

    # An open string at the end of input has nothing left to be merged with.
    assert pending is None
    assert all(item[0] == item[-1] == '"' for item in items)
    return tuple(item[1:-1].strip() for item in items)


def extract_text_answer(evaluation):
    """Parse the submitted answers of a text-module evaluation.

    Scans `evaluation.full_report` line by line and uses the first line that
    matches RE_TEXT_ANSWER_LINE. Returns a TextAnswer holding the parsed
    answers and `evaluation.ok`. A report without such a line trips the
    assertion at the end.
    """
    for report_line in evaluation.full_report.split("\n"):
        found = RE_TEXT_ANSWER_LINE.match(report_line)
        if not found:
            continue
        parsed = text_answer_to_tuple(found.group(1))
        return TextAnswer(parsed, evaluation.ok)
    assert False, f"unreachable for evaluation {evaluation.full_report}"
    

def extract_quiz_answer(evaluation):
    """Parse the per-question answers of a quiz evaluation.

    Every line of `evaluation.full_report` matching RE_QUIZ_ANSWER_LINE
    contributes one question. Returns a QuizAnswer whose two fields are
    tuples (one entry per matched line) of the user's ticked options and of
    the correct options.
    """
    parsed = []
    for report_line in evaluation.full_report.split("\n"):
        found = RE_QUIZ_ANSWER_LINE.match(report_line)
        if found:
            parsed.append((
                quiz_answer_to_tuple(found.group(1)),
                quiz_answer_to_tuple(found.group(2)),
            ))
    ticked = tuple(user for user, _ in parsed)
    expected = tuple(correct for _, correct in parsed)
    return QuizAnswer(ticked, expected)


def extract_answer(evaluation):
    """Parse one evaluation row into an Answer according to its module type.

    TEXT and QUIZ modules are supported; any other type trips the assertion.
    """
    module_type = evaluation.type
    if module_type == model.ModuleType.TEXT:
        answer = extract_text_answer(evaluation)
    elif module_type == model.ModuleType.QUIZ:
        answer = extract_quiz_answer(evaluation)
    else:
        assert False, f"unreachable for type {evaluation.type}"
    return Answer(module_type, evaluation, answer)


def partition_on_module_id(answers):
    """Group consecutive answers that share the same `evaluation.module_id`.

    Only adjacent answers are merged, so the input is expected to be ordered
    by module; a module id that reappears later starts a new group.

    Args:
        answers: iterable of objects exposing `.evaluation.module_id`.

    Returns:
        A list of non-empty lists preserving the input order. Empty input
        yields an empty list (previously this raised IndexError).
    """
    groups = []
    for answer in answers:
        # Extend the last group while the module id stays the same.
        if groups and groups[-1][-1].evaluation.module_id == answer.evaluation.module_id:
            groups[-1].append(answer)
        else:
            groups.append([answer])
    return groups


def extract_text_stats(answers):
    """Aggregate the submissions of a single text module.

    Args:
        answers: non-empty list of Answer tuples whose `.answer` is a
            TextAnswer. The number of sub-answers is taken from the first one.

    Returns:
        TextStats where, for each sub-answer position i:
          - correct_answers[i] is the set of values seen in accepted evaluations,
          - answer_counts[i] counts how many evaluations contained each value,
          - answer_users[i] maps each value to the set of users who submitted it;
        plus total_evaluations ({"total": number of answers}) and the set of
        all users.
    """
    total_subanswers = len(answers[0].answer.user_answers)
    stats = TextStats(
        correct_answers=[set() for _ in range(total_subanswers)],
        answer_counts=[Counter() for _ in range(total_subanswers)],
        answer_users=[{} for _ in range(total_subanswers)],
        total_evaluations={"total": len(answers)},
        users=set(a.evaluation.user for a in answers),
    )

    for answer in answers:
        user = answer.evaluation.user
        accepted = answer.answer.is_correct
        for i, subanswer in enumerate(answer.answer.user_answers):
            if accepted:
                stats.correct_answers[i].add(subanswer)
            stats.answer_counts[i][subanswer] += 1
            # Grow the user set in place instead of rebuilding it per submission.
            stats.answer_users[i].setdefault(subanswer, set()).add(user)

    return stats


def extract_quiz_stats(answers):
    """Aggregate the submissions of a single quiz module.

    Args:
        answers: non-empty list of Answer tuples whose `.answer` is a
            QuizAnswer. The correct answers are taken from the first one.

    Returns:
        QuizStats with:
          - correct_answer: per-question tuples of correct option indices,
          - combination_counts: how often each complete answer combination occurred,
          - item_counts: number of evaluations that ticked (question, option),
          - item_users: users who ticked (question, option) at least once,
          - total_evaluations ({"total": number of answers}) and the set of users.
    """
    stats = QuizStats(
        correct_answer=answers[0].answer.correct_answers,
        combination_counts=Counter(),
        item_counts=Counter(),
        item_users={},
        total_evaluations={"total": len(answers)},
        users=set(a.evaluation.user for a in answers),
    )

    for answer in answers:
        user = answer.evaluation.user
        user_answers = answer.answer.user_answers
        stats.combination_counts[user_answers] += 1
        # Single pass over the ticked options updates both the counters and
        # the user sets (the sets grow in place rather than being rebuilt).
        for item, ticked in enumerate(user_answers):
            for answer_item in ticked:
                key = (item, answer_item)
                stats.item_counts[key] += 1
                stats.item_users.setdefault(key, set()).add(user)

    return stats


def extract_stats(evaluations):
    """Turn evaluation rows into a list of per-module Stats.

    The rows are parsed, grouped by consecutive module id, and each group is
    aggregated with the extractor matching its module type. The first
    evaluation of each group is stored as the group's representative.
    Returns an empty list for empty input.
    """
    if not evaluations:
        return []

    parsed = [extract_answer(row) for row in evaluations]

    collected = []
    for group in partition_on_module_id(parsed):
        representative = group[0].evaluation
        module_type = representative.type
        if module_type == model.ModuleType.TEXT:
            module_stats = extract_text_stats(group)
        elif module_type == model.ModuleType.QUIZ:
            module_stats = extract_quiz_stats(group)
        else:
            assert False, f"unreachable for type {representative.type}"
        collected.append(Stats(module_type, representative, module_stats))

    return collected


# Aggregate the fetched evaluations into per-module statistics.
stats = extract_stats(out)

In [None]:
def highlight_by_question(ignore_columns):
    """Build a row styler that alternates the background per question.

    The returned function is stateful: each row with a non-empty "Question"
    value flips the shade, so all rows of one question share a background.
    Cells at the positions in `ignore_columns` always get a white background
    with a right border.
    """
    shaded = True

    def inner(row):
        nonlocal shaded
        if row["Question"]:
            shaded = not shaded
        background = '#F9F9F9' if shaded else 'white'
        styles = [f"background-color: {background}" for _ in range(len(row))]
        for position in ignore_columns:
            styles[position] = "background-color: white; border-right: 1px solid #dddddd"
        return styles

    return inner


def make_header(ev):
    """Display a Markdown heading with the task and module of an evaluation."""
    task_heading = f"## {ev.task_id} {ev.task_name}"
    module_heading = f"### {ev.module_id} {ev.module_name}"
    display(Markdown(task_heading + "\n" + module_heading))

    
# Highlight colours used by the stylers below: GREEN for rows of correct
# answers, RED for rows of incorrect ones.
GREEN = "#5fd65f"
RED = "#FF9393"


def is_correct_answer(row):
    """Return True when the row's "Correct" value is "yes" (case-insensitive)."""
    verdict = row["Correct"]
    return verdict.lower() == "yes"


def color_by_correctness(cols, stat):
    """Build a row styler that colours the given cell positions by correctness.

    Cells at the positions in `cols` get a GREEN background for rows of
    correct answers and a RED one otherwise; other cells are left unstyled.
    `stat` is accepted but not used.
    """
    def inner(row):
        styles = [""] * len(row)
        for position in cols:
            colour = GREEN if is_correct_answer(row) else RED
            styles[position] = "background-color: " + colour
        return styles

    return inner


def custom_bar(col_index, vmin, vmax):
    """Build a row styler that draws an in-cell horizontal bar.

    The cell at position `col_index` gets a left border and a linear-gradient
    background filled up to the value's relative position within
    [vmin, vmax]; the bar is GREEN for correct rows and RED otherwise.

    Args:
        col_index: positional index of the cell holding the value.
        vmin: value mapped to an empty bar.
        vmax: value mapped to a full bar.

    Returns:
        A function suitable for `Styler.apply(..., axis=1)`.
    """
    # Normalise by the width of the range, not by vmax alone; the two only
    # coincide when vmin == 0.
    span = vmax - vmin

    def inner(row):
        base = ["" for _ in range(len(row))]
        val = row.iloc[col_index]
        # A degenerate range would divide by zero; draw an empty bar instead.
        fraction = (val - vmin) / span if span else 0.0
        ratio = f"{fraction * 100:.1f}"
        base[col_index] = f"border-left: 1px solid; width: 10em; background: linear-gradient(90deg,{GREEN if is_correct_answer(row) else RED} {ratio}%, transparent {ratio}%);"
        return base

    return inner


def custom_gradient(col_index, cmap, ondra=False):
    """Build a row styler that colours one cell through a colormap.

    The value at position `col_index` is read as a percentage. Without
    `ondra` it is mapped to `val / 100`; with `ondra` it is mapped around the
    colormap midpoint, upwards for correct rows and downwards for incorrect
    ones. `cmap` is called with that number and must return an
    (r, g, b, a) tuple of fractions; alpha is ignored. In the plain mode,
    values above 85 additionally get white text.
    """
    def css_rgb(position):
        r, g, b, _alpha = cmap(position)
        return f"rgb({r * 255}, {g * 255}, {b * 255})"

    def inner(row):
        styles = [""] * len(row)
        val = row.iloc[col_index]
        if ondra:
            ratio = 0.5 - val / 200 * (-1 if is_correct_answer(row) else 1)
        else:
            ratio = val / 100
        cell_style = f"background-color: {css_rgb(ratio)}!important;"
        if not ondra and val > 85:
            cell_style += "color: white!important"
        styles[col_index] = cell_style
        return styles

    return inner


def show_quiz_stats(this_module_data, stats):
    """Display the per-option statistics table of one quiz module.

    One row is produced for every (question, option) pair that was ticked in
    at least one evaluation.

    Args:
        this_module_data: the "quiz" entry of the module's JSON data, indexed
            by question; each entry provides "question" and "options".
        stats: QuizStats of the module.

    If the module data no longer contains a question/option that appears in
    the evaluations (IndexError), the row is still shown with an error text
    in place of the question and the answer.
    """
    removed_marker = "ERROR: some question in this module probably removed"
    total_evaluations = stats.total_evaluations["total"]
    total_users = len(stats.users)

    data = []
    for (item_i, user_a), count in sorted(stats.item_counts.items()):
        ticked_ratio = count / total_evaluations
        ticked_ratio_user = len(stats.item_users[(item_i, user_a)]) / total_users
        is_correct = user_a in stats.correct_answer[item_i]

        try:
            # The question text is printed only on the first row of each question.
            first_row_of_item = not data or data[-1][0] != item_i
            question = this_module_data[item_i]["question"] if first_row_of_item else ""
            option = this_module_data[item_i]["options"][user_a]
        except IndexError:
            question = option = removed_marker

        data.append((
            item_i,
            user_a,
            question,
            option,
            ticked_ratio * 100,
            # Share of evaluations in which this option was handled correctly:
            # ticked when correct, left unticked when incorrect.
            (ticked_ratio if is_correct else 1 - ticked_ratio) * 100,
            "Yes" if is_correct else "No",
            count,
            ticked_ratio_user * 100
        ))

    df = pd.DataFrame(data, columns=[
        "Item ID",
        "Answer ID",
        "Question",
        "Answer",
        "Ticked in/All evals ratio",
        "Correct/All evals ratio",
        "Correct",
        "Count evals",
        "Ticked at least once/All users ratio",
    ])

    hidden_columns = ["Item ID", "Answer ID", "Correct", "Ticked in/All evals ratio"]
    styler = df.style
    # Styler.hide_columns was removed in pandas 2.0; Styler.hide(axis="columns")
    # is its replacement since pandas 1.4. Fall back for older versions.
    if hasattr(styler, "hide"):
        s = styler.hide(hidden_columns, axis="columns")
    else:
        s = styler.hide_columns(hidden_columns)

    # Column positions below refer to the full frame, including hidden columns.
    s.apply(custom_bar(col_index=7, vmin=0, vmax=total_evaluations), axis=1)

    s.format({
        'Ticked in/All evals ratio': '{:,.1f} %'.format,
        'Correct/All evals ratio': '{:,.1f} %'.format,
        'Ticked at least once/All users ratio': '{:,.1f} %'.format,
    })

    s.apply(highlight_by_question(ignore_columns=[7]), axis=1)
    s.apply(custom_gradient(col_index=5, cmap=plt.get_cmap("RdYlGn")), axis=1)
    s.apply(custom_gradient(col_index=8, cmap=sns.diverging_palette(20, 127, s=78, l=77, as_cmap=True), ondra=True), axis=1)
    display(s)

    
def show_text_stats(this_module_data, stats):
    """Display the submitted-answer statistics table of one text module.

    For every sub-answer position the distinct submitted values are listed by
    descending (count, value). Values submitted by only one user are omitted.

    Args:
        this_module_data: the "text" entry of the module's JSON data; must
            provide "questions" and either "diff" or "eval_script".
        stats: TextStats of the module.

    Notes:
        - Submitted answers come from users and are HTML-escaped before being
          wrapped in <code>, so that markup in an answer is shown literally
          instead of being interpreted by the notebook.
        - The question text is attached to the first *displayed* row of each
          question, so it is not lost when the top-ranked answer is filtered
          out for having a single submitter.
    """
    data = []
    total_evaluations = stats.total_evaluations["total"]
    total_users = len(stats.users)

    if "diff" in this_module_data:
        display(Markdown(f"Evaluated by diff with `{this_module_data['diff']}`."))
    else:
        display(Markdown(f"Evaluated by the eval script at `{this_module_data['eval_script']}`."))

    for i, counts in enumerate(stats.answer_counts):
        ranked = sorted(counts.items(), key=lambda p: (p[1], p[0]), reverse=True)
        question_shown = False
        for answer, count in ranked:
            submitted_by_users = len(stats.answer_users[i][answer])
            if submitted_by_users <= 1:
                continue

            submitted_in_evals_ratio = count / total_evaluations
            submitted_by_users_ratio = submitted_by_users / total_users
            is_correct = answer in stats.correct_answers[i]
            data.append((
                "" if question_shown else this_module_data["questions"][i].strip(),
                f"<code>{html.escape(answer)}</code>",
                submitted_in_evals_ratio * 100,
                (submitted_in_evals_ratio if is_correct else 1 - submitted_in_evals_ratio) * 100,
                "Yes" if is_correct else "No",
                count,
                submitted_by_users_ratio * 100
            ))
            question_shown = True

    df = pd.DataFrame(data, columns=[
        "Question",
        "Answer",
        "Submitted in/All evals ratio",
        "Correct/All evals ratio",
        "Correct",
        "Count evals",
        "Submitted at least once/All users ratio",
    ])

    hidden_columns = ["Correct/All evals ratio"]
    styler = df.style
    # Styler.hide_columns was removed in pandas 2.0; Styler.hide(axis="columns")
    # is its replacement since pandas 1.4. Fall back for older versions.
    if hasattr(styler, "hide"):
        s = styler.hide(hidden_columns, axis="columns")
    else:
        s = styler.hide_columns(hidden_columns)

    # Column positions below refer to the full frame, including hidden columns.
    s.apply(custom_bar(col_index=5, vmin=0, vmax=total_evaluations), axis=1)

    s.format({
        'Submitted in/All evals ratio': '{:,.1f} %'.format,
        'Correct/All evals ratio': '{:,.1f} %'.format,
        'Submitted at least once/All users ratio': '{:,.1f} %'.format,
    })

    s.apply(custom_gradient(col_index=3, cmap=plt.get_cmap("RdYlGn")), axis=1)
    s.apply(custom_gradient(col_index=6, cmap=sns.diverging_palette(20, 127, s=78, l=77, as_cmap=True), ondra=True), axis=1)
    s.set_table_styles([
        {"selector": ".col1 code", "props": [("background-color", "#def2fc")]}
    ])

    display(s)


def show_evaluations_stats(evaluation_stats):
    """Display a header and a statistics table for every module in the list.

    The JSON data of all QUIZ and TEXT modules is loaded once from the
    database; each Stats entry is then rendered by the function matching its
    module type, using the "text" or "quiz" part of its module's data.
    """
    module_rows = session.query(model.Module.id, model.Module.data)\
        .filter(model.Module.type.in_([model.ModuleType.QUIZ, model.ModuleType.TEXT])).all()
    module_data = {module_id: json.loads(raw) for module_id, raw in module_rows}

    for entry in evaluation_stats:
        make_header(entry.evaluation)
        this_module = module_data[entry.evaluation.module_id]
        if entry.type == model.ModuleType.TEXT:
            show_text_stats(this_module["text"], entry.stats)
        elif entry.type == model.ModuleType.QUIZ:
            show_quiz_stats(this_module["quiz"], entry.stats)
        else:
            assert False, f"unreachable for type {entry.type}"


# Render the tables for all modules aggregated above.
show_evaluations_stats(stats)