In [1]:
import utils
import pandas as pd
from pprint import pprint
import json
import numpy as np
import seaborn as sns
from statsmodels.stats.proportion import proportion_confint

sns.set()

In [None]:
# Load the three tables used throughout the notebook via the project helper.
# Judging by later cells: `answers` has one row per quiz submission (with a list
# in its `answers` column), `answers_flat` has one row per answered question,
# and `quizzes.quizzes` maps quiz names to their schemas.
# NOTE(review): the recorded output shows a git fetch, so results presumably
# depend on when this cell is run — confirm.
answers, answers_flat, quizzes = utils.load_latest_answers()

From github.com:willcrichton/rust-book
   6da87030..95f99320  ownership-chapter -> origin/ownership-chapter


Already up to date.
Loading answers...
Loading quizzes...
Postprocessing data...


In [None]:
# Count the distinct session ids present in the answers table.
distinct_sessions = answers['sessionId'].unique()
len(distinct_sessions)

In [None]:
# Display the answers table for a quick visual check of its columns.
answers

In [None]:
def hash_answer(a):
    """Return an integer key identifying the content of answer ``a``.

    The answer is serialized to JSON with sorted keys, so two dicts with the
    same entries in a different order produce the same key. The key comes from
    Python's built-in ``hash`` of that string, which is salted per interpreter
    process: values are comparable within one kernel session only.
    """
    canonical = json.dumps(a, sort_keys=True)
    return hash(canonical)
# Add a per-row key for the answer content so that identical answers can be
# grouped together (print_q groups on 'answerHash').
answers_flat['answerHash'] = answers_flat.answer.map(hash_answer)

In [None]:
from matplotlib.dates import DateFormatter

# Daily volume: one point per calendar day, counting rows of `answers`.
submissions_per_day = answers.set_index('timestamp').resample('D').size()
ax = submissions_per_day.plot.line()

# Show x tick labels as month-day.
date_form = DateFormatter("%m-%d")
ax.xaxis.set_major_formatter(date_form)

In [None]:
# Number of distinct session ids seen on each calendar day.
answers_by_day = answers.set_index('timestamp').resample('D')
daily_users = answers_by_day.apply(lambda day: len(day.sessionId.unique()))
# Alternative view (7-day rolling mean from 2022-09-01 onwards):
# daily_users.loc['2022-09-01':].rolling(7).mean().plot.line(style='.-')
daily_users.plot.line(style='.-')

In [None]:
# Question ids (UUID strings) singled out for closer inspection. They overlap
# with the id passed to print_q and with the ids in the later `interventions`
# list of (question id, tag) pairs.
# NOTE(review): QS itself is not referenced in the visible cells — confirm
# whether it is still needed.
QS = [
    "5080616f-6f4c-43f7-aa3a-8b23f2798937",
    "e8da8773-8df2-4279-8c27-b7e9eda1dddd",
    "1e4f082c-ffa4-4d33-8726-2dbcd72e1aa2",
    "7fbda6e4-dca7-458e-ab52-fab03181f90b",
    "f1ab2171-96f0-4380-b16d-9055a9a00415",
    "6028c7f5-b373-475f-ba43-553773c7b3b7",
    "6a94bc90-6cc5-4a47-838c-596d3747b7e4",
    "40ae0cfe-3567-4d05-b0d9-54d612a2d654",
    "d03748df-8dcf-4ec8-bd30-341927544665",
    "694bb2d0-f2e6-4b0b-a3e7-2d9f9e8b3d09",
    "a2a1546b-27bf-47d2-a1bc-86d777573f40"
]

In [None]:
def print_q(qid, include_correct=False):
    """Print the explanations submitted for one question, grouped by answer.

    Only rows of ``answers_flat`` whose ``id`` equals ``qid`` and whose
    ``explanation`` is truthy (the column is used directly as a boolean mask)
    are considered. Nothing is printed when no such row exists.

    Output layout: a header line with the quiz name and the 1-based question
    number, then for each distinct answer (most frequent first, grouped via the
    ``answerHash`` column) the answer itself followed by every explanation
    given with it, and finally a ``=`` separator line.

    Parameters
    ----------
    qid : str
        Question id to look up in ``answers_flat.id``.
    include_correct : bool, default False
        When True, a "CORRECT:" section is printed before the "INCORRECT:"
        section. The default prints incorrect answers only.
    """
    df = answers_flat[(answers_flat.id == qid) & (answers_flat.explanation)]
    if len(df) == 0:
        return
    first = df.iloc[0]
    print(first.quizName, first.question+1)

    def print_explanations(correct):
        # Restrict to rows with the requested correctness, then walk the
        # distinct answers from most to least frequent.
        df2 = df[df.correct_v2 == correct]
        hashes = df2.groupby('answerHash').size().sort_values(ascending=False).index.tolist()
        for h in hashes:
            rows = df2[df2.answerHash == h]
            answer = rows.iloc[0].answer
            print(f"ANSWER (N={len(rows)}):", answer)
            print("EXPLANATIONS:")
            for explanation in rows.explanation:
                print(explanation)
                print()

    if include_correct:
        print('CORRECT:')
        print_explanations(True)
    print('INCORRECT:')
    print_explanations(False)
    print("="*30)


print_q("6a94bc90-6cc5-4a47-838c-596d3747b7e4")

# Quiz-level performance

In [None]:
def ci(ser):
    """Summarize a numeric series as ``'mean ± 2*SEM (N = count)'``.

    The half-width is twice the standard error of the mean as computed by
    ``Series.sem()``; both numbers are formatted with two decimals.
    """
    half_width = 2 * ser.sem()
    return '{:.02f} ± {:.02f} (N = {})'.format(ser.mean(), half_width, len(ser))
# Mean fraction correct (± 2 standard errors) for every (quiz, version) pair.
answers.groupby(['quizName', 'version']).frac_correct.apply(ci)

In [None]:
# Per-quiz summary of frac_correct: row count, mean and median ('50%'),
# ordered by quiz name.
quiz_summary = answers.groupby('quizName').frac_correct.describe()
quiz_summary[['count', 'mean', '50%']].sort_values('quizName')

## Intervention analysis

In [None]:
import subprocess as sp

# Git tags in the quiz repository that mark when each intervention landed.
TAGS = ['ownership-incompleteness', 'borrow-promotion', 'intervention-01', "moved-match-questions"]

# Map each tag to the value utils.date_for_commit returns for the commit the
# tag points at (later cells call .date() on these values).
# `git rev-list -n 1 <tag>` prints the hash of that commit. The command is
# passed as an argument list so no shell is involved in parsing the tag name.
commit_tags = {
    tag: utils.date_for_commit(
        sp.check_output(['git', 'rev-list', '-n', '1', tag], cwd=utils.QUIZ_DIR)
        .decode('utf-8')
        .strip()
    )
    for tag in TAGS
}

In [None]:
# Each entry: (question before the change, question after the change, git tag
# marking the change). A question is a (quizName, question index) pair; the
# index appears to be zero-based, since it is printed as question+1 elsewhere.
interventions = [
    (('ch04-01-ownership-sec1-move', 3), ('ch04-01-ownership-sec3-functions', 2), 'ownership-incompleteness'),
    (('ch04-02-references-sec2-mut', 0), ('ch04-02-references-sec2-mut', 0), 'borrow-promotion')
]

for (before, after, tag) in interventions:
    def f(q, prior):
        """Print N, accuracy and a confidence interval for question ``q``.

        ``prior=True`` keeps responses dated strictly before the tag's commit
        date; ``prior=False`` keeps responses dated on or after it.
        """
        quiz, question = q
        cutoff = commit_tags[tag].date()
        subset = answers_flat[(answers_flat.quizName == quiz) & (answers_flat.question == question)]
        answered_on = subset.timestamp.dt.date
        subset = subset[answered_on < cutoff] if prior else subset[answered_on >= cutoff]
        n_correct, n_total = subset.correct_v2.sum(), len(subset)
        low, upp = proportion_confint(n_correct, n_total)
        print(f'prior={prior}, N={n_total}, {subset.correct_v2.mean()}, correct=[{low:.02}-{upp:.02}]')

    print(tag)
    f(before, True)
    f(after, False)


In [None]:
def cohens_d(x1, x2):
    """Return Cohen's d for the difference ``mean(x2) - mean(x1)``.

    The difference in means is divided by the pooled standard deviation

        sqrt(((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2))

    where ``s1**2`` and ``s2**2`` are the sample variances (``ddof=1``).

    Parameters
    ----------
    x1, x2 : array-like of numbers (or booleans)
        The two samples; each is converted to a float array.

    Returns
    -------
    float
        Positive when ``x2`` has the larger mean. The result is nan/inf (with
        a NumPy warning) when the pooled SD is undefined or zero, e.g. when a
        sample has fewer than two observations.
    """
    x1 = np.asarray(x1, dtype=float)
    x2 = np.asarray(x2, dtype=float)
    n1, n2 = len(x1), len(x2)
    var1 = np.var(x1, ddof=1)
    var2 = np.var(x2, ddof=1)
    # The division by the degrees of freedom belongs inside the square root.
    sd_pooled = np.sqrt(((n1 - 1) * var1 + (n2 - 1) * var2) / (n1 + n2 - 2))
    return (x2.mean() - x1.mean()) / sd_pooled

In [None]:
from scipy.stats import ttest_ind

In [None]:
# Each entry: (question id, git tag of the change being evaluated for it).
interventions = [
    ("5080616f-6f4c-43f7-aa3a-8b23f2798937", "intervention-01"),
    ("7fbda6e4-dca7-458e-ab52-fab03181f90b", "intervention-01"),
    ("e8da8773-8df2-4279-8c27-b7e9eda1dddd", "intervention-01"),
    ("f1ab2171-96f0-4380-b16d-9055a9a00415", "intervention-01"),
    ("40ae0cfe-3567-4d05-b0d9-54d612a2d654", "intervention-01"),
    ("694bb2d0-f2e6-4b0b-a3e7-2d9f9e8b3d09", "intervention-01"),
    ("d03748df-8dcf-4ec8-bd30-341927544665", "intervention-01"),
    
    ("1e4f082c-ffa4-4d33-8726-2dbcd72e1aa2", "moved-match-questions"),
]

# p-value threshold below which an effect is counted as significant.
SIGNIFICANCE_LEVEL = 0.05


def _correct_for_period(qid, tag, after):
    """Return the ``correct_v2`` values for question ``qid`` on one side of ``tag``.

    ``after=True`` keeps responses dated on or after the tag's commit date,
    ``after=False`` those dated strictly before it. A one-line summary
    (N, accuracy, confidence interval) is printed as a side effect.
    """
    cutoff = commit_tags[tag].date()
    df = answers_flat[answers_flat.id == qid]
    answered_on = df.timestamp.dt.date
    df = df[answered_on >= cutoff] if after else df[answered_on < cutoff]
    low, upp = proportion_confint(df.correct_v2.sum(), len(df))
    print(f'after={after}, N={len(df)}, {df.correct_v2.mean():.02}, correct=[{low:.02}-{upp:.02}]')
    return df.correct_v2


effects = []
for (qid, tag) in interventions:
    row = answers_flat[answers_flat.id == qid].iloc[0]
    print(row.quizName, row.question+1, tag)
    before = _correct_for_period(qid, tag, after=False)
    after = _correct_for_period(qid, tag, after=True)

    # Effect = change in fraction correct; significance from a two-sample t-test.
    effect = after.mean() - before.mean()
    p = ttest_ind(before, after).pvalue
    print(f'effect={effect:.03}, p={p:.03}')
    if p < SIGNIFICANCE_LEVEL:
        effects.append(effect)

    print()

# Average over significant effects only; np.mean([]) would emit a RuntimeWarning,
# so report nan directly when nothing was significant.
avg_effect = np.mean(effects) if effects else float('nan')
print(f'Nsig={len(effects)}/{len(interventions)}, avg_effect={avg_effect}')

In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

sns.set()
plt.figure(figsize=(4, 4))

# Hard-coded before/after values for the figure.
# NOTE(review): these numbers appear to be copied by hand from an earlier
# analysis output — confirm their source. Each condition is listed twice,
# presumably so each bar has more than one observation — confirm.
summary = pd.DataFrame([
    {"x": label, "y": frac, "sd": sd}
    for label, frac, sd in [("before", 0.29, 0.05), ("after", 0.71, 0.04)]
    for _ in range(2)
])


def _fixed_interval(values):
    """Return the hard-coded (low, high) error-bar interval for one bar's y values."""
    if values.iloc[0] == 0.29:
        return [0.24, 0.35]
    return [0.67, 0.75]


ax = sns.barplot(data=summary, x="x", y="y", errorbar=_fixed_interval, width=0.5)
ax.set_ylim([0, 1])
ax.set_xlabel('Before or after intervention?')
ax.set_ylabel('Fraction of responses that are correct')
plt.savefig('intervention.pdf', bbox_inches='tight')

# Question-level performance

In [None]:

# Per-question report for every quiz, in order of quiz name.
for quiz_name, versions in sorted(quizzes.quizzes.items(), key=lambda item: item[0]):
    # Questions are taken from the schema entry with the largest 'version'.
    newest = max(versions['schemas'].values(), key=lambda d: d['version'])
    schema = newest['schema']
    quiz_answers = answers[answers.quizName == quiz_name]
    N = len(quiz_answers)
    if N < 2:
        continue

    # One tally per schema question: 'n' counts correct answers,
    # 'bad_answers' collects the incorrect answers.
    meta = [{'n': 0, 'bad_answers': []} for _ in range(len(schema['questions']))]
    for user_answers in quiz_answers.answers:
        for i, answer in enumerate(user_answers):
            # NOTE(review): answers are matched to questions by position in the
            # newest schema; a submission with more answers than that schema
            # has questions would raise IndexError here — confirm that quiz
            # versions keep the same question count and order.
            m = meta[i]
            if not answer['correct']:
                m['bad_answers'].append(answer['answer'])
            else:
                m['n'] += 1

    print(f'{"="*10} QUIZ: {quiz_name} {"="*10}')
    for i, (q, m) in enumerate(zip(schema['questions'], meta)):
        # Confidence interval for the fraction correct, using all N
        # submissions of this quiz as the denominator.
        lower, upper = proportion_confint(m['n'], N)
        print(f"QUESTION {i+1}")
        print(f'%correct: {m["n"]/N:.02f} ([{lower:.02f} - {upper:.02f}], N = {N})')
        pprint(q['prompt'])
        print(m['bad_answers'])
        print()

    print('\n\n')

# User-level performance

In [None]:
# Per-session summary of frac_correct: row count, mean and median ('50%'),
# ordered from the fewest to the most rows.
session_summary = answers.groupby('sessionId').frac_correct.describe()
session_summary[['count', 'mean', '50%']].sort_values('count')