In [1]:
import csv
import os
import re
import shutil
import urllib.request
import sys

import warnings
warnings.filterwarnings('ignore')
from nbgrader.apps import NbGraderAPI
from traitlets.config import Config
import matplotlib.pyplot as plt
import nbgrader
import subprocess
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from canvasapi import Canvas
from IPython.display import Javascript, Markdown, display
from ipywidgets import fixed, interact, interact_manual, interactive, widgets, Button, Layout
from tqdm import tqdm, tqdm_notebook  # Progress bar

In [2]:
def assign(assignment_id):
    """Regenerate the release version of an assignment with the nbgrader CLI.

    First runs ``nbgrader update`` on
    ``source/<assignment_id>/<assignment_id>.ipynb``, then runs
    ``nbgrader assign`` with ``--create --force`` and
    ``source/header.ipynb`` as the header notebook.

    Args:
        assignment_id: Name of the assignment; it is also used as the
            directory name and the notebook file name under ``source/``.
    """
    # `!` is an IPython shell escape; `{...}` interpolates Python expressions.
    !nbgrader update {'source/'+assignment_id+'/'+assignment_id+".ipynb"}
    !nbgrader assign {assignment_id} --create --force --IncludeHeaderFooter.header=source/header.ipynb --log-level='INFO'

In [3]:
def download_files(assignment_id, course):
    directory = 'downloaded/%s/archive/' % assignment_id
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Get sis id's from students
    student_dict = get_student_ids(course)

    # Get the Canvas assignment id
    assignment = get_assignment_obj(course, assignment_id)
    
    for submission in tqdm_notebook(assignment.get_submissions()):
        # Check if submission has attachments
        if 'attachments' not in submission.attributes:
            continue
        # Download file and give correct name
        student_id = student_dict[submission.user_id]
        attachment = submission.attributes["attachments"][0]
        filename = str(student_id) + "_" + assignment_id + ".ipynb"
        urllib.request.urlretrieve(attachment['url'], directory + filename)
        # Clear all notebooks of output to save memory
        !nbstripout {directory + filename}
    # Move the download files to submission folder
    !nbgrader zip_collect {assignment_id} --force --log-level='INFO'

    # Delete folders which aren't necessary
    shutil.rmtree('downloaded/%s/archive/' % assignment_id)


In [4]:
def update_db(b):
    """Add Canvas students that are missing from the nbgrader database.

    Reads the module-level ``nbgrader_api`` and ``course`` objects. Each
    Canvas user with the ``student`` enrollment type whose SIS id is not yet
    a student id in the gradebook is added, with the name split on the first
    space into first and last name.

    Args:
        b: The widget that triggered the call (this function is registered
            as a button ``on_click`` callback); it is not used.
    """
    # Ids are stored as strings (see add_student below), so compare as strings.
    # A set keeps the membership test cheap and lets us track ids added in
    # this run.
    known_ids = {
        str(student.id) for student in nbgrader_api.gradebook.students
    }

    for student in tqdm_notebook(course.get_users(enrollment_type=['student'])):
        sis_id = getattr(student, 'sis_user_id', None)
        # Users without a SIS id cannot be matched to a submission later on;
        # storing them under the literal id "None" would only pollute the db.
        if sis_id is None:
            continue
        sis_id = str(sis_id)
        if sis_id in known_ids:
            continue
        # partition() never fails: a single-word name yields an empty last
        # name instead of raising ValueError on unpacking.
        first_name, _, last_name = student.name.partition(' ')
        nbgrader_api.gradebook.add_student(
            sis_id, first_name=first_name, last_name=last_name)
        known_ids.add(sis_id)

In [5]:
def autograde(assignment_id):
    """Run ``nbgrader autograde`` for an assignment.

    The command is invoked through an IPython shell escape with the
    ``--create``, ``--force`` and ``--quiet`` flags.

    Args:
        assignment_id: Name of the assignment to autograde.
    """
    !nbgrader autograde {assignment_id} --create --force --quiet

In [6]:
def plagiatcheck(assignment_id):
    """Prepare and run a plagiarism check for an assignment.

    Converts the extracted submissions and the release notebooks to scripts
    with ``jupyter nbconvert``, removes the extracted submissions, renames
    converted ``.txt`` files to ``.py``, runs ``compare50`` (skipped on
    Windows) and finally displays a link to the result directory.

    Args:
        assignment_id: Name of the assignment to check.
    """
    # Submissions go to plagiaatcheck/<id>/pyfiles, the release version to
    # plagiaatcheck/<id>/base.
    # NOTE(review): the lone "/" before --log-level is passed to nbconvert as
    # an extra argument; it looks unintended -- confirm before changing.
    !jupyter nbconvert --to script downloaded/{assignment_id}/extracted/*.ipynb --output-dir=plagiaatcheck/{assignment_id}/pyfiles / --log-level WARN
    !jupyter nbconvert --to script release/{assignment_id}/*.ipynb --output-dir=plagiaatcheck/{assignment_id}/base / --log-level WARN
    # The extracted notebooks are deleted once they have been converted
    shutil.rmtree('downloaded/%s/extracted/' % assignment_id)
    directory = "plagiaatcheck/%s/pyfiles/" % assignment_id
    # Give every converted ".txt" file a ".py" extension, replacing an
    # existing file of that name.
    # (presumably nbconvert writes ".txt" when it cannot determine the
    # notebook language -- TODO confirm)
    for file in os.listdir(directory):
        if file.endswith(".txt"):
            # Dropping the last three characters keeps the dot: "x.txt" -> "x.py"
            targetfilename = file[:-3] + "py"
            if targetfilename in os.listdir(directory):
                os.remove(directory + targetfilename)
            os.rename(directory + file, directory + targetfilename)
    # compare50 is only run when the platform name does not start with "win"
    if not sys.platform.startswith('win'):
        !compare50 plagiaatcheck/{assignment_id}/pyfiles/* -d plagiaatcheck/{assignment_id}/base/
        #!compare50 pyfiles/* -d base/
    else:
        # Message (Dutch): "Oops, you need Linux or Mac for compare50."
        print("Oeps, voor compare50 heb je Linux of Mac nodig.")
    # Render a button-styled link (Dutch label: "Open folder with plagiarism
    # results") that opens plagiaatcheck/<id>/ in a new tab
    display(
        Markdown(
            '<a class="btn btn-primary" style="margin-top: 10px; text-decoration: none;" href="plagiaatcheck/%s/" target="_blank">Open map met plagiaatresultaten</a>'
            % assignment_id))

In [7]:
def create_feedback(student_id, assignment_id):
    """Create a copy of a student's feedback file without the hidden tests.

    Reads ``feedback/<student_id>/<assignment_id>/<assignment_id>.html``,
    shortens error outputs to the part starting at the first bold
    intense-red span, removes everything between the
    ``### BEGIN HIDDEN TESTS`` and ``### END HIDDEN TESTS`` comment spans,
    and writes the result to
    ``canvasfeedback/<student_id>/<assignment_id>/<assignment_id>.html``.

    Args:
        student_id: Id of the student; used as a directory name.
        assignment_id: Name of the assignment; used as directory and file name.

    Returns:
        The path of the written feedback file.

    Raises:
        OSError: If the source feedback file cannot be read or the target
            cannot be written.
    """
    directory = 'feedback/%s/%s/' % (student_id, assignment_id)
    source_path = "%s%s.html" % (directory, assignment_id)
    # Use a context manager so the input handle is closed after parsing
    with open(source_path, encoding='utf-8') as source_file:
        soup = str(BeautifulSoup(source_file, "html.parser"))

    # Only the part after </head> is rewritten, so the regexes never touch
    # the stylesheet. A document without </head> is processed as a whole
    # instead of raising ValueError on unpacking.
    css, head_end, html = soup.partition('</head>')
    if not head_end:
        css, html = '', soup

    html = re.sub(
        r'(<div class="output_subarea output_text output_error">\n<pre>\n)(?:(?!<\/div>)[\w\W])*(<span class="ansi-red-intense-fg ansi-bold">[\w\W]*?<\/pre>)',
        r'\1\2', html)
    html = re.sub(
        r'<span class="c1">### BEGIN HIDDEN TESTS<\/span>[\w\W]*?<span class="c1">### END HIDDEN TESTS<\/span>',
        '', html)
    soup = css + head_end + html

    targetdirectory = 'canvasfeedback/%s/%s/' % (student_id, assignment_id)
    os.makedirs(targetdirectory, exist_ok=True)
    filename = "%s%s.html" % (targetdirectory, assignment_id)
    with open(filename, "w", encoding="utf8") as html_file:
        html_file.write(soup)
    return filename

In [8]:
def calculate_grade(score, min_grade, max_score):
    """Convert a raw score into a grade between 1 and 10.

    The score is scaled linearly from ``min_grade`` (score 0) to 10
    (score ``max_score``), rounded to one decimal and clamped to [1, 10].
    """
    scaled = round(min_grade + (10 - min_grade) * score / max_score, 1)
    capped = min(scaled, 10.0)
    return max(1, capped)

In [9]:
def create_grades_per_assignment(assignment_name, gradedict):
    """Build a student-by-assignment table of grades for one assignment.

    The submissions come from the module-level ``nbgrader_api`` gradebook.
    ``gradedict[assignment_name]`` may supply ``max_score`` and
    ``min_grade``; without an entry the highest ``max_score`` among the
    submissions and a minimum grade of 0 are used.

    Returns:
        DataFrame indexed by student with one column per assignment name,
        holding the grade computed by ``calculate_grade``.
    """
    submissions = nbgrader_api.gradebook.submission_dicts(assignment_name)
    canvasdf = pd.DataFrame(submissions).set_index('student')

    if assignment_name not in gradedict:
        # No explicit grading settings: fall back to the data itself
        max_score = canvasdf['max_score'].max()
        min_grade = 0
    else:
        settings = gradedict[assignment_name]
        max_score = settings["max_score"]
        min_grade = settings["min_grade"]

    canvasdf['grade'] = canvasdf['score'].apply(
        calculate_grade, args=(min_grade, max_score))
    return canvasdf.pivot_table(
        values='grade', index='student', columns='name', aggfunc='first')


def total_df(gradedict):
    """Combine the grade tables of all graded assignments side by side.

    Calls ``create_grades_per_assignment`` for every name returned by
    ``graded_submissions`` and concatenates the results column-wise.
    """
    per_assignment = []
    for assignment_name in graded_submissions():
        per_assignment.append(
            create_grades_per_assignment(assignment_name, gradedict))
    return pd.concat(per_assignment, axis=1)

In [10]:
def color_grades(row):
    """Return the bar colour for a grade bin.

    ``'r'`` when the right edge of ``row['interval']`` is at most 5.5,
    ``'g'`` otherwise.
    """
    return 'r' if row['interval'].right <= 5.5 else 'g'


def visualize_grades(assignment_id, gradedict):
    """Print summary statistics and plot the grade distribution of an assignment.

    Prints the mean, the median, the mean of the top grades (the top 5%,
    but at least five grades) and the percentage of grades below 5.5,
    followed by a hint when that percentage exceeds 30. Then draws a
    histogram with half-point bins (red up to 5.5, green above) and overlays
    a kernel density estimate on a twin axis.

    Args:
        assignment_id: Name of the assignment to visualize.
        gradedict: Grading settings, passed to
            ``create_grades_per_assignment``.
    """
    grades = create_grades_per_assignment(assignment_id,
                                          gradedict)[assignment_id]
    # Keep grades of at least 1.0; missing grades (NaN) are dropped
    grades = grades.where(grades >= 1.0).dropna()
    if grades.empty:
        # Without this guard the percentage below divides by zero
        print("No grades to visualize for {}".format(assignment_id))
        return

    fail_percentage = 100 * sum(grades < 5.5) / len(grades)
    print("The mean grade is {:.1f}".format(grades.mean()))
    print("The median grade is {}".format(grades.median()))
    print("Maximum van Cohen-Schotanus is {:.1f}".format(
        grades.nlargest(max(5, int(len(grades) * 0.05))).mean()))
    print("Het percentage onvoldoendes is {:.1f}%. ".format(fail_percentage))
    if fail_percentage > 30:
        print(
            "Het percentage onvoldoendes is te hoog, voor meer informatie kijk op: {}"
            .format(
                "http://toetsing.uva.nl/toetscyclus/analyseren/tentamenanalyse/tentamenanalyse.html#anker-percentage-geslaagde-studenten"
            ))

    sns.set(style="darkgrid")
    bins = np.arange(1, 10, 0.5)
    interval = [pd.Interval(x, x + 0.5, closed='left') for x in bins]
    # Widen the last bin slightly so that a grade of exactly 10 falls in it
    interval[-1] = pd.Interval(left=9.5, right=10.001, closed='left')
    interval = pd.IntervalIndex(interval)
    # observed=False keeps empty bins in the result, so there is exactly one
    # count per entry of `bins`; newer pandas versions no longer default to it.
    new_grades = grades.groupby(
        [pd.cut(grades, interval)], observed=False).size()
    test_grades = new_grades.rename("Test").reset_index()
    test_grades.columns = ["interval", "Test"]
    test_grades['color'] = test_grades.apply(color_grades, axis=1)

    fig, ax = plt.subplots()
    ax.set_xlim(1, 10)
    ax.xaxis.set_ticks(range(1, 11))
    # Density curve on a second y-axis without tick marks
    ax2 = ax.twinx()
    ax2.yaxis.set_ticks([])
    ax.bar(
        bins, new_grades, width=0.5, align="edge", color=test_grades['color'])
    sns.kdeplot(grades, ax=ax2, clip=(1, 10))


def p_value(df):
    """Return the P value per question: mean final score / mean max score.

    Questions keep their order of first appearance in ``df``.
    """
    per_question = df.groupby('question_name', sort=False)
    mean_score = per_question['final_score'].mean()
    mean_max = per_question['max_score'].mean()
    return mean_score / mean_max

In [11]:
def create_results_per_question(db_url='sqlite:///gradebook.db'):
    """Load the score of every student on every graded cell.

    Joins the ``grade``, ``submitted_notebook``, ``submitted_assignment``,
    ``grade_cell`` and ``assignment`` tables and adds a ``final_score``
    column: the manual score when present, otherwise the auto score, plus
    any extra credit.

    Args:
        db_url: Database connection string handed to
            ``pandas.read_sql_query``. Defaults to the ``gradebook.db`` file
            in the working directory.

    Returns:
        DataFrame with one row per grade; remaining missing values are
        replaced by 0.
    """
    q = '''
        SELECT
            submitted_assignment.student_id,
            grade_cell.name AS question_name,
            grade_cell.max_score,
            grade.needs_manual_grade AS needs_grading,
            grade.auto_score,
            grade.manual_score,
            grade.extra_credit,
            assignment.name AS assignment
        FROM grade
            INNER JOIN submitted_notebook ON submitted_notebook.id = grade.notebook_id
            INNER JOIN submitted_assignment ON submitted_assignment.id = submitted_notebook.assignment_id
            INNER JOIN grade_cell ON grade_cell.id = grade.cell_id
            INNER JOIN assignment ON submitted_assignment.assignment_id = assignment.id
    '''

    df = pd.read_sql_query(q, db_url)

    # A manual score overrides the auto score; extra credit is added on top
    base_score = np.where(
        ~pd.isnull(df['manual_score']), df['manual_score'], df['auto_score'])
    df['final_score'] = base_score + df['extra_credit'].fillna(0)
    return df.fillna(0)

In [12]:
def question_visualizations(assignment_id):
    """Plot the P value and Rir value of every question of an assignment.

    Draws two horizontal bar charts that share the question axis: P values
    on the left (0 to 1) and Rir values on the right (-1 to 1), the latter
    coloured by the ``positive`` column produced by ``create_rir``.
    """
    all_results = create_results_per_question()
    df = all_results.loc[all_results['assignment'] == assignment_id]
    p_df = p_value(df)
    rir_df = create_rir(df)

    # Put both metrics in one table, ordered like the P values
    combined_df = pd.concat([p_df, rir_df], axis=1)
    combined_df = combined_df.reindex(list(p_df.index)).reset_index()
    combined_df.columns = ["Question", "P value", "Rir value", "positive"]

    sns.set(style="darkgrid")
    fig, axes = plt.subplots(1, 2, figsize=(12, 7), sharey=True)
    plt.suptitle('P value and Rir value per question')

    p_axis = sns.barplot(
        x="P value", y="Question", data=combined_df, color='b', ax=axes[0])
    p_axis.set_xlim(0, 1.0)

    rir_axis = sns.barplot(
        x="Rir value",
        y="Question",
        data=combined_df,
        ax=axes[1],
        palette=combined_df["positive"])
    rir_axis.set_xlim(-1.0, 1.0)

In [13]:
def f(row):
    """Map a row's ``Rir-waarde`` to a colour code.

    ``'r'`` for values up to 0, ``'y'`` for values up to 0.25 and ``'g'``
    for everything else.
    """
    rir = row['Rir-waarde']
    if rir <= 0:
        return 'r'
    if rir <= 0.25:
        return 'y'
    return 'g'


def create_rir(df):
    """Compute the Rir value (item-rest correlation) for every question.

    For each question the score on that item is correlated with the
    student's total score on all other items. Unlike before, the input
    frame is left untouched: the helper columns are built on a private copy.

    Args:
        df: Frame with the columns ``student_id``, ``question_name``,
            ``extra_credit``, ``auto_score`` and ``manual_score``.

    Returns:
        DataFrame indexed by question name (sorted) with the columns
        ``Rir-waarde`` and ``positive`` (colour code assigned by ``f``).
    """
    if len(df["student_id"].unique()) <= 50:
        print("Norm of 50 students not reached to be meaningful")

    # Work on a copy: the caller may pass a slice of a larger frame, and
    # adding columns to it would mutate (or warn about) the caller's data.
    scores = df[["student_id", "question_name"]].copy()
    # NOTE(review): this adds auto_score and manual_score together, whereas
    # create_results_per_question treats manual_score as a replacement for
    # auto_score when computing final_score -- confirm which one is intended.
    scores["total_score_item"] = (
        df["extra_credit"] + df["auto_score"] + df["manual_score"])
    # Rest score: the student's total minus the score on the item itself
    student_totals = scores.groupby("student_id")[
        "total_score_item"].transform('sum')
    scores["student_score-item"] = student_totals - scores["total_score_item"]

    testdict = {}
    # groupby iterates over the question names in sorted order
    for question, question_scores in scores.groupby("question_name"):
        testdict[question] = question_scores[[
            "total_score_item", "student_score-item"
        ]].corr().iloc[1, 0]

    testdf = pd.DataFrame.from_dict(
        testdict, orient='index', columns=["Rir-waarde"])
    testdf['positive'] = testdf.apply(f, axis=1)
    return testdf

In [14]:
def upload_to_canvas(assignment_name, message, feedback=False):
    print(feedback,assignment_name)
    if feedback:
        !nbgrader feedback --quiet --force --assignment={assignment_name}
        
    # Haal de laatste cijfers uit gradebook
    canvasdf = total_df(gradedict)
    student_dict = get_student_ids(course)
    
    assignment = get_assignment_obj(course, assignment_name)
    # loop over alle submissions voor een assignment, alleen als er attachments zijn
    for submission in tqdm_notebook(
            assignment.get_submissions(), desc='Submissions', leave=False):
        try:
            student_id = student_dict[submission.user_id]
        except:
            continue

        if int(student_id) not in list(canvasdf.index.values):
            continue
        grade = canvasdf.at[int(student_id), assignment_name]
        if np.isnan(grade):
            continue
        # alleen de cijfers veranderen als die op canvas lager zijn of niet bestaan
        if submission.attributes['score'] == None:
            pass
        elif submission.attributes['score'] == grade or submission.attributes['score'] == 0:
            continue
        print(grade, student_id)
        if feedback:
            feedbackfile = create_feedback(student_id,
                                       assignment_name)
            submission.upload_comment(feedbackfile)
        submission.edit(submission={'posted_grade': str(grade)}, comment={'text_comment':message})
        
    # feedbackfile verwijderen, om ruimte te besparen.
    if 'canvasfeedback' in os.listdir():
        shutil.rmtree('canvasfeedback/', ignore_errors=True)
    if 'feedback' in os.listdir():
        shutil.rmtree('feedback/', ignore_errors=True)
   

In [15]:
# Button that adds missing Canvas students to the nbgrader database
db_button_layout = Layout(width='300px')
db_button = Button(
    description="Update the students in the database",
    layout=db_button_layout)
db_button.on_click(update_db)

# interact_manual variants that only differ in the caption of the run button
interact_assign = interact_manual.options(
    manual_name="Assign de assignment in de database")
canvas_button = interact_manual.options(manual_name="Cijfers naar Canvas jwz")

In [16]:
def create_overview(df, sequence):
    """Count, per assignment, how many students are passing so far.

    The columns of ``df`` are visited in the order given by ``sequence``.
    For each one, the students with a grade above 0 in that column are
    classified by whether their mean grade over the ``AssignmentWeek*``
    columns and over the ``Deeltoets*`` columns seen so far is at least 5.5.
    As long as no ``Deeltoets*`` column has been seen, that part counts as
    sufficient.

    Args:
        df: Grades, one row per student and one column per assignment;
            missing grades are treated as 0.
        sequence: Assignment names in course order; names that are not a
            column of ``df`` are ignored.

    Returns:
        DataFrame indexed by assignment name with the number of students
        that did not participate and the number in each of the four
        sufficient/insufficient combinations.
    """
    df = df.fillna(0)
    ordered = [name for name in sequence if name in df.columns]

    rows = []
    for position, current in enumerate(ordered):
        # Column selections must be lists: pandas no longer accepts a set as
        # an indexer. dict.fromkeys removes duplicates while keeping order.
        seen = list(dict.fromkeys(ordered[:position + 1]))
        assignment_columns = [
            name for name in seen if name.startswith("AssignmentWeek")
        ]
        exam_columns = [name for name in seen if name.startswith("Deeltoets")]

        # Only students with a grade above 0 took part in this assignment
        participants = df[df[current] > 0]
        if exam_columns:
            exams_ok = participants[exam_columns].mean(axis=1) >= 5.5
        else:
            # No partial exam yet: everybody counts as sufficient
            exams_ok = pd.Series(True, index=participants.index, dtype=bool)
        assignments_ok = participants[assignment_columns].mean(axis=1) >= 5.5

        rows.append([
            current,
            len(df) - len(participants),
            (~exams_ok & ~assignments_ok).sum(),
            (~exams_ok & assignments_ok).sum(),
            (exams_ok & ~assignments_ok).sum(),
            (exams_ok & assignments_ok).sum(),
        ])

    # Column labels (Dutch): did not participate / insufficient for both /
    # insufficient for the partial exam / insufficient for the assignments /
    # sufficient for both
    testdf = pd.DataFrame(
        rows,
        columns=[
            "Assignment Name", "Heeft niet meegedaan aan deze opdracht",
            "Onvoldoende voor beide onderdelen", "Onvoldoende voor deeltoets",
            "Onvoldoende voor assignments", "Voldoende voor beide onderdelen"
        ]).set_index("Assignment Name")
    return testdf

In [17]:
def visualize_overview(gradedict, sequence):
    """Plot an overview of the whole course.

    The upper panel shows a boxplot of the grades per assignment (grades
    below 1.0 are masked out); the lower panel shows, as stacked bars, the
    pass/fail counts computed by ``create_overview``.

    Args:
        gradedict: Grading settings, passed to ``total_df``.
        sequence: Assignment names in the order in which they are plotted.
    """
    df = total_df(gradedict)
    overviewdf = create_overview(df, sequence)

    # Set the style before the axes are created, otherwise it does not apply
    # to this figure
    sns.set(style="darkgrid")
    fig, axes = plt.subplots(2, 1, figsize=(12, 12), sharex=True)
    plt.suptitle('Overview of the course')
    df = df.reindex([x for x in sequence if x in df.columns], axis=1)
    a = sns.boxplot(data=df.mask(df < 1.0), ax=axes[0])
    a.set_title('Boxplot for each assignment')
    a.set_ylim(1, 10)
    sns.despine()
    # One colour per overview column: grey, red, orange, yellow, green
    flatui = ["#808080", "#FF0000", "#FFA500", "#FFFF00", "#008000"]
    sns.set_palette(flatui)
    b = overviewdf.plot.bar(
        stacked=True,
        color=flatui,
        ylim=(0, overviewdf.sum(axis=1).max()),
        width=1.0,
        legend='reverse',
        ax=axes[1])
    b.set_title(
        'How many students have sufficient grades to pass after that assignment'
    )
    plt.xticks(rotation=45)
    plt.legend(
        loc='right',
        bbox_to_anchor=(1.4, 0.8),
        fancybox=True,
        shadow=True,
        ncol=1)

In [18]:
def visualize_validity():
    """Plot assignment correlations and Cronbach's alpha side by side.

    Reads the module-level ``gradedict``. The left panel is a heatmap of the
    correlations between the assignment grades; the right panel is a bar
    chart of Cronbach's alpha per assignment, coloured by ``color_ca_plot``.
    """
    canvas_grades = total_df(gradedict)
    cronbach_df = cronbach_alpha_plot()

    # Set the style before the axes are created, otherwise it does not apply
    # to this figure
    sns.set(style="darkgrid")
    fig, axes = plt.subplots(1, 2, figsize=(15, 7))
    a = sns.heatmap(
        canvas_grades.corr(),
        vmin=-1,
        vmax=1.0,
        annot=True,
        linewidths=.5,
        cmap="RdYlGn",
        ax=axes[0])
    a.set_title("Correlations between assignments")
    a.set(ylabel='', xlabel='')

    # A concrete list instead of a one-shot map iterator: it has a length
    # and can be read more than once
    bar_colors = [
        color_ca_plot(alpha) for alpha in cronbach_df["Cronbachs Alpha"]
    ]
    b = sns.barplot(
        y="Assignment",
        x="Cronbachs Alpha",
        data=cronbach_df,
        palette=bar_colors,
        ax=axes[1])
    b.set_xlim(0, 1.0)
    b.set(ylabel='', yticks=[])
    b.set_title("Cronbachs Alpha for each assignment")

In [19]:
def color_ca_plot(c):
    """Pick a colour from the 6-step ``RdYlGn_r`` palette for an alpha value.

    Values of at least 0.8 get the first palette entry, values of at least
    0.6 the second, and everything else the last one.
    """
    pal = sns.color_palette("RdYlGn_r", 6)
    for threshold, position in ((0.8, 0), (0.6, 1)):
        if c >= threshold:
            return pal[position]
    return pal[5]


def cronbach_alpha_plot():
    """Compute Cronbach's alpha for every assignment.

    The per-question results from ``create_results_per_question`` are pivoted
    into one column per (assignment, question). For each assignment, students
    without any score are dropped and remaining gaps count as 0.

    Returns:
        DataFrame with the columns ``Assignment`` and ``Cronbachs Alpha``.
        Alpha is NaN when it is undefined: an assignment with fewer than two
        questions, or one whose total scores have no variance.
    """
    df = pd.pivot_table(
        create_results_per_question(),
        values='final_score',
        index=['student_id'],
        columns=['assignment', 'question_name'],
        # The string name avoids the deprecation warning pandas raises for
        # passing the numpy callable
        aggfunc='sum')

    testlist = []
    for assignment_id in sorted(set(df.columns.get_level_values(0))):
        items = df[assignment_id].dropna(how='all').fillna(0)

        # source: https://github.com/anthropedia/tci-stats/blob/master/tcistats/__init__.py
        items_count = items.shape[1]
        variance_sum = float(items.var(axis=0, ddof=1).sum())
        total_var = float(items.sum(axis=1).var(ddof=1))

        # Both divisions below would otherwise raise ZeroDivisionError;
        # `not total_var > 0` also catches a NaN variance (single student).
        if items_count < 2 or not total_var > 0:
            alpha = float('nan')
        else:
            alpha = (items_count / float(items_count - 1) *
                     (1 - variance_sum / total_var))
        testlist.append((assignment_id, alpha))

    cronbach_df = pd.DataFrame(
        testlist, columns=["Assignment", "Cronbachs Alpha"])
    return cronbach_df

In [20]:
def canvas_and_nbgrader():
    """Return the sorted assignment names known to both Canvas and nbgrader.

    Uses the module-level ``course`` (Canvas assignments) and
    ``nbgrader_api`` (source assignments).
    """
    canvas_names = {assignment.name for assignment in course.get_assignments()}
    source_names = set(nbgrader_api.get_source_assignments())
    return sorted(canvas_names.intersection(source_names))

In [21]:
def graded_submissions():
    """Return the names of the nbgrader assignments with at least one submission."""
    names = []
    for info in nbgrader_api.get_assignments():
        if info['num_submissions'] > 0:
            names.append(info['name'])
    return names

In [22]:
def get_student_ids(course):
    """Map the Canvas id of every user in ``course`` to that user's SIS id."""
    sis_by_canvas_id = {}
    for user in course.get_users():
        sis_by_canvas_id[user.id] = user.sis_user_id
    return sis_by_canvas_id


def get_assignment_obj(course, assignment_name):
    """Return the Canvas assignment of ``course`` named ``assignment_name``.

    When several assignments share the name, the last one listed wins.

    Raises:
        KeyError: If no assignment has that name.
    """
    by_name = {}
    for candidate in course.get_assignments():
        by_name[candidate.name] = candidate
    return by_name[assignment_name]