In [1]:
from __future__ import print_function

import csv
import os
import re
import shutil
import urllib.request
import sys

from nbgrader.apps import NbGraderAPI
from traitlets.config import Config
import matplotlib.pyplot as plt
import nbgrader
import subprocess
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup
from canvasapi import Canvas
from IPython.display import Javascript, Markdown, display
from ipywidgets import fixed, interact, interact_manual, interactive, widgets, Button, Layout
from tqdm import tqdm, tqdm_notebook  # Progress bar

In [2]:
def assign(assignment_id):
    """Run ``nbgrader assign`` for ``assignment_id`` through the IPython shell escape.

    The command is invoked with ``--create`` and ``--force`` and with
    ``source/header.ipynb`` configured as the ``IncludeHeaderFooter`` header.
    ``{assignment_id}`` is interpolated into the command line by IPython, so
    this function only works inside an IPython/Jupyter session.
    """
    !nbgrader assign {assignment_id} --create --force --IncludeHeaderFooter.header=source/header.ipynb

In [3]:
def download_files(assignment_id, course):
    directory = 'downloaded/%s/archive/' % assignment_id
    if not os.path.exists(directory):
        os.makedirs(directory)

    # Get sis id's from students
    student_dict = {
        student.id: student.sis_user_id
        for student in course.get_users()
    }

    # Get the Canvas assignment id
    assignment = {
        assignment.name: assignment
        for assignment in course.get_assignments()
    }[assignment_id]
    submissions = assignment.get_submissions()

    for submission in tqdm_notebook(submissions):
        # Check if submission has attachments
        if 'attachments' not in submission.attributes:
            continue
        # Download file and give correct name
        student_id = student_dict[submission.user_id]
        attachment = submission.attributes["attachments"][0]
        filename = str(student_id) + "_" + assignment_id + ".ipynb"
        urllib.request.urlretrieve(attachment['url'], directory + filename)
        # Clear all notebooks of output to save memory
        !nbstripout {directory + filename}
    # Move the download files to submission folder
    !nbgrader zip_collect {assignment_id} --force --log-level='INFO'

    # Delete folders which aren't necessary
    shutil.rmtree('downloaded/%s/archive/' % assignment_id)


In [4]:
def update_db(b):
    """Add every Canvas student that is missing from the nbgrader gradebook.

    Button callback: ``b`` is the clicked widget and is not used. Reads the
    module-level names ``nbgrader_api`` and ``course``, which are not defined
    in this block.

    Students are stored under ``str(sis_user_id)``, so the membership test is
    done on the same string form; comparing the raw id would never match when
    the SIS id is not already a string.
    """
    # Ids already present in the nbgrader database (a set gives O(1) lookups)
    known_ids = {
        str(student.id) for student in nbgrader_api.gradebook.students
    }

    for student in tqdm_notebook(course.get_users(enrollment_type=['student'])):
        student_id = str(student.sis_user_id)
        if student_id in known_ids:
            continue
        # partition never fails: a single-word name yields an empty last name
        # instead of raising ValueError on tuple unpacking.
        first_name, _, last_name = student.name.partition(' ')
        nbgrader_api.gradebook.add_student(
            student_id,
            first_name=first_name,
            last_name=last_name)
        # Guard against the same id appearing twice in the Canvas listing
        known_ids.add(student_id)

In [5]:
def autograde(assignment_id):
    """Run ``nbgrader autograde`` for ``assignment_id`` through the IPython shell escape.

    The command is invoked with ``--create``, ``--force`` and ``--quiet``.
    ``{assignment_id}`` is interpolated into the command line by IPython, so
    this function only works inside an IPython/Jupyter session.
    """
    !nbgrader autograde {assignment_id} --create --force --quiet

In [6]:
def plagiatcheck(assignment_id):
    !jupyter nbconvert --to script downloaded/{assignment_id}/extracted/*.ipynb --output-dir=plagiaatcheck/{assignment_id}/pyfiles / --log-level WARN
    !jupyter nbconvert --to script release/{assignment_id}/*.ipynb --output-dir=plagiaatcheck/{assignment_id}/base / --log-level WARN
    shutil.rmtree('downloaded/%s/extracted/' % assignment_id)
    directory = "plagiaatcheck/%s/pyfiles/" % assignment_id
    for file in os.listdir(directory):
        if file.endswith(".txt"):
            targetfilename = file[:-3] + "py"
            if targetfilename in os.listdir(directory):
                os.remove(directory + targetfilename)
            os.rename(directory + file, directory + targetfilename)
    if not sys.platform.startswith('win'):
        !compare50 plagiaatcheck/{assignment_id}/pyfiles/* -d plagiaatcheck/{assignment_id}/base/
        #!compare50 pyfiles/* -d base/
    else:
        print("Oeps, voor compare50 heb je Linux of Mac nodig.")
    display(
        Markdown(
            '<a class="btn btn-primary" style="margin-top: 10px; text-decoration: none;" href="plagiaatcheck/%s/" target="_blank">Open map met plagiaatresultaten</a>'
            % assignment_id))

In [7]:
def create_feedback(student_id, assignment_id):
    """Create a copy of a student's feedback HTML without the hidden tests.

    Reads ``feedback/<student_id>/<assignment_id>/<assignment_id>.html``,
    and in everything after the first ``</head>``:

    * shortens error outputs so that only the part starting at the
      ``ansi-red-intense-fg ansi-bold`` span is kept, and
    * removes everything between the ``### BEGIN HIDDEN TESTS`` and
      ``### END HIDDEN TESTS`` comment spans.

    The result is written to
    ``canvasfeedback/<student_id>/<assignment_id>/<assignment_id>.html``.

    Returns
    -------
    str
        Path of the written file.

    Raises
    ------
    ValueError
        If the parsed document contains no ``</head>``.
    """
    directory = 'feedback/%s/%s/' % (student_id, assignment_id)
    # Context manager closes the input file; it was previously left open
    with open("%s%s.html" % (directory, assignment_id),
              encoding='utf-8') as source:
        soup = str(BeautifulSoup(source, "html.parser"))

    # Only the body is rewritten; the head (with the CSS) is kept as is
    css, html = soup.split('</head>', 1)
    html = re.sub(
        r'(<div class="output_subarea output_text output_error">\n<pre>\n)(?:(?!<\/div>)[\w\W])*(<span class="ansi-red-intense-fg ansi-bold">[\w\W]*?<\/pre>)',
        r'\1\2', html)
    html = re.sub(
        r'<span class="c1">### BEGIN HIDDEN TESTS<\/span>[\w\W]*?<span class="c1">### END HIDDEN TESTS<\/span>',
        '', html)
    soup = css + '</head>' + html

    targetdirectory = 'canvasfeedback/%s/%s/' % (student_id, assignment_id)
    os.makedirs(targetdirectory, exist_ok=True)
    filename = "%s%s.html" % (targetdirectory, assignment_id)
    # Context manager guarantees the file is closed even if the write fails
    with open(filename, "w", encoding="utf8") as html_file:
        html_file.write(soup)
    return filename

In [8]:
def calculate_grade(score, assignment, max_score, gradedict):
    """Convert a score into a grade between 1 and 10, rounded to one decimal.

    When ``assignment`` is a key of ``gradedict``, its ``"max_score"`` and
    ``"min_grade"`` entries replace the ``max_score`` argument and the default
    minimum grade of 0. The grade scales linearly from ``min_grade`` (score 0)
    to 10 (score equal to the maximum) and is clamped to the range 1..10.
    """
    if assignment in gradedict:
        settings = gradedict[assignment]
        max_score = settings["max_score"]
        min_grade = settings["min_grade"]
    else:
        min_grade = 0

    scaled = round(min_grade + (10 - min_grade) * score / max_score, 1)
    capped = min(scaled, 10.0)
    return max(1, capped)

In [9]:
def create_canvas_grades(gradedict):
    """Build a students x assignments table of grades from ``gradebook.db``.

    Per student and assignment, the score of a graded cell is its manual score
    when one is set and its autograder score otherwise, plus extra credit.
    Summing both scores would double-count cells that have both, and it would
    disagree with ``create_results_per_question``.
    NOTE(review): this follows nbgrader's rule that a manual score takes
    precedence over the autograder score -- confirm against the nbgrader docs.
    The summed score is converted with ``calculate_grade``.

    Side effect: the table is also written to ``canvas.csv``.

    Parameters
    ----------
    gradedict : dict
        Per-assignment overrides, passed through to ``calculate_grade``.

    Returns
    -------
    pandas.DataFrame
        Index ``student_id`` (numeric), one column per assignment name.
    """
    q = '''
        SELECT
            submitted_assignment.student_id,
            assignment.name AS assignment,
            SUM(grade_cell.max_score) as max_score,
            SUM(grade.auto_score) as auto_score,
            SUM(grade.manual_score) as manual_score,
            SUM(grade.extra_credit) as extra_credit,
            SUM(COALESCE(grade.manual_score, grade.auto_score, 0)) as base_score
        FROM grade
            INNER JOIN submitted_notebook ON submitted_notebook.id = grade.notebook_id
            INNER JOIN submitted_assignment ON submitted_assignment.id = submitted_notebook.assignment_id
            INNER JOIN grade_cell ON grade_cell.id = grade.cell_id
            INNER JOIN assignment ON submitted_assignment.assignment_id = assignment.id
        GROUP BY submitted_assignment.student_id, assignment.name
    '''

    canvasdf = pd.read_sql_query(q, 'sqlite:///gradebook.db').fillna(0)
    canvasdf['student_id'] = pd.to_numeric(canvasdf['student_id'])
    canvasdf["score"] = canvasdf["base_score"] + canvasdf["extra_credit"]
    # Iterate the columns by name: positional row[0]/row[1] lookups on a
    # label-indexed Series are deprecated in pandas, and this form also works
    # for an empty frame.
    canvasdf['grade'] = [
        calculate_grade(score, assignment, max_score, gradedict)
        for score, assignment, max_score in zip(
            canvasdf['score'], canvasdf['assignment'], canvasdf['max_score'])
    ]
    canvasdf = canvasdf.pivot_table(
        values='grade',
        index='student_id',
        columns='assignment',
        aggfunc='first')
    canvasdf.to_csv('canvas.csv')
    return canvasdf

In [10]:
def create_results_per_question():
    """Load one row per graded cell from ``gradebook.db`` and add a ``final_score``.

    ``final_score`` is the manual score where one is present and the
    autograder score otherwise, plus the extra credit (missing extra credit
    counts as 0).

    Returns
    -------
    pandas.DataFrame
        Columns selected by the query plus ``final_score``.
    """
    q = '''
        SELECT
            submitted_assignment.student_id,
            grade_cell.name AS question_name,
            grade_cell.max_score,
            grade.needs_manual_grade AS needs_grading,
            grade.auto_score,
            grade.manual_score,
            grade.extra_credit,
            assignment.name AS assignment
        FROM grade
            INNER JOIN submitted_notebook ON submitted_notebook.id = grade.notebook_id
            INNER JOIN submitted_assignment ON submitted_assignment.id = submitted_notebook.assignment_id
            INNER JOIN grade_cell ON grade_cell.id = grade.cell_id
            INNER JOIN assignment ON submitted_assignment.assignment_id = assignment.id
    '''

    results = pd.read_sql_query(q, 'sqlite:///gradebook.db')

    # A manual score, when present, wins over the autograder score
    has_manual = pd.notnull(results['manual_score'])
    base_score = np.where(has_manual, results['manual_score'],
                          results['auto_score'])
    bonus = results['extra_credit'].fillna(0)
    results['final_score'] = base_score + bonus

    return results

In [11]:
def visualize_grades(assignment_id, canvas_grades):
    """Print summary statistics and plot the grade distribution of one assignment.

    Parameters
    ----------
    assignment_id : str
        Column of ``canvas_grades`` to analyse.
    canvas_grades : pandas.DataFrame
        Grade table with one column per assignment.

    Only grades of at least 1.0 are used; lower and missing values are
    dropped. When no grade remains, a message is printed and nothing is
    plotted (previously this raised ZeroDivisionError).
    """
    grades = canvas_grades[assignment_id]
    # Keep grades >= 1.0; values below 1.0 and NaN are ignored
    grades = grades.where(grades >= 1.0).dropna()
    if grades.empty:
        print("No grades of 1.0 or higher found for {}".format(assignment_id))
        return

    # Share of grades below the pass mark of 5.5, computed once
    failed_percentage = 100 * (grades < 5.5).sum() / len(grades)
    # Cohen-Schotanus: mean of the best 5% of the grades, at least 5 of them
    top_count = max(5, int(len(grades) * 0.05))

    print("The mean grade is {:.1f}".format(grades.mean()))
    print("The median grade is {}".format(grades.median()))
    print("Maximum van Cohen-Schotanus is {}".format(grades.nlargest(top_count).mean()))
    print("Het percentage onvoldoendes is {:.1f}%. ".format(failed_percentage))
    if failed_percentage > 30:
        print("Het percentage onvoldoendes is te hoog, voor meer informatie kijk op: {}".format("http://toetsing.uva.nl/toetscyclus/analyseren/tentamenanalyse/tentamenanalyse.html#anker-percentage-geslaagde-studenten"))

    ax = sns.distplot(grades, kde_kws={'clip': (0.0, 10.0)}, bins=np.arange(1, 11, 1))
    ax.set_xlim(1, 10)
    ax.set_ylim(0, 1)


def visualize_questions(assignment_id, df):
    """Draw a horizontal bar chart of the average fraction of points per question.

    Rows of ``df`` belonging to ``assignment_id`` are selected, missing values
    are replaced by 0, and per question the mean ``final_score`` is divided by
    the mean ``max_score``.
    """
    selected = df.loc[df['assignment'] == assignment_id].fillna(0)
    per_question = selected.groupby('question_name')
    fraction_earned = (per_question['final_score'].mean() /
                       per_question['max_score'].mean())
    fraction_earned.plot(kind='barh', figsize=(15, 8), xlim=(0, 1))

In [3]:
def color_ca_plot(c):
    """Return the colour of the 6-colour ``RdYlGn_r`` palette that belongs to ``c``.

    The first palette entry is used for ``c >= 0.9``, the next ones for each
    further step of 0.1 down to 0.5, and the last entry for everything else.
    """
    pal = sns.color_palette("RdYlGn_r", 6)
    # Lower bounds in descending order; the position selects the colour
    for position, lower_bound in enumerate((0.9, 0.8, 0.7, 0.6, 0.5)):
        if c >= lower_bound:
            return pal[position]
    return pal[5]


def cronbach_alpha_plot():
    """Plot Cronbach's alpha of every assignment as a coloured bar chart.

    The per-question results are pivoted into a students x (assignment,
    question) table. For each assignment only students with a score for every
    question are used. Alpha is ``k / (k - 1) * (1 - sum(item variances) /
    variance(total score))`` with ``k`` the number of questions.

    Alpha is undefined for fewer than two questions or when the total score
    has no variance; such assignments get NaN instead of raising
    ZeroDivisionError. When there are no assignments at all, a message is
    printed and nothing is plotted.
    """
    # 'sum' is the string form pandas asks for in place of np.sum
    scores = pd.pivot_table(
        create_results_per_question(),
        values='final_score',
        index=['student_id'],
        columns=['assignment', 'question_name'],
        aggfunc='sum')

    testlist = []
    for assignment_id in sorted(set(scores.columns.get_level_values(0))):
        items = scores[assignment_id].dropna(how='any')

        # source: https://github.com/anthropedia/tci-stats/blob/master/tcistats/__init__.py
        items_count = items.shape[1]
        variance_sum = float(items.var(axis=0, ddof=1).sum())
        total_var = float(items.sum(axis=1).var(ddof=1))

        # "not total_var > 0" is also true for NaN (fewer than two students)
        if items_count < 2 or not total_var > 0:
            alpha = float('nan')
        else:
            alpha = (items_count / float(items_count - 1) *
                     (1 - variance_sum / total_var))
        testlist.append((assignment_id, alpha))

    if not testlist:
        print("No graded assignments found, nothing to plot")
        return

    # A list, not a one-shot map iterator, so the palette can be read repeatedly
    bar_colors = [color_ca_plot(alpha) for _, alpha in testlist]
    sns.set_style('ticks')
    fig, ax = plt.subplots()
    fig.set_size_inches(10, 5)
    sns.barplot(x=0, y=1,
        data=pd.DataFrame(testlist), palette=bar_colors).set_ylim(
            0, 1.0)

In [13]:
def f(row):
    """Map the ``'Rir-waarde'`` entry of ``row`` to a colour code.

    Returns ``'r'`` for a value of at most 0, ``'y'`` for a value of at most
    0.25 and ``'g'`` otherwise.
    """
    if row['Rir-waarde'] <= 0:
        return 'r'
    return 'y' if row['Rir-waarde'] <= 0.25 else 'g'


def create_rir(assignment_id):
    """Plot the item-rest correlation (Rir) of every question of an assignment.

    For each question, the score on that question is correlated with the
    student's total score on all other questions of the assignment. Bars are
    coloured through ``f``.

    The score of a cell is the manual score when one is set and the autograder
    score otherwise, plus extra credit; this matches
    ``create_results_per_question``. Adding the manual and autograder score
    together would double-count cells that have both.
    NOTE(review): this follows nbgrader's rule that a manual score takes
    precedence over the autograder score -- confirm against the nbgrader docs.

    A note is printed when the assignment has 50 or fewer students.
    """
    q = '''
        SELECT
            submitted_assignment.student_id,
            grade_cell.name AS question_name,
            grade_cell.max_score,
            grade.needs_manual_grade AS needs_grading,
            grade.auto_score,
            grade.manual_score,
            grade.extra_credit,
            assignment.name AS assignment
        FROM grade
            INNER JOIN submitted_notebook ON submitted_notebook.id = grade.notebook_id
            INNER JOIN submitted_assignment ON submitted_assignment.id = submitted_notebook.assignment_id
            INNER JOIN grade_cell ON grade_cell.id = grade.cell_id
            INNER JOIN assignment ON submitted_assignment.assignment_id = assignment.id
    '''
    testdict = {}
    df = pd.read_sql_query(q, 'sqlite:///gradebook.db')
    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of the query result (SettingWithCopyWarning)
    df = df.loc[df['assignment'] == assignment_id].copy()

    if len(df["student_id"].unique()) <= 50:
        print("Norm of 50 students not reached to be meaningful")

    # to_numeric turns all-NULL (object) columns into float NaN columns
    manual_score = pd.to_numeric(df["manual_score"])
    auto_score = pd.to_numeric(df["auto_score"])
    extra_credit = pd.to_numeric(df["extra_credit"])
    df["total_score_item"] = (manual_score.fillna(auto_score).fillna(0) +
                              extra_credit.fillna(0))
    # Rest score: the student's total minus the score on this item
    df['student_score-item'] = df['total_score_item'].groupby(
        df['student_id']).transform('sum') - df['total_score_item']
    for question in sorted(set(df["question_name"].values)):
        temp_df = df.loc[df['question_name'] == question]
        testdict[question] = temp_df[[
            "total_score_item", "student_score-item"
        ]].corr().iloc[1, 0]
    testdf = pd.DataFrame.from_dict(
        testdict, orient='index', columns=["Rir-waarde"])
    testdf['positive'] = testdf.apply(f, axis=1)

    ax = testdf.plot(kind='barh', color=[testdf.positive])
    ax.set_xlim(-1, 1)

In [14]:
def upload_to_canvas(b):
    # Haal de laatste cijfers uit gradebook
    canvasdf = create_canvas_grades(gradedict)
    if 'student_dict' not in globals():
        student_dict = {
            student.id: student.sis_user_id
            for student in course.get_users()
        }

    # loop over alle assignments heen, selecteer alleen de nbgraderopdrachten
    for assignment in tqdm_notebook(course.get_assignments(), desc='Assignments'):

        # converteer de canvas-assignment naam naar de nbgrader naam
        assignment_name_nbgrader = assignment.name
        if assignment_name_nbgrader not in list(canvasdf.columns):
            continue
        # loop over alle submissions voor een assignment, alleen als er attachments zijn
        for submission in tqdm_notebook(
                assignment.get_submissions(), desc='Submissions', leave=False):
            try:
                student_id = student_dict[submission.user_id]
            except:
                continue
                
            if int(student_id) not in list(canvasdf.index.values):
                continue
            grade = canvasdf.at[int(student_id), assignment_name_nbgrader]
            if np.isnan(grade):
                continue
            # alleen de cijfers veranderen als die op canvas lager zijn of niet bestaan
            if submission.attributes['score'] == None:
                pass
            elif submission.attributes['score'] >= grade:
                continue

                # creeer feedbackfiles als die nog niet gemaakt zijn
            if 'feedback' not in os.listdir():
                !nbgrader feedback --quiet --force --assignment={assignment_name_nbgrader}
            feedbackfile = create_feedback(student_id,
                                           assignment_name_nbgrader)
            submission.edit(submission={'posted_grade': str(grade)}, comment={'text_comment':
                    message})
            submission.upload_comment(feedbackfile)
        # feedbackfile verwijderen, om ruimte te besparen.
        if 'canvasfeedback' in os.listdir():
            shutil.rmtree('canvasfeedback/', ignore_errors=True)
        if 'feedback' in os.listdir():
            shutil.rmtree('feedback/', ignore_errors=True)
   

In [15]:
# Button that calls update_db when clicked
db_button = Button(
    description="Update the students in the database",
    layout=Layout(width='300px'))
db_button.on_click(update_db)

# interact_manual variant whose run button carries a custom label
interact_assign = interact_manual.options(
    manual_name="Assign de assignment in de database")


# Button that calls upload_to_canvas when clicked
canvas_button = Button(
    description="Upload feedback and grades to Canvas",
    layout=Layout(width='300px'))

canvas_button.on_click(upload_to_canvas)

In [16]:
def create_overview(gradedict):
    """Plot, after each assignment, how many students pass each course component.

    Assignments are taken in a fixed course order and limited to those present
    in the grade table; missing grades count as 0. After each assignment the
    mean of all ``AssignmentWeek*`` grades so far and the mean of all
    ``Deeltoets*`` grades so far are each compared with 5.5. As long as no
    ``Deeltoets`` has been reached, that component counts as passed. The four
    pass/fail combinations are drawn as a stacked bar per assignment.

    Parameters
    ----------
    gradedict : dict
        Per-assignment overrides, passed through to ``create_canvas_grades``.
    """
    df = create_canvas_grades(gradedict).fillna(0)
    course_order = [
        "AssignmentWeek1", "AssignmentWeek2", "AssignmentWeek3",
        "Deeltoets1", "AssignmentWeek5", "AssignmentWeek6",
        "AssignmentWeek7", "Deeltoets2"
    ]
    present = [name for name in course_order if name in df.columns]

    testlist = []
    for n, c in enumerate(present):
        seen = present[:n + 1]
        # Lists, not sets: pandas no longer accepts a set as column indexer
        kolommen_assignments = [
            x for x in seen if x.startswith("AssignmentWeek")
        ]
        kolommen_deeltoets = [x for x in seen if x.startswith("Deeltoets")]
        if kolommen_deeltoets:
            voldoende_deeltoets = df[kolommen_deeltoets].mean(axis=1) >= 5.5
        else:
            voldoende_deeltoets = pd.Series(
                True, index=df.index, dtype=bool)
        voldoende_assignments = df[kolommen_assignments].mean(axis=1) >= 5.5
        # Order matches the column labels below: fail/fail, fail deeltoets
        # only, fail assignments only, pass/pass
        testlist.append(
            [c] + [(x & y).sum()
                   for x in [~voldoende_deeltoets, voldoende_deeltoets]
                   for y in [~voldoende_assignments, voldoende_assignments]])

    testdf = pd.DataFrame(
        testlist,
        columns=[
            "Assignment Name", "Onvoldoende voor beide onderdelen",
            "Onvoldoende voor deeltoets", "Onvoldoende voor assignments",
            "Voldoende voor beide onderdelen"
        ]).set_index("Assignment Name")

    # Named fig so it does not shadow the module-level function f
    fig = plt.figure()
    # NOTE: the plotted values are student counts, not percentages
    plt.title(
        'Showing what percentage of students have suifficient grades to pass after that assignment',
        color='black')
    testdf.plot.bar(
        stacked=True,
        color=[(1, 0, 0), (1, 0.65, 0), (1, 1, 0), (0, 0.5, 0)],
        ylim=(0, testdf.sum(axis=1).max()),
        legend='reverse',
        ax=fig.gca(),
        figsize=(10, 5))
    plt.legend(
        loc='upper center',
        bbox_to_anchor=(0.5, -0.15),
        fancybox=True,
        shadow=True,
        ncol=len(testdf.columns))

In [17]:
def create_correlations():
    """Draw an annotated heatmap of the correlations between assignment grades.

    Uses the module-level ``canvas_grades`` table; grades below 1.0 are masked
    out before the correlations are computed.
    """
    valid_grades = canvas_grades.mask(canvas_grades < 1.0)
    correlation_matrix = valid_grades.corr()
    sns.heatmap(
        correlation_matrix,
        vmin=0,
        vmax=1.0,
        annot=True,
        linewidths=.5,
        cmap="RdYlGn")

In [18]:
def create_assignments_plot():
    """Draw one box plot per assignment of the grades in the gradebook.

    Uses the module-level ``gradedict``; grades below 1.0 are masked out. The
    y-axis runs from 1 to 10.
    """
    all_grades = create_canvas_grades(gradedict)
    valid_grades = all_grades.mask(all_grades < 1.0)

    sns.set_style('ticks')
    fig, ax = plt.subplots()
    ax.set_ylim(1, 10)
    fig.set_size_inches(13, 8)
    sns.boxplot(data=valid_grades, ax=ax)
    sns.despine()