In [1]:
import pandas as pd
import numpy as np
import os
import json
from hashlib import sha512
from vega3 import VegaLite
from IPython.display import display

def display_vg(spec):
    """Render a Vega-Lite specification as rich notebook output.

    The spec is wrapped in a single-entry MIME bundle keyed by the
    Vega-Lite v2 MIME type and passed to ``display`` with ``raw=True``.

    :param spec: the Vega-Lite specification to render (e.g. ``chart.spec``).
    :return: None
    """
    display({'application/vnd.vegalite.v2+json': spec}, raw=True)
    
def mkdir(directory):
    """
    Create ``directory`` if it does not already exist.

    Missing parent directories are created as well, and a directory that
    already exists is left untouched, so the call is safe to repeat.

    :param string directory: path of the directory to create.
    :return: None
    :raises FileExistsError: if the path exists but is not a directory.
    """
    # makedirs (rather than mkdir) so a fresh checkout without the parent
    # folders does not fail with FileNotFoundError.
    os.makedirs(directory, exist_ok=True)
    
def save_file(file_name, data):
    """
    Write ``data`` to ``file_name`` as UTF-8 text, replacing any existing content.

    :param string file_name: path of the file to write.
    :param string data: text to write.
    :return: None
    """
    # Explicit encoding keeps the bytes on disk independent of the
    # platform's default locale encoding.
    with open(file_name, "w", encoding="utf-8") as f:
        f.write(data)
        

In [2]:
# Make sure the folder that receives the generated JSON exists before anything is written.
mkdir(directory="../data/generated/")

In [3]:
module = 'MAT-10044'
output = {}
output["code"] = module

# Load the raw grade export for the module; the first line of the file is skipped.
grades = pd.read_csv('../data/raw/grades/{0}.csv'.format(module), skiprows=[0])
grades = grades[['#Ass#', 'Mark', '#Cand Key']]
grades.columns = ['ass', 'grade', 'user']
# Remove '#' characters and '/<digit>' sequences from the candidate key, then
# replace the key with its SHA-512 hex digest.
# regex=True is spelled out because pandas >= 2.0 treats the pattern as a
# literal string by default, which would silently leave the keys uncleaned
# and change every resulting hash.
grades['user'] = (
    grades['user']
    .str.replace(r'#|/[0-9]', '', regex=True)
    .apply(lambda u: sha512(u.encode('utf-8')).hexdigest())
)
grades = grades.set_index('user')

assessments = grades['ass'].unique()
# Weights are matched to assessments by position, i.e. in the order the
# assessment codes first appear in the file.
assessment_weights = [0.8, 0.2]
if len(assessments) > len(assessment_weights):
    # Fail up front with a readable message instead of an IndexError mid-loop.
    raise ValueError(
        "found {0} assessments but only {1} weights are configured".format(
            len(assessments), len(assessment_weights)
        )
    )
module_grades = pd.DataFrame([], index=grades.index.unique())

# Build one raw column and one weighted column per assessment, aligned on the hashed user id.
for k, ass in enumerate(assessments):
    assessment_grades = grades[grades['ass'] == ass]['grade'].to_frame()
    assessment_grades.columns = [ass]
    assessment_grades['{0}_weighted'.format(ass)] = assessment_grades[ass] * assessment_weights[k]
    module_grades = module_grades.merge(assessment_grades, left_index=True, right_index=True, how="outer")

# A student with no mark for an assessment is counted as 0 for it.
module_grades = module_grades.fillna(0)
# The final grade is the sum of all '*_weighted' columns.
module_grades['final_grade'] = module_grades.filter(regex="_weighted").sum(axis=1)

module_grades.head()

Unnamed: 0_level_0,#01,#01_weighted,#02,#02_weighted,final_grade
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0a628068de9c2a70f8685e318f5dbd07badc53704c8d7655949fda7415f195ec98c3a4d43e3957661f2fd0fce73b4814b58cf552cd35d1865cf6bf42f3e69565,96.0,76.8,89,17.8,94.6
0a6d48869596d53488f8838878198bf3caac38548d83ad7e14734297de4253f8199a814b7db1bab6091835250c7bdcc75acc14a1572f112a3967c79b1503013e,64.0,51.2,95,19.0,70.2
0a8e012a21baf4c32928caa65c149c2afd8a970a7a28820b215decefbe66778c28ae0af71c47544033919fb74846139140f9ec20dd5d8350cd3bcef2155391ff,85.0,68.0,83,16.6,84.6
18e4e11be10aaf08723ffb8f2f731ffb96d9ebdc5f38368f4b2f1e46b5f85a919d3f60d2daafcb07ead56fe24bfa63f6d13eff337a1e5f0b550c609e918bd8dc,79.0,63.2,87,17.4,80.6
1a70e13e29d382e20adf383c93d02fed2bf9b1855867b0d22415c983e81e7750f6da13abf1523d908362d4a5c8faff44b303f1c6b1572f0acf2013608633fc02,95.0,76.0,91,18.2,94.2


In [11]:
height = 400
width = 800

# Histogram of the final grades with two light-grey guide rules layered on top.
# NOTE(review): the rules sit at fixed offsets of 40% and 60% of the chart
# width; they line up with grades 40 and 60 only if the x scale spans 0-100
# across the full width - confirm against the rendered chart.
chart = VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json",
    "title": "Module attainment histogram",
    "height": height,
    "width": width,
    "layer": [
        {
            "mark": "bar",
            "encoding": {
                "x": {
                    "bin": True,
                    "field": "final_grade",
                    "type": "quantitative",
                    "axis": {"title": "Grade"}
                },
                "y": {
                    "aggregate": "count",
                    "type": "quantitative",
                    "axis": {"title": "Number of students"}
                }
            }
        }
    ] + [
        # One identical rule layer per guide position.
        {
            "mark": "rule",
            "encoding": {
                "x": {"value": width/100 * percent},
                "size": {"value": 2},
                "color": {"value": "#ccc"}
            }
        }
        for percent in (40, 60)
    ]
}, module_grades)

# Keep the spec for the JSON export and render it inline.
output["hist"] = chart.spec
display_vg(chart.spec)

In [5]:
# Summary statistics over the final grades, stored as plain ints/floats for the JSON export.
final_grades = module_grades['final_grade']
count_gte70 = int((final_grades >= 70).sum())

stats = {
    "median": round(final_grades.median(), 1),
    "gte70": count_gte70,
    # Students at 60 or above, excluding those already counted in "gte70".
    "gte60": int((final_grades >= 60).sum() - count_gte70),
    "lte40": int((final_grades <= 40).sum()),
    "zeros": int((final_grades == 0).sum()),
}

output["stats"] = stats

In [6]:
# Placeholder: random average grades (0-100, one decimal) until real averages are available.
# A dedicated, seeded generator makes the placeholder values - and the JSON
# saved from them - identical on every run without touching NumPy's global state.
placeholder_rng = np.random.RandomState(42)
average_grades = pd.DataFrame(
    (placeholder_rng.random_sample(module_grades.shape[0]) * 100).round(1),
    columns=['average_grade'],
    index=module_grades.index,
)
compare_grades = module_grades.merge(average_grades, left_index=True, right_index=True)

# TODO: replace the placeholder with real average grades merged onto the module grades by user.
compare_grades.head()

Unnamed: 0_level_0,#01,#01_weighted,#02,#02_weighted,final_grade,average_grade
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0a628068de9c2a70f8685e318f5dbd07badc53704c8d7655949fda7415f195ec98c3a4d43e3957661f2fd0fce73b4814b58cf552cd35d1865cf6bf42f3e69565,96.0,76.8,89,17.8,94.6,79.8
0a6d48869596d53488f8838878198bf3caac38548d83ad7e14734297de4253f8199a814b7db1bab6091835250c7bdcc75acc14a1572f112a3967c79b1503013e,64.0,51.2,95,19.0,70.2,54.0
0a8e012a21baf4c32928caa65c149c2afd8a970a7a28820b215decefbe66778c28ae0af71c47544033919fb74846139140f9ec20dd5d8350cd3bcef2155391ff,85.0,68.0,83,16.6,84.6,20.2
18e4e11be10aaf08723ffb8f2f731ffb96d9ebdc5f38368f4b2f1e46b5f85a919d3f60d2daafcb07ead56fe24bfa63f6d13eff337a1e5f0b550c609e918bd8dc,79.0,63.2,87,17.4,80.6,28.4
1a70e13e29d382e20adf383c93d02fed2bf9b1855867b0d22415c983e81e7750f6da13abf1523d908362d4a5c8faff44b303f1c6b1572f0acf2013608633fc02,95.0,76.0,91,18.2,94.2,4.1


In [12]:
# Scatter plot of each student's average grade (x) against their grade on this module (y),
# with a red rule layered on top.
chart = VegaLite({
    "$schema": "https://vega.github.io/schema/vega-lite/v2.0.json",
    "title": "Module performance comparison",
    "height": height,
    "width": width,
    "layer": [
        {
            "mark": "circle",
            "encoding": {
                "x": {
                    "field": "average_grade",
                    "type": "quantitative",
                    "axis": {
                        "title": "Average Grade"
                    }
                },
                "y": {
                    "field": "final_grade",
                    # Was misspelled "typs", which left the y channel without a declared type.
                    "type": "quantitative",
                    "axis": {
                        "title": "Module grade"
                    }
                }
            }
        },
        {
            # NOTE(review): this rule is anchored at x=0 / y=height only; if it is
            # meant to be a diagonal reference line it would also need x2/y2 - confirm intent.
            "mark": "rule",
            "encoding": {
                "x": {
                    "value": 0
                },
                "y": {
                    "value": height
                },
                "size": {
                    "value": 1
                },
                "color": {
                    "value": "red"
                }
            }
        }
    ]
}, compare_grades)

# Keep the spec for the JSON export and render it inline.
output["compare"] = chart.spec
display_vg(chart.spec)

In [13]:
# Serialise everything collected in `output` to JSON, in a file named after the module code.
save_file(
    file_name="../data/generated/{0}.json".format(module),
    data=json.dumps(output),
)