In [5]:
%%html
<style>.container { width:100% !important;} </style>

In [1]:
import sys
import os
import pandas as pd
import numpy as np
import json
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [2]:
# Edit this: relative path to directory containing exported data
path = 'export/Default Term/POC/'

Get all files in a DataFrame

In [3]:
# Get the list of all json and attachment files in directory tree
listOfJSON = list()
listOfAttachments = list()
for (dirpath, dirnames, filenames) in os.walk(os.getcwd() + '/' + path):
    relpath = dirpath.replace(os.getcwd() + '/','')
    listOfJSON += [[relpath, file] for file in filenames if file.endswith('.json')]
    listOfAttachments += [[relpath, file] for file in filenames if not file.endswith('.json')]

# Get the JSON data
jsons = []
attachments, comments, comment_attachments, rubric_assessment = [], [], [], []
for (dirpath, jsonfile) in listOfJSON:
    if jsonfile.endswith('.json'):
        with open(os.path.join(dirpath, jsonfile)) as f:
            js = json.load(f)
            jsons.append(js)
            #Get submission attachments
            attachment_list = ''
            if 'attachments' in js:
                attachment_list = [att['filename'] for att in js['attachments']]
            attachments.append(attachment_list)
            #Get submission comments and any attachment
            comms = ''
            comm_attachments = ''
            if 'submission_comments' in js:
                comms = [comm for comm in js['submission_comments']]
                for comm_attach in js['submission_comments']:
                    if 'attachments' in comm_attach:
                        comm_attachments = [comm['filename'] for comm in comm_attach['attachments'] ]
            comments.append(comms or '') #or: do not store []
            comment_attachments.append(comm_attachments or '')
            #Get full_rubric_assessments
            r_assess = ''
            if 'full_rubric_assessment' in js:
                r_assess = js['full_rubric_assessment']
            rubric_assessment.append(r_assess)
                            

(dirpaths, jsonfiles) = zip(*listOfJSON)

# Store all data in a DataFrame. Infer more metadata
df = pd.DataFrame({'path':dirpaths, 'file':jsonfiles, 'json': jsons, 
                   'attachments': attachments, 'comments': comments, 
                   'comment_attachments': comment_attachments, 'rubric_assessment': rubric_assessment})
df['id'] = df.json.apply(lambda json: json['id'])
df['type'] = df.file.apply(lambda json: json.split('_')[0])
hierarchy = ['account','course','assignment','submission']
df['parent'] = df.apply(lambda x: x.json[hierarchy[hierarchy.index(x['type'])-1] + '_id'], axis=1)

df

Unnamed: 0,path,file,json,attachments,comments,comment_attachments,rubric_assessment,id,type,parent
0,export/Default Term/POC/,course_1788710.json,"{'id': 1788710, 'name': 'POC', 'account_id': 8...",,,,,1788710,course,81259
1,export/Default Term/POC/assignment_13558555,assignment_13558555.json,"{'id': 13558555, 'description': '<p>Write some...",,,,,13558555,assignment,1788710
2,export/Default Term/POC/assignment_13558555/su...,submission_211758326.json,"{'id': 211758326, 'body': None, 'url': None, '...",,,,,211758326,submission,13558555
3,export/Default Term/POC/assignment_13558555/su...,submission_211758327.json,"{'id': 211758327, 'body': None, 'url': None, '...",[Assignment+2+-+Maxwells.png],"[{'id': 13259382, 'comment': 'Dear Teacher, Th...",[E.png],"{'id': 4141931, 'rubric_id': 781895, 'rubric_a...",211758327,submission,13558555
4,export/Default Term/POC/assignment_13558555/su...,submission_211758328.json,"{'id': 211758328, 'body': None, 'url': None, '...",,,,,211758328,submission,13558555
5,export/Default Term/POC/assignment_13280576,assignment_13280576.json,"{'id': 13280576, 'description': '<p><span styl...",,,,,13280576,assignment,1788710
6,export/Default Term/POC/assignment_13280576/su...,submission_207012274.json,"{'id': 207012274, 'body': None, 'url': None, '...","[oblig3.py, oblig3-4.pdf]","[{'id': 13111680, 'comment': 'There u go :)', ...",[lunaMoth.jpg],"{'id': 4091290, 'rubric_id': 765886, 'rubric_a...",207012274,submission,13280576
7,export/Default Term/POC/assignment_13280576/su...,submission_207010999.json,"{'id': 207010999, 'body': None, 'url': None, '...","[Assignment+1+-+Drawing.jpg, Assignment+1+-+eq...","[{'id': 13111478, 'comment': 'This is my submi...",,"{'id': 4091317, 'rubric_id': 765886, 'rubric_a...",207010999,submission,13280576
8,export/Default Term/POC/assignment_13280576/su...,submission_207012046.json,"{'id': 207012046, 'body': None, 'url': None, '...",,,,,207012046,submission,13280576


In [4]:
# Print IDs of all courses, assignments, submission, comments
print('Courses:')
df.loc[df.type == 'course'].id
print('Assignments:')
df.loc[df.type == 'assignment'].id
print('Submissions:')
df.loc[df.type == 'submission'].id
print('Comments:')
df.loc[df.type == 'comment'].id

Courses:


0    1788710
Name: id, dtype: int64

Assignments:


1    13558555
5    13280576
Name: id, dtype: int64

Submissions:


2    211758326
3    211758327
4    211758328
6    207012274
7    207010999
8    207012046
Name: id, dtype: int64

Comments:


Series([], Name: id, dtype: int64)

Get all rubric definitions in a DataFrame

In [126]:
rubrics = pd.DataFrame()
asgn_ids = [asgn['id'] for asgn in df[df.type == 'assignment'].json]

for i, asgn in df[df.type == 'assignment'].iterrows():
    rubric = df.loc[(df.id == asgn.json['id'])].json.iloc[0]['rubric']
    rubric_data = [(asgn.json['name'], asgn.json['id'], asgn.file, r_rating['id'], r_rating['points'], r_rating['description'], r_rating['long_description']) for r_i, r_item in enumerate(rubric) for r_rating in r_item['ratings']]
    temp = pd.DataFrame(rubric_data, columns=['assignment_name', 'assignment_id', 'file', 'id', 'points', 'description', 'long_description'])
    rubrics = pd.concat([rubrics, temp], axis=0)
rubrics

Unnamed: 0,assignment_name,assignment_id,file,id,points,description,long_description
0,FYS101 - Assignment 2,13558555,assignment_13558555.json,blank,5.0,B was described correctly,The magnetic field B was described correctly
1,FYS101 - Assignment 2,13558555,assignment_13558555.json,blank_2,0.0,B described incorrectly,The magnetic field B was described incorrectly
2,FYS101 - Assignment 2,13558555,assignment_13558555.json,781895_7350,5.0,E was described correctly,The electric field E was described correctly
3,FYS101 - Assignment 2,13558555,assignment_13558555.json,781895_8373,0.0,E described incorrectly,The electric field E was described incorrectly
4,FYS101 - Assignment 2,13558555,assignment_13558555.json,781895_2335,15.0,Perfect understanding of the equation,The equation was described correctly
5,FYS101 - Assignment 2,13558555,assignment_13558555.json,781895_9223,9.0,Some errors in the description of the equation,The student's description has some flaws
6,FYS101 - Assignment 2,13558555,assignment_13558555.json,781895_837,0.0,Equation described incorrectly,The equation was described incorrectly
0,FYS101 - Assignment 1,13280576,assignment_13280576.json,blank,2.0,Fx vector: correct orientation,Assign full score when Fx vector has a correct...
1,FYS101 - Assignment 1,13280576,assignment_13280576.json,blank_2,0.0,Fx vector: wrong orientation,Assign 0 points when Fx vector has wrong orien...
2,FYS101 - Assignment 1,13280576,assignment_13280576.json,_9641,3.0,Fx vector: correct magnitude,Assign full score when Fx vector has a correct...


In [36]:
# Print all rubrics
def print_rubric(df, assignment_id):
    rubric = df.loc[(df.id == assignment_id)].json.iloc[0]['rubric']
    print('\033[1mRubric for assignment %s\033[0m' % assignment_id)
    template = "| {0:6} | {1:32} | {2:8} | {3:40} |"
    print(template.format('points', 'description', 'id', 'long_description'))
    
    for r_i, r_item in enumerate(rubric):
        for r_rating in r_item['ratings']:
            print(template.format(r_rating['points'], r_rating['description'], r_rating['id'], r_rating['long_description']))
    print('\n'.rjust(len(template.format(0,0,0,0)),'-'))


for asgn in df[df.type == 'assignment'].json:
    if 'rubric' in asgn:
        #print(asgn['id'], '\n', asgn['rubric'])
        print_rubric(df, asgn['id'])

[1mRubric for assignment 13558555[0m
| points | description                      | id       | long_description                         |
|    5.0 | B was described correctly        | blank    | The magnetic field B was described correctly |
|    0.0 | B described incorrectly          | blank_2  | The magnetic field B was described incorrectly |
|    5.0 | E was described correctly        | 781895_7350 | The electric field E was described correctly |
|    0.0 | E described incorrectly          | 781895_8373 | The electric field E was described incorrectly |
|   15.0 | Perfect understanding of the equation | 781895_2335 | The equation was described correctly     |
|    9.0 | Some errors in the description of the equation | 781895_9223 | The student's description has some flaws |
|    0.0 | Equation described incorrectly   | 781895_837 | The equation was described incorrectly   |
--------------------------------------------------------------------------------------------------

[1mRubr

Get all rubric submitted in a DataFrame

In [170]:
rubric_assessments = pd.DataFrame()
for i,row in df.loc[df.rubric_assessment != ''].iterrows():
    comments = []
    for c in row.comments:
        comment = dict()
        comment['comment'] = c['comment']
        comment['author_id'] = str(c['author_id'])
        comment['comment_attachments'] = ', '.join(row.loc['comment_attachments'])
        comments.append(comment)
    assignment_name = rubrics.loc[rubrics.assignment_id == row['rubric_assessment']['rubric_association']['association_id']].loc[0,'assignment_name']
    for data in row['rubric_assessment']['data']:
        rubric_assessments_data = [assignment_name, row['rubric_assessment']['rubric_association']['association_id'], row.json['user_id'], row.attachments, comments, 
                               data['points'], data['description'], data['comments']]
        temp = pd.DataFrame([rubric_assessments_data], columns=['assignment_name', 'assignment_id', 'student_id', 'attachments','comments','points','description','rubric_comment'])
        rubric_assessments = pd.concat([rubric_assessments, temp], axis=0)
rubric_assessments

Unnamed: 0,assignment_name,assignment_id,student_id,attachments,comments,points,description,rubric_comment
0,FYS101 - Assignment 2,13558555,23605017,[Assignment+2+-+Maxwells.png],"[{'comment': 'Dear Teacher, This is my submiss...",5.0,B was described correctly,
0,FYS101 - Assignment 2,13558555,23605017,[Assignment+2+-+Maxwells.png],"[{'comment': 'Dear Teacher, This is my submiss...",0.0,E described incorrectly,
0,FYS101 - Assignment 2,13558555,23605017,[Assignment+2+-+Maxwells.png],"[{'comment': 'Dear Teacher, This is my submiss...",15.0,Perfect understanding of the equation,
0,FYS101 - Assignment 1,13280576,25104805,"[oblig3.py, oblig3-4.pdf]","[{'comment': 'There u go :)', 'author_id': '25...",0.0,No Marks,Too bad!
0,FYS101 - Assignment 1,13280576,25104805,"[oblig3.py, oblig3-4.pdf]","[{'comment': 'There u go :)', 'author_id': '25...",50.0,Full Marks,Nice
0,FYS101 - Assignment 1,13280576,23605017,"[Assignment+1+-+Drawing.jpg, Assignment+1+-+eq...","[{'comment': 'This is my submission', 'author_...",2.0,Fx vector: correct orientation,
0,FYS101 - Assignment 1,13280576,23605017,"[Assignment+1+-+Drawing.jpg, Assignment+1+-+eq...","[{'comment': 'This is my submission', 'author_...",0.0,Fx vector: wrong magnitude,
0,FYS101 - Assignment 1,13280576,23605017,"[Assignment+1+-+Drawing.jpg, Assignment+1+-+eq...","[{'comment': 'This is my submission', 'author_...",3.0,Fy vector: partially correct,
0,FYS101 - Assignment 1,13280576,23605017,"[Assignment+1+-+Drawing.jpg, Assignment+1+-+eq...","[{'comment': 'This is my submission', 'author_...",10.0,Equations are correct,


In [173]:
import pickle
with open('example_dataframes.pkl', 'wb') as f: pickle.dump([df, rubrics, rubric_assessments], f)
#with open('example_dataframes.pkl', 'rb') as f: df,r,ra=pickle.load(f)

In [80]:
# Print submission (attachments, comments, full rubric assessments)
for i,row in df.loc[df.rubric_assessment != ''].iterrows():
    ra = row['rubric_assessment']
    print("\033[1mAssignment %d, student %d\033[0m" % (ra['rubric_association']['association_id'], row.json['user_id']))
    
    # Attachments
    print('Attachments: %s' % ', '.join(row.attachments))
    
    # Comments
    print('Comments:',len(row.comments))
    template = "  | {0:30} | {1:10} | {2:20} |"
    print(template.format("comment", "author_id", "attachments"))
    for comment in row.comments:
        print(template.format(comment['comment'], str(comment['author_id']), ', '.join(row.loc['comment_attachments'])))
    print(' ',''.rjust(70,'-'))
    
    # Full rubric assessment
    print("Full rubric assessment:")
    template = "  | {0:6} | {1:32} | {2:50} |"
    print(template.format("Points", "Description", "Comment"))
    for data in ra['data']:        
        for rec in [(data['points'], data['description'], data['comments'])]: 
            print(template.format(*rec))
    print(' ','\n'.rjust(98,'-'))

[1mAssignment 13558555, student 23605017[0m
Attachments: Assignment+2+-+Maxwells.png
Comments: 2
  | comment                        | author_id  | attachments          |
  | Dear Teacher,
This is my submission for FYS101 Assignment 2 | 23605017   | E.png                |
  | Too bad you did not explain what E is: it is the electric field! See more details by going to Submission Details > Show Rubric | 23434396   | E.png                |
  ----------------------------------------------------------------------
Full rubric assessment:
  | Points | Description                      | Comment                                            |
  |    5.0 | B was described correctly        |                                                    |
  |    0.0 | E described incorrectly          |                                                    |
  |   15.0 | Perfect understanding of the equation |                                                    |
  -------------------------------------------------