Original author: Aaron Gourdin, Github: agourdin

This code downloads CSV reports from the reports module of Canvas' API. https://canvas.instructure.com/doc/api/account_reports.html

In [1]:
#!/usr/bin/env python
# working as of 6/8/2018
import requests
import time, json, os, shutil
import re,pprint
import smtplib

### NOTES: ###
# 1. You can use an external credentials file to (slightly) increase security.
#   a. This works for both token and email credential information.
# 2. All <> angle brackets are designed to be replaced with your data. DO NOT INCLUDE THEM.

# Change this to match your access token
from config import token

# Host name of your Canvas instance (no scheme, no path); "https://" and the
# API path are added when the request URLs are built.
CANVAS_DOMAIN = "sltr.instructure.com"

# Output folder template - always use / whether linux or Windows, and always
# keep the trailing /. The {enrollment_term} placeholder is replaced with each
# term's "name", and the resulting folder is created by the download loop when
# it does not exist yet, so there is nothing to create by hand here.
OUTPUT_FOLDER = "./data/outcomes/{enrollment_term}/"

# Terms to pull reports for:
#   id            - Canvas enrollment term ID, sent as the report's term filter
#   name          - substituted into OUTPUT_FOLDER
#   academic_year - recorded next to each downloaded file in file_list
enrollment_terms = [
    {"id": 1, "name": "2018-2019", "academic_year": 2019},
    {"id": 4, "name": "2019-2020", "academic_year": 2020},
]

# Gets one {"filename": ..., "academic_year": ...} entry per downloaded report
file_list = []

# ---------------------------------------------------------------------------
# Report settings. Nothing in this section depends on the term being pulled,
# so it is defined once, before the per-term loop.
# ---------------------------------------------------------------------------

# Change account ID - use "self" for the account attached to the token provided
# MUST BE A STR
# Change to limit scope of report being pulled (i.e., "1" pulls for root account)
ACCOUNT_ID = "1"

# Edit each of these to determine which to include in the report
include_deleted_items = False
do_accounts = True
do_courses = False
do_enrollments = False
do_sections = False
do_terms = False
do_users = False
do_xlist = False
do_group_membership = False
do_groups = False

# CHANGE INDICES TO RUN REPORTS (indices into standard_reports below)
reports_requested = [13]

# Seconds to wait between two progress checks of a running report
POLL_INTERVAL_SECONDS = 4

# Seconds a single HTTP request may wait for the server before giving up;
# without a timeout a stalled connection would hang the script forever.
REQUEST_TIMEOUT_SECONDS = 60

# Report states after which "progress" is not expected to reach 100.
# NOTE(review): state names taken from the Canvas Account Reports API -
# confirm against the Canvas version you run.
FAILED_REPORT_STATES = ("error", "aborted", "deleted")

###################################################################################
############# BE EXTREMELY CAREFUL CHANGING ANY INFORMATION BELOW #################

BASE_DOMAIN = "https://%s/api/v1/%%s/" % CANVAS_DOMAIN
BASE_URI = BASE_DOMAIN % "accounts/%s/reports" % ACCOUNT_ID
BASE_START_URI = BASE_DOMAIN % "accounts/%s/reports/%%s" % ACCOUNT_ID
BASE_FILE_URI =  BASE_DOMAIN % "files/%s"

# This headers dictionary is used for every request
headers = {"Authorization":"Bearer %s" % token}

# Use list indices to indicate report to run - see reports_requested
standard_reports = (
  "proserv_student_submissions_csv", #0
  "grade_export_csv", #1
  "mgp_grade_export_csv", #2
  "last_user_access_csv", #3
  "last_enrollment_activity_csv", #4
  "provisioning_csv", #5
  "recently_deleted_courses_csv", #6
  "student_assignment_outcome_map_csv", #7
  "unpublished_courses_csv", #8
  "course_storage_csv", #9
  "unused_courses_csv", #10
  "zero_activity_csv", #11
  "lti_report_csv", #12
  "outcome_results_csv") #13

# ---------------------------------------------------------------------------
# For every term: empty the term's output folder, then start each requested
# report, wait for Canvas to finish it, and download the resulting file.
# Every downloaded file is recorded in file_list.
# ---------------------------------------------------------------------------
for term in enrollment_terms:
    # Term ID to pull for; set to False to pull for all terms.
    # Use API call to find term IDs: https://<domain>.instructure.com/api/v1/accounts/<account_id>/terms
    ENROLLMENT_TERM = term["id"]

    # Parameters sent with every report request (the original author notes
    # these are the sis_export_csv options). Rebuilt per term because the
    # term filter is added to it below.
    report_parameters = {
      "parameters[accounts]": do_accounts,
      "parameters[courses]": do_courses,
      "parameters[enrollments]": do_enrollments,
      "parameters[groups]": do_groups,
      "parameters[group_membership]": do_group_membership,
      "parameters[include_deleted]": include_deleted_items,
      "parameters[sections]": do_sections,
      "parameters[terms]": do_terms,
      "parameters[users]": do_users,
      "parameters[xlist]": do_xlist}

    # False is the "all terms" sentinel; compare by identity so that only the
    # sentinel itself disables the filter.
    if ENROLLMENT_TERM is not False:
        report_parameters["parameters[enrollment_term]"] = ENROLLMENT_TERM

    # REMOVE OLD FILE(S) FIRST
    folder = OUTPUT_FOLDER.format(enrollment_term=term['name'])
    try:
        for the_file in os.listdir(folder):
            file_path = os.path.join(folder, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except OSError as e:
                # Best effort: a leftover file must not stop the download
                print(e)
    except FileNotFoundError:
        # makedirs (not mkdir) so missing parent folders are created as well
        os.makedirs(folder, exist_ok=True)

    # NOW REQUEST AND WRITE NEW FILES
    for report_index in reports_requested:
        # Step 1: Start the report
        start_report_url = BASE_START_URI % standard_reports[report_index]
        print(start_report_url)

        print("running the report...")
        start_report_response = requests.post(
            start_report_url,
            headers=headers,
            params=report_parameters,
            timeout=REQUEST_TIMEOUT_SECONDS)
        # Print the body before checking the status so that an error message
        # returned by Canvas is visible in the output.
        print(start_report_response.text)
        start_report_response.raise_for_status()

        # Use the id from that output to check the progress of the report.
        # start_report_url already ends with "/".
        status_url = start_report_url + "%s" % start_report_response.json()["id"]

        # Step 2: Wait for the report to be finished
        while True:
            status_response = requests.get(
                status_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
            status_response.raise_for_status()
            status_response_json = status_response.json()
            print("report progress",status_response_json["progress"])

            # Stop instead of polling forever when the report cannot finish
            if status_response_json.get("status") in FAILED_REPORT_STATES:
                raise RuntimeError(
                    "Report %s ended with status %r: %s" % (
                        standard_reports[report_index],
                        status_response_json.get("status"),
                        status_response.text))

            if status_response_json["progress"] >= 100:
                break
            time.sleep(POLL_INTERVAL_SECONDS)

        attachment = status_response_json.get("attachment")
        if not attachment:
            raise RuntimeError(
                "Report %s finished without an attachment: %s" % (
                    standard_reports[report_index], status_response.text))

        # Step 3: Download the file
        file_info_url = attachment["url"]

        # Step 4: Save the file
        # basename() keeps a server-supplied name from pointing outside folder
        filename = os.path.join(folder, os.path.basename(attachment["filename"]))
        with requests.get(
                file_info_url,
                headers=headers,
                stream=True,
                timeout=REQUEST_TIMEOUT_SECONDS) as file_response:
            file_response.raise_for_status()
            with open(filename,"wb") as f:
                # Write chunk by chunk so a large report is never held in
                # memory as a whole
                for chunk in file_response.iter_content(chunk_size=65536):
                    f.write(chunk)

        file_list.append({"filename":filename, "academic_year":term['academic_year']})

https://sltr.instructure.com/api/v1/accounts/1/reports/outcome_results_csv/
running the report...
{"id":595,"progress":0,"parameters":{"accounts":"True","courses":"False","enrollments":"False","groups":"False","group_membership":"False","include_deleted":"False","sections":"False","terms":"False","users":"False","xlist":"False","enrollment_term":"1"},"current_line":null,"status":"created","report":"outcome_results_csv","created_at":"2020-08-26T07:39:32-07:00","started_at":null,"ended_at":null,"file_url":null}
report progress 0
report progress 6
report progress 46
report progress 73
report progress 100
https://sltr.instructure.com/api/v1/accounts/1/reports/outcome_results_csv/
running the report...
{"id":596,"progress":0,"parameters":{"accounts":"True","courses":"False","enrollments":"False","groups":"False","group_membership":"False","include_deleted":"False","sections":"False","terms":"False","users":"False","xlist":"False","enrollment_term":"4"},"current_line":null,"status":"created"

In [None]:
# Retrieve the list of terms in the system
# Relies on names defined by the report-download cell above it in the
# notebook: requests, headers, CANVAS_DOMAIN and ACCOUNT_ID.

# Built from the configured domain and account so it cannot drift from them
terms_url = 'https://%s/api/v1/accounts/%s/terms' % (CANVAS_DOMAIN, ACCOUNT_ID)
response = requests.get(terms_url,headers=headers,timeout=60)
# Fail with the HTTP error rather than trying to decode an error page as JSON
response.raise_for_status()
print(response.json())