In [10]:
# Importing required libraries
import json
import os
import re
import shutil
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Root of the Canvas instance; the API URLs in this notebook are built from it
base_url = 'https://canvas.illinois.edu'

# Create the output folder up front (no error if it is already there)
os.makedirs('docs', exist_ok=True)

# Paths of the JSON files kept under 'docs'
rubric_file, discussion_entries_file = (
    'docs/rubric_data.json',
    'docs/discussion_entries.json',
)


In [None]:
def extract_individual_discussion(base_url, course_id, discussion_topic_id, headers):
    """Fetch the full view of one course discussion topic and return its entries.

    Sends a GET to the topic's ``/view`` endpoint and hands the response's
    ``view`` and ``participants`` fields to ``extract_entries``. That helper
    is defined in a later cell of this notebook, so its cell has to be run
    before this function is called.

    Args:
        base_url: Root URL of the Canvas instance.
        course_id: Id of the course that owns the topic.
        discussion_topic_id: Id of the discussion topic.
        headers: HTTP headers sent with the request.

    Returns:
        A list with whatever ``extract_entries`` produced, or an empty list
        when the request fails (the response text is printed in that case).
    """
    endpoint = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}/view'
    reply = requests.get(endpoint, headers=headers)

    # Guard clause: report the failure and hand back nothing
    if not reply.ok:
        print(f"Error fetching individual discussion: {reply.text}")
        return []

    payload = reply.json()
    return list(extract_entries(payload['view'], payload['participants']))


# Course / topic whose individual replies are pulled in this cell
course_id = '32545'
discussion_topic_id = '352017'

# No visible cell defines `access_token`, so on a fresh kernel the header line
# below would raise NameError. Keep a token that is already in the namespace;
# otherwise read it from the environment rather than hardcoding it here.
if 'access_token' not in globals():
    access_token = os.environ.get('CANVAS_ACCESS_TOKEN')
    if not access_token:
        raise RuntimeError(
            "Canvas access token missing: define `access_token` or set the "
            "CANVAS_ACCESS_TOKEN environment variable"
        )

# `headers` is reused by the later cells of this notebook
headers = {'Authorization': f'Bearer {access_token}'}
individual_discussion_data = extract_individual_discussion(base_url, course_id, discussion_topic_id, headers)
print(json.dumps(individual_discussion_data, indent=4))


In [26]:

def extract_entries(entries, participants):
    """Convert a nested list of discussion entries into plain dictionaries.

    Entries that lack a 'message' key or carry a 'deleted' key are skipped,
    together with any replies nested under them.

    Args:
        entries: List of entry dicts; each kept entry must have 'id' and
            'message' and may have 'parent_id', 'user_id' and 'replies'.
        participants: List of participant dicts used to resolve an entry's
            'user_id' to a display name.

    Returns:
        A list of dicts with the keys 'id', 'parent_id', 'name', 'message'
        and 'replies' (the same structure, recursively).
    """
    # Index participants once instead of rescanning the list for every entry.
    # setdefault keeps the first participant per id, like a front-to-back scan.
    names_by_id = {}
    for participant in participants or []:
        # The Canvas full-topic view documents 'display_name' for participants,
        # so fall back to it when 'name' is absent; "Unknown" is the last resort.
        label = participant.get('name') or participant.get('display_name') or "Unknown"
        names_by_id.setdefault(participant.get('id'), label)

    def _collect(nodes):
        collected = []
        for entry in nodes or []:
            if 'message' not in entry or 'deleted' in entry:
                continue
            collected.append({
                'id': entry['id'],
                'parent_id': entry.get('parent_id'),
                'name': names_by_id.get(entry.get('user_id'), "Unknown"),
                'message': entry['message'],
                # `or []` tolerates an explicit null 'replies' value
                'replies': _collect(entry.get('replies') or []),
            })
        return collected

    return _collect(entries)

def extract_group_discussions(base_url, headers, group_topic_children):
    """Collect the entries of every per-group copy of a discussion topic.

    For each item of ``group_topic_children`` (a dict with 'group_id' and
    'id'), the group's ``/view`` endpoint is requested and its 'view' and
    'participants' fields are passed to ``extract_entries``.

    Args:
        base_url: Root URL of the Canvas instance.
        headers: HTTP headers sent with every request.
        group_topic_children: Iterable of dicts holding 'group_id' and 'id'.

    Returns:
        A list of ``{'group_id': ..., 'entries': ...}`` dicts, one per group
        whose request succeeded. Failed requests are reported with a printed
        message and left out of the result.
    """
    collected = []
    for child in group_topic_children:
        group_id, topic_id = child['group_id'], child['id']
        reply = requests.get(
            f'{base_url}/api/v1/groups/{group_id}/discussion_topics/{topic_id}/view',
            headers=headers,
        )

        # Skip this group on failure, but keep going with the others
        if not reply.ok:
            print(f"Error fetching group discussion for group {group_id}: {reply.text}")
            continue

        payload = reply.json()
        collected.append({
            'group_id': group_id,
            'entries': extract_entries(payload['view'], payload['participants']),
        })

    return collected

def fetch_main_discussion_topic(base_url, course_id, discussion_topic_id, headers):
    """Request a single course discussion topic and return its decoded JSON.

    Args:
        base_url: Root URL of the Canvas instance.
        course_id: Id of the course that owns the topic.
        discussion_topic_id: Id of the discussion topic.
        headers: HTTP headers sent with the request.

    Returns:
        The parsed JSON body on success; ``None`` after printing the response
        text when the request is not OK.
    """
    reply = requests.get(
        f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}',
        headers=headers,
    )
    if not reply.ok:
        print(f"Error fetching main discussion topic: {reply.text}")
        return None
    return reply.json()

course_id = '32545'
discussion_topic_id = '352017'
main_discussion_topic = fetch_main_discussion_topic(base_url, course_id, discussion_topic_id, headers)

# Start from an empty list so the print below never hits an undefined (or
# stale, from an earlier run) `group_discussions` when the topic could not be
# fetched or carries no 'group_topic_children'.
group_discussions = []
if main_discussion_topic and 'group_topic_children' in main_discussion_topic:
    group_discussions = extract_group_discussions(base_url, headers, main_discussion_topic['group_topic_children'])
print(json.dumps(group_discussions, indent=4))


[]


In [23]:
def extract_and_save_instruction_base(base_url, course_id, discussion_topic_id, headers, rubric_file):
    """Save a topic's title, assignment description and rubric as JSON.

    Reads the discussion topic and works only from its 'assignment' field.
    The file is written only when that field contains both a 'rubric' and a
    'description' key; otherwise "No instruction data found" is printed and
    nothing is saved.

    Args:
        base_url: Root URL of the Canvas instance.
        course_id: Id of the course that owns the topic.
        discussion_topic_id: Id of the discussion topic.
        headers: HTTP headers sent with the request.
        rubric_file: Path of the JSON file to write. Its parent directory is
            created when missing.

    Returns:
        None. On a failed request the response text is printed.
    """
    instruction_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}'
    instruction_response = requests.get(instruction_url, headers=headers)

    if not instruction_response.ok:
        print(f'Error: {instruction_response.text}')
        return

    instruction_data = instruction_response.json()
    # `or {}` also covers an explicit null 'assignment'; .get(key, {}) would
    # hand back None in that case and break the membership tests below.
    assignment = instruction_data.get('assignment') or {}
    rubric = []

    # Extract title
    title = instruction_data.get('title')
    if title:
        rubric.append({'title': title})

    # Extract instruction description (a null description is skipped rather
    # than passed to the HTML parser)
    description_html = assignment.get('description')
    if description_html is not None:
        soup = BeautifulSoup(description_html, 'html.parser')
        rubric.append({'instruction': soup.get_text()})

    # Saving is gated on the assignment carrying both keys
    if 'rubric' not in assignment or 'description' not in assignment:
        print("No instruction data found")
        return

    rubric.extend(assignment['rubric'] or [])

    points_possible = assignment.get('points_possible')
    if points_possible is not None:
        rubric.append({'points_possible': points_possible})

    # Create the folder the output file actually lives in, not a fixed 'docs'
    output_dir = os.path.dirname(rubric_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save to JSON file
    with open(rubric_file, 'w', encoding='utf-8') as f:
        json.dump(rubric, f)

    print("Extracted instructions and rubric")

# Same course as the cells above, but a different topic id
course_id, discussion_topic_id = '32545', '352012'
extract_and_save_instruction_base(
    base_url=base_url,
    course_id=course_id,
    discussion_topic_id=discussion_topic_id,
    headers=headers,
    rubric_file=rubric_file,
)

No instruction data found


In [None]:
def fetch_and_display_raw_discussion_data(base_url, course_id, discussion_topic_id, headers):
    """Print the raw JSON of one course discussion topic.

    Args:
        base_url: Root URL of the Canvas instance.
        course_id: Id of the course that owns the topic.
        discussion_topic_id: Id of the discussion topic.
        headers: HTTP headers sent with the request.

    Returns:
        None. On success the decoded body is printed with a 4-space indent;
        otherwise the status code and response text are printed.
    """
    reply = requests.get(
        f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}',
        headers=headers,
    )

    if not reply.ok:
        print(f'Error fetching data: {reply.status_code}, {reply.text}')
        return

    # Pretty-print the decoded payload
    print(json.dumps(reply.json(), indent=4))

# Alternate course/topic ids, left disabled; the call below uses the values
# assigned in an earlier cell.
#course_id = '42126'
#discussion_topic_id = '531789'
fetch_and_display_raw_discussion_data(
    base_url=base_url,
    course_id=course_id,
    discussion_topic_id=discussion_topic_id,
    headers=headers,
)

In [36]:
def extract_and_save_instruction_message(base_url, course_id, discussion_topic_id, headers, rubric_file):
    """Save a topic's title and the text of its 'message' field as JSON.

    Covers topics whose instructions are not stored in the structured
    'assignment' field: the HTML in 'message' is converted to plain text with
    line breaks kept for <br>, <li> and <p>. When 'message' is empty, the
    placeholder "No instruction data found" is stored instead.

    Args:
        base_url: Root URL of the Canvas instance.
        course_id: Id of the course that owns the topic.
        discussion_topic_id: Id of the discussion topic.
        headers: HTTP headers sent with the request.
        rubric_file: Path of the JSON file to write. Its parent directory is
            created when missing.

    Returns:
        None. On a failed request the response text is printed and no file is
        written.
    """
    instruction_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}'
    instruction_response = requests.get(instruction_url, headers=headers)

    if not instruction_response.ok:
        print(f'Error: {instruction_response.text}')
        return

    instruction_data = instruction_response.json()
    rubric = []

    # Extract title
    title = instruction_data.get('title')
    if title:
        rubric.append({'title': title})

    # Extract message
    message_html = instruction_data.get('message')
    if message_html:
        soup = BeautifulSoup(message_html, 'html.parser')

        # Replace <br> tags with newlines
        for br in soup.find_all("br"):
            br.replace_with("\n")

        # Add newlines after <li> tags
        for li in soup.find_all("li"):
            li.insert_after(soup.new_string("\n"))

        # Add newlines before and after <p> tags
        for p in soup.find_all("p"):
            p.insert_before(soup.new_string("\n"))
            p.insert_after(soup.new_string("\n"))

        rubric.append({'instruction': soup.get_text().strip()})
    else:
        rubric.append({'instruction': "No instruction data found"})

    # Create the folder the output file actually lives in, not a fixed 'docs'
    output_dir = os.path.dirname(rubric_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save to JSON file
    with open(rubric_file, 'w', encoding='utf-8') as f:
        json.dump(rubric, f)

    print("Extracted instructions and rubric")

# Uses the course/topic ids, headers and output path set in earlier cells
extract_and_save_instruction_message(
    base_url=base_url,
    course_id=course_id,
    discussion_topic_id=discussion_topic_id,
    headers=headers,
    rubric_file=rubric_file,
)

Extracted instructions and rubric


In [37]:
def _instruction_html_to_text(html):
    """Convert an HTML fragment to plain text, keeping <br>, <li> and <p> line breaks."""
    soup = BeautifulSoup(html, 'html.parser')
    for br in soup.find_all("br"):
        br.replace_with("\n")
    for li in soup.find_all("li"):
        li.insert_after(soup.new_string("\n"))
    for p in soup.find_all("p"):
        p.insert_before(soup.new_string("\n"))
        p.insert_after(soup.new_string("\n"))
    return soup.get_text().strip()


def extract_and_save_instruction(base_url, course_id, discussion_topic_id, headers, rubric_file):
    """Save a topic's title, instructions, rubric and points as JSON.

    The instruction text comes from the assignment's 'description' when it is
    non-empty, otherwise from the topic's 'message'. Both sources go through
    the same HTML-to-text conversion, so paragraphs and list items stay on
    separate lines whichever field is used. Rubric rows and
    'points_possible' are appended when the assignment has a 'rubric' key.

    Args:
        base_url: Root URL of the Canvas instance.
        course_id: Id of the course that owns the topic.
        discussion_topic_id: Id of the discussion topic.
        headers: HTTP headers sent with the request.
        rubric_file: Path of the JSON file to write. Its parent directory is
            created when missing.

    Returns:
        None. On a failed request the response text is printed and no file is
        written.
    """
    instruction_url = f'{base_url}/api/v1/courses/{course_id}/discussion_topics/{discussion_topic_id}'
    instruction_response = requests.get(instruction_url, headers=headers)

    if not instruction_response.ok:
        print(f'Error: {instruction_response.text}')
        return

    instruction_data = instruction_response.json()
    # `or {}` also covers an explicit null 'assignment'; .get(key, {}) would
    # hand back None in that case and fail on the attribute access below.
    assignment = instruction_data.get('assignment') or {}
    rubric = []

    # Extract title
    title = instruction_data.get('title')
    if title:
        rubric.append({'title': title})

    # Prefer the assignment description, fall back to the topic message
    instruction_html = assignment.get('description') or instruction_data.get('message')
    if instruction_html:
        rubric.append({'instruction': _instruction_html_to_text(instruction_html)})

    # Extract rubric and points possible, if available
    if 'rubric' in assignment:
        rubric.extend(assignment['rubric'] or [])
        points_possible = assignment.get('points_possible')
        if points_possible is not None:
            rubric.append({'points_possible': points_possible})

    # Create the folder the output file actually lives in, not a fixed 'docs'
    output_dir = os.path.dirname(rubric_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save to JSON file
    with open(rubric_file, 'w', encoding='utf-8') as f:
        json.dump(rubric, f)

    print("Extracted instructions and rubric")

# Uses the course/topic ids, headers and output path set in earlier cells
extract_and_save_instruction(
    base_url=base_url,
    course_id=course_id,
    discussion_topic_id=discussion_topic_id,
    headers=headers,
    rubric_file=rubric_file,
)

Extracted instructions and rubric
