## Data Analysis

In [2]:
import requests
from dotenv import load_dotenv
import os
import csv
import json

# Let python-dotenv populate the process environment (e.g. from a local .env
# file) before the token is looked up below.
load_dotenv()

# Root of the Canvas REST API; endpoint paths are appended to this.
canvas_base_url = 'https://canvas.ubc.ca/api/v1'

# Bearer token used for every request; None when CANVAS_API_TOKEN is unset.
access_token = os.getenv('CANVAS_API_TOKEN')


def make_canvas_request(endpoint, timeout=30):
    """Send an authenticated GET request to the Canvas API and return its JSON.

    Args:
        endpoint: Path relative to ``canvas_base_url`` (a leading ``/`` is
            tolerated and stripped so the URL never contains ``//``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If no access token is configured, or if the response
            status code is anything other than 200.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires (a subclass of ``Exception``).
    """
    # Fail early with an actionable message instead of sending
    # "Bearer None" and getting an opaque 401 back.
    if not access_token:
        raise Exception(
            "No Canvas access token configured; set CANVAS_API_TOKEN in the environment or .env file."
        )

    url = f"{canvas_base_url}/{endpoint.lstrip('/')}"
    headers = {
        'Authorization': f'Bearer {access_token}'
    }
    response = requests.get(url, headers=headers, timeout=timeout)

    if response.status_code == 200:
        return response.json()

    raise Exception(f"API request failed with status code {response.status_code}: {response.text}")


def get_discussion_data(course_id, discussion_topic_id):
    """Fetch the ``view`` resource of one discussion topic in a course.

    Args:
        course_id: Canvas course identifier placed in the request path.
        discussion_topic_id: Discussion topic identifier placed in the path.

    Returns:
        Whatever ``make_canvas_request`` returns for that endpoint.
    """
    return make_canvas_request(
        f'courses/{course_id}/discussion_topics/{discussion_topic_id}/view'
    )

def json_to_csv(json_data, csv_filename):
    """Write a JSON object, or a list of JSON objects, to a CSV file.

    A single dict becomes a header row plus one data row. A list of dicts
    becomes a header row plus one data row per dict; the header is the union
    of all keys in first-seen order, and a record lacking a key gets an empty
    cell so columns stay aligned. An empty list produces an empty file.

    Nested values (lists/dicts) are written using their ``str()`` form.

    Args:
        json_data: A dict, or a list of dicts.
        csv_filename: Path of the CSV file to create or overwrite.

    Raises:
        TypeError: If ``json_data`` is neither a dict nor a list of dicts.
    """
    records = json_data if isinstance(json_data, list) else [json_data]

    # Validate before opening the file so bad input does not truncate an
    # existing export.
    if not all(isinstance(record, dict) for record in records):
        raise TypeError("json_data must be a dict or a list of dicts")

    # dict.fromkeys de-duplicates while preserving first-seen key order.
    fieldnames = list(dict.fromkeys(key for record in records for key in record))

    # Explicit UTF-8 so non-ASCII text does not depend on the platform's
    # default encoding.
    with open(csv_filename, mode='w', newline='', encoding='utf-8') as file:
        if not records:
            return

        writer = csv.DictWriter(file, fieldnames=fieldnames, restval='')
        writer.writeheader()
        writer.writerows(records)



In [None]:
# Identifiers of the Canvas course and discussion topic to export.
course_id = 117933  # Replace with your course ID
discussion_topic_id = 1823292  # Replace with your discussion topic ID

# DEBUG USAGE: uncomment to override the token read from the environment.
# access_token = ""

# Fetch the discussion, dump the raw response to CSV, and echo it.
# Any failure is reported as a one-line message instead of a traceback; in
# that case discussion_data is not assigned by this cell.
try:
    discussion_data = get_discussion_data(course_id, discussion_topic_id)

    json_to_csv(discussion_data, 'discussion_data.csv')
    print("CSV file has been created successfully.")
    print(discussion_data)
except Exception as e:
    print(f"Error: {e}")

In [None]:
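# DEBUG: uncomment the lines below to inspect the request inputs.
# NOTE: access_token is a secret bearer token; avoid printing it in a saved or shared notebook.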

# print(course_id)
# print(discussion_topic_id)
# print(access_token)


In [None]:
import csv
import html
from bs4 import BeautifulSoup  # Import BeautifulSoup for HTML parsing

# Top-level entries of the discussion; empty list when the key is absent.
posts_data = discussion_data.get('view', [])

# Index the participant records by their 'id' so each post's user_id can be
# resolved to a participant with a single dict lookup.
participants = discussion_data.get('participants', [])
user_dict = {person['id']: person for person in participants}

def clean_html(raw_html):
    """Convert an HTML fragment to plain text.

    Args:
        raw_html: HTML markup as a string. ``None`` or an empty string is
            accepted and yields an empty string.

    Returns:
        The text content of the markup, with a single space inserted
        between adjacent text nodes.
    """
    # Nothing to parse; also avoids handing None to BeautifulSoup, which
    # would raise instead of returning empty text.
    if not raw_html:
        return ''

    soup = BeautifulSoup(raw_html, "html.parser")
    return soup.get_text(separator=' ')

def get_reply_messages(replies):
    """Join the plain-text messages of ``replies`` into one delimited string.

    Each reply's message is converted from HTML to text, wrapped in double
    quotes, and the results are joined with ``' || '``. A reply whose
    message is missing or null contributes ``"No message"``.

    Args:
        replies: Iterable of reply dicts, each optionally holding a
            ``'message'`` HTML string.

    Returns:
        The delimited string; empty when ``replies`` is empty.
    """
    quoted = []
    for reply in replies:
        raw_message = reply.get("message", "No message")
        if raw_message is None:
            raw_message = "No message"
        # Pass the raw HTML straight to the parser, which decodes entities
        # itself. Unescaping first would turn escaped markup such as
        # "&lt;b&gt;" into a real tag that is then stripped from the text.
        quoted.append(f'"{clean_html(raw_message)}"')
    return ' || '.join(quoted)

def process_posts(posts, parent_id=None, level=0):
    """Flatten a tree of discussion entries into a list of row dicts.

    Every entry produces one row, immediately followed by the rows of its
    own replies (depth-first), so nested threads end up in a single flat
    list. Author names are looked up in the module-level ``user_dict``.

    Args:
        posts: Iterable of entry dicts; each may carry a nested
            ``'replies'`` list of the same shape.
        parent_id: ``id`` of the entry that ``posts`` reply to, or ``None``
            for top-level entries.
        level: Nesting depth of ``posts``; 0 for top-level entries.

    Returns:
        A list of dicts with the keys ``id``, ``user_id``, ``display_name``,
        ``parent_id``, ``created_at``, ``updated_at``, ``message``,
        ``level`` and ``replies``.
    """
    rows = []
    for post in posts:
        user = user_dict.get(post.get('user_id'), {})

        # Pass the raw HTML straight to the parser, which decodes entities
        # itself. Unescaping first would turn escaped markup such as
        # "&lt;b&gt;" into a real tag that is then stripped from the text.
        # `or ''` also covers a message that is present but null.
        message = clean_html(post.get('message') or '')

        # `or []` covers a 'replies' key that is present but null.
        replies = post.get('replies') or []
        flattened_replies = get_reply_messages(replies) if replies else None

        row = {
            'id': post.get('id', ''),
            'user_id': post.get('user_id', ''),
            'display_name': user.get('display_name', 'Unknown'),
            'parent_id': parent_id,
            'created_at': post.get('created_at', ''),
            'updated_at': post.get('updated_at', ''),
            'message': message,
            'level': level,
            'replies': flattened_replies,  # direct replies only, separated by ' || '
        }
        rows.append(row)

        # Recurse so each reply also gets its own row, one level deeper.
        if replies:
            rows.extend(process_posts(replies, parent_id=post.get('id'), level=level + 1))
    return rows

# Flatten the whole thread and order the rows by their 'created_at' value.
all_rows = sorted(process_posts(posts_data), key=lambda row: row['created_at'])

# Column order of the output file; matches the keys of each row dict.
fieldnames = [
    'id',
    'user_id',
    'display_name',
    'parent_id',
    'created_at',
    'updated_at',
    'message',
    'level',
    'replies',
]

csv_file_path = 'flattened_replies_messages_with_pipe.csv'

# Every field is quoted so that commas and line breaks inside messages stay
# within a single cell.
with open(csv_file_path, mode='w', encoding='utf-8', newline='') as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=fieldnames,
        quoting=csv.QUOTE_ALL,
        quotechar='"',
    )
    writer.writeheader()
    writer.writerows(all_rows)

print(f"Data has been successfully converted to '{csv_file_path}'.")
