In [3]:
import os
import json
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
import pandas as pd
import numpy as np


In [7]:
def load_credentials():
    """Load the dashboard login credentials from the environment.

    ``load_dotenv()`` is called first so that values defined in a local
    ``.env`` file are visible through ``os.getenv``.

    Returns:
        tuple: ``(email, password)`` read from the ``DASH_EMAIL`` and
        ``DASH_PASSWORD`` environment variables.

    Raises:
        ValueError: If either variable is unset or empty. The message names
            the variable(s) that are actually missing.
    """
    load_dotenv()
    email = os.getenv("DASH_EMAIL")
    password = os.getenv("DASH_PASSWORD")

    # Report the real variable names; the lookups above use the DASH_ prefix.
    missing = [
        name
        for name, value in (("DASH_EMAIL", email), ("DASH_PASSWORD", password))
        if not value
    ]
    if missing:
        raise ValueError(f"{' and '.join(missing)} must be set in the .env file")
    return email, password

def get_csrf_token(session, login_url):
    """Fetch the login page and extract its CSRF token.

    Args:
        session: Object with a ``get(url)`` method (a ``requests.Session``
            in ``main``); the GET also lets the session collect any cookies
            the login page sets.
        login_url: URL of the login page.

    Returns:
        str: Value of the ``<input name="csrfmiddlewaretoken">`` element.

    Raises:
        requests.HTTPError: If the login page responds with a 4xx/5xx status.
        RuntimeError: If the input element is absent or carries no value.
    """
    response = session.get(login_url)
    # Surface HTTP errors directly; otherwise an error page would be parsed
    # and misreported as "CSRF token not found".
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")
    token = soup.find("input", {"name": "csrfmiddlewaretoken"})
    # .get() avoids a bare KeyError when the element has no value attribute.
    value = token.get("value") if token is not None else None
    if not value:
        raise RuntimeError("CSRF token not found in login page.")
    return value

def login(session, login_url, email, password, csrf_token):
    """Submit the login form and return the raw response.

    Args:
        session: Object with a ``post(url, data=..., headers=...)`` method.
        login_url: URL the form is posted to; also sent as the ``Referer``
            header (presumably required by the server's CSRF check — confirm).
        email: Value for the ``email`` form field.
        password: Value for the ``password`` form field.
        csrf_token: Value for the ``csrfmiddlewaretoken`` form field.

    Returns:
        Whatever ``session.post`` returns; the caller decides whether the
        login succeeded.
    """
    form_fields = dict(
        email=email,
        password=password,
        csrfmiddlewaretoken=csrf_token,
    )
    return session.post(
        login_url,
        data=form_fields,
        headers={"Referer": login_url},
    )

def fetch_api_data(session, api_url):
    """GET an API endpoint and return the ``result`` field of its JSON body.

    Args:
        session: Object with a ``get(url)`` method (a logged-in
            ``requests.Session`` in ``main``).
        api_url: Endpoint URL.

    Returns:
        The value stored under the ``"result"`` key of the response.

    Raises:
        ValueError: If the response's Content-Type is not JSON (the message
            includes the first 300 characters of the body), or if the JSON
            payload is not an object.
        RuntimeError: If the payload's ``success`` flag is missing or falsy.
        KeyError: If the payload has no ``"result"`` key.
    """
    response = session.get(api_url)
    content_type = response.headers.get("Content-Type", "")

    if "application/json" not in content_type:
        raise ValueError("API response is not JSON:\n" + response.text[:300])

    data = response.json()
    # A list or scalar payload has no .get(); reject it with a clear error
    # instead of an AttributeError.
    if not isinstance(data, dict):
        raise ValueError(
            f"API response is not a JSON object: got {type(data).__name__}"
        )

    if not data.get("success", False):
        # The message may be null or non-string; format it rather than
        # concatenating so the failure is still reported as RuntimeError.
        message = data.get("message") or "No message"
        raise RuntimeError(f"API returned failure: {message}")

    if "result" not in data:
        raise KeyError("'result' key not found in API response.")

    return data["result"]

def jsonToExcel(data, supervisorMainFields, studentMainFields, outputFile):
    """Write assessment entries to an Excel workbook with two sheets.

    Sheets written:
        * ``all_data_flat``: one row per entry with the top-level fields and
          every key of ``student_data`` and ``supervisor_data`` as columns.
          On a key collision the later source wins (base < student <
          supervisor).
        * ``all_data_nested``: the top-level fields plus only the requested
          main fields as columns; the remaining student/supervisor keys are
          kept as pretty-printed JSON strings in the ``student_data`` and
          ``supervisor_data`` columns.

    Top-level values that are dicts (other than the two handled above) are
    not written to either sheet.

    Args:
        data: Iterable of dict entries.
        supervisorMainFields: Keys of ``supervisor_data`` to promote to
            columns in the nested sheet (missing keys become ``None``).
        studentMainFields: Keys of ``student_data`` to promote likewise.
        outputFile: Destination path (or any target ``pd.ExcelWriter``
            accepts). For path targets the parent directory is created if
            it does not exist.
    """
    nestedKeys = ('student_data', 'supervisor_data')
    flattenedEntries = []
    nestedEntries = []

    for entry in data:
        # Top-level, non-dict fields. The two nested keys are excluded by
        # name so a null value does not leak through as a stray column.
        baseData = {
            k: v for k, v in entry.items()
            if k not in nestedKeys and not isinstance(v, dict)
        }

        # `or {}` covers both a missing key and an explicit JSON null.
        supervisorData = entry.get('supervisor_data') or {}
        studentData = entry.get('student_data') or {}

        # Split each nested dict into promoted fields and the remainder.
        supervisorFieldsExtracted = {k: supervisorData.get(k) for k in supervisorMainFields}
        supervisorRemainingData = {k: v for k, v in supervisorData.items() if k not in supervisorMainFields}
        studentFieldsExtracted = {k: studentData.get(k) for k in studentMainFields}
        studentRemainingData = {k: v for k, v in studentData.items() if k not in studentMainFields}

        # Fully flattened (all keys)
        flattenedEntries.append({**baseData, **studentData, **supervisorData})

        # Nested version
        nestedEntries.append({
            **baseData,
            **supervisorFieldsExtracted,
            **studentFieldsExtracted,
            'student_data': json.dumps(studentRemainingData, indent=2, ensure_ascii=False),
            'supervisor_data': json.dumps(supervisorRemainingData, indent=2, ensure_ascii=False),
        })

    # ExcelWriter does not create missing directories; do it for path targets
    # (file-like targets are left alone).
    if isinstance(outputFile, (str, os.PathLike)):
        parentDir = os.path.dirname(os.fspath(outputFile))
        if parentDir:
            os.makedirs(parentDir, exist_ok=True)

    with pd.ExcelWriter(outputFile, engine='openpyxl') as writer:
        pd.DataFrame(flattenedEntries).to_excel(writer, sheet_name='all_data_flat', index=False)
        pd.DataFrame(nestedEntries).to_excel(writer, sheet_name='all_data_nested', index=False)

def getandprintData(session, api_url):
    """Fetch one API endpoint with progress messages, without raising.

    Args:
        session: Session object passed through to ``fetch_api_data``.
        api_url: Endpoint URL.

    Returns:
        The value returned by ``fetch_api_data``, or an empty list if it
        raised. The error is printed, so callers cannot distinguish a failed
        fetch from an empty result by the return value alone.
    """
    print(f"🔄 Fetching API data for {api_url}...")
    try:
        result_data = fetch_api_data(session, api_url)
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # continue with the remaining endpoints.
        print(f"❌ Error fetching API data: {e}")
        return []
    else:
        print("✅ API data retrieved successfully.")
        return result_data

In [8]:
def main():
    """Log in to the Unimelb Dash site and fetch the three assessment feeds.

    Progress and errors are printed to stdout. The function returns early
    (with ``None``) when login fails or any step before the fetches raises.
    The Excel export at the end is currently commented out, so the fetched
    data is not persisted.
    """
    login_url = "https://www.unimelb-dash.com/accounts/login/"
    api_url_1 = "https://www.unimelb-dash.com/assessment/get/"
    api_url_2 = "https://www.unimelb-dash.com/assessment/mini-cex/get/"
    api_url_3 = "https://www.unimelb-dash.com/assessment/osce/get/"

    # Fields to pull out of supervisor_data / student_data into their own
    # columns when exporting with jsonToExcel.
    supervisorMainFields = ['time_mgmt', 'entrustment', 'communication', 'professionalism', 'assessor_feedback',
                            'clinical_incident', 'patient_complexity']
    studentMainFields = ['student_feedback']

    # The context manager closes the session's connections on every exit
    # path, including the early returns below.
    with requests.Session() as session:
        try:
            email, password = load_credentials()
            csrf_token = get_csrf_token(session, login_url)
            login_response = login(session, login_url, email, password, csrf_token)

            # The login form text being present in the response is treated
            # as a failed login.
            if "Please sign in" in login_response.text:
                print("❌ Login failed: invalid credentials or CSRF mismatch.")
                return

            print("✅ Login successful!")

            result_data_1 = getandprintData(session, api_url_1)
            result_data_2 = getandprintData(session, api_url_2) # mini-cex
            result_data_3 = getandprintData(session, api_url_3) # osce

        except Exception as e:
            print(f"❌ Error: {e}")
            return

    # Export is disabled for now; re-enable the two lines below to write the
    # first feed to the workbook.
    outputFile = "2025/_unimelb_dash_all_data.xlsx"
    # jsonToExcel(result_data_1, supervisorMainFields, studentMainFields, outputFile)
    # print(f"✅ Data saved to {outputFile} successfully.")

# Run the workflow when this module is executed as the program entry point.
if __name__ == "__main__":
    main()

✅ Login successful!
🔄 Fetching API data for https://www.unimelb-dash.com/assessment/get/...
✅ API data retrieved successfully.
🔄 Fetching API data for https://www.unimelb-dash.com/assessment/mini-cex/get/...
✅ API data retrieved successfully.
🔄 Fetching API data for https://www.unimelb-dash.com/assessment/osce/get/...
❌ Error fetching API data: API response is not JSON:



<!doctype html>
<html lang="en">
    <head><script src="/mothine-Hath-Dised-Hearight-Neitfalse-deepe-thin" async></script>
        
          <title>Unimelb Dash</title>
        
        <meta charset="utf-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1" />
      
