# Subcategory Aggregators (nbdev)

Implement and test subcategory aggregation logic here. Export production code with `#| export` cells. 


## Subcategory Aggregation Method

Subcategory aggregation proceeds roughly as follows:

### Pseudocode for Aggregator Loop

```python
for student in students:
    # Create a new JSON to store data for the student
    student_data = {}
    
    for subcategory in subcategories:
        # Go fetch all the data for a given student
        student_submissions = fetch_student_submissions(student.id, subcategory.id)
        
        # Aggregate their data for the subcategory, with switches depending on the category
        aggregated_score = aggregate_subcategory_data(student_submissions, subcategory)
        
        student_data[subcategory] = aggregated_score
    
    # Store JSON for the student in STUDENT_SUBCATEGORY_SCORES table
    store_student_subcategory_scores(student.id, student_data)
```

The outer loop repeats these steps until every student has been processed.

**For testing purposes:**
- Option to do single subcategories at a time
- Option to do single students

In [5]:
#| default_exp aggregators
#| export
import logging
from datetime import datetime
from typing import Optional, Dict, Any, List
import os

import requests
from supabase import Client
from apex_scoring.bell_curve import BellCurveCalculator

logger = logging.getLogger(__name__)

from dotenv import load_dotenv
load_dotenv()

# Simple, env-configurable logging setup
# LOG_LEVEL: DEBUG|INFO|WARNING|ERROR, LOG_FORMAT: 'json' or 'plain'
LOG_LEVEL = (os.getenv('LOG_LEVEL') or 'INFO').upper()
LOG_FORMAT = (os.getenv('LOG_FORMAT') or 'plain').lower()

def setup_logging(level: str = LOG_LEVEL, fmt: str = LOG_FORMAT) -> None:
    """Configure the root logger with a single stream handler.

    Any handlers already attached to the root logger are removed first, so
    calling this repeatedly (e.g. when re-running a notebook cell) does not
    produce duplicate log lines.

    Args:
        level: Name of a standard logging level, matched case-insensitively.
            Unknown names fall back to INFO.
        fmt: 'json' for one JSON object per record; anything else selects the
            plain text format.
    """
    import json

    class JsonFormatter(logging.Formatter):
        """Render each record as a compact JSON object."""

        def format(self, record: logging.LogRecord) -> str:
            payload = {
                # Use the time the record was created, not the time it is formatted.
                'ts': int(record.created),
                'level': record.levelname,
                'name': record.name,
                'msg': record.getMessage(),
            }
            if record.exc_info:
                payload['exc_info'] = self.formatException(record.exc_info)
            return json.dumps(payload)

    # getattr() on a lower-case name would return a function such as
    # logging.debug, so normalise the name and accept integer levels only.
    level_value = getattr(logging, str(level).strip().upper(), None)
    if not isinstance(level_value, int):
        level_value = logging.INFO

    stream = logging.StreamHandler()
    if str(fmt).strip().lower() == 'json':
        stream.setFormatter(JsonFormatter())
    else:
        stream.setFormatter(logging.Formatter('%(asctime)s [%(levelname)s] %(name)s: %(message)s'))

    root = logging.getLogger()
    root.handlers.clear()
    root.setLevel(level_value)
    root.addHandler(stream)

# Initialize logging now for notebooks and for exported modules.
# NOTE: this runs as soon as the cell/module is executed and replaces any
# handlers already attached to the root logger (see setup_logging).
setup_logging()

# Secondary logger for external calls; PopuliClient writes its request and
# response debug lines to it.
populi_logger = logging.getLogger('populi')


class PopuliClient:
    """Small GET-only client for the Populi API v2, authenticated with a bearer token.

    The base URL and API key come from the constructor arguments, falling back
    to the `POPULI_URL` and `POPULI_API_KEY` environment variables.
    """

    def __init__(self, base_url: Optional[str] = None, api_key: Optional[str] = None):
        """Resolve connection settings.

        Raises:
            RuntimeError: if either the base URL or the API key is missing.
        """
        configured_url = base_url or os.getenv("POPULI_URL") or ""
        # Trailing slashes are stripped so joining endpoints never doubles them.
        self.base_url = configured_url.rstrip("/")
        self.api_key = api_key or os.getenv("POPULI_API_KEY")
        if not (self.base_url and self.api_key):
            raise RuntimeError("POPULI_URL and POPULI_API_KEY must be set for Populi access")

    def _request(self, endpoint: str, params: Optional[Dict[str, Any]] = None) -> Any:
        """GET `<base_url>/api2/<endpoint>` and return the decoded response.

        Returns the parsed JSON body, or the raw response text when the body
        cannot be decoded as JSON. HTTP error statuses raise via
        `raise_for_status()`.
        """
        path = endpoint.lstrip('/')
        url = f"{self.base_url}/api2/{path}"
        auth_headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        populi_logger.debug(f"GET {url} params={params}")
        query = params or {}
        response = requests.get(url, headers=auth_headers, params=query, timeout=30)
        populi_logger.debug(f"RESP {response.status_code} for {url}")
        response.raise_for_status()
        try:
            body = response.json()
        except Exception:
            body = response.text
        return body

    # Convenience wrappers matching server utils used in the web app
    def get_student(self, person_id: str) -> Any:
        """Fetch the student record attached to a person."""
        endpoint = f"people/{person_id}/student"
        return self._request(endpoint)

    def get_enrollments(self, person_id: str, *, academic_term_id: Optional[str] = None, expand: Optional[str] = None) -> Any:
        """Fetch a person's enrollments; empty/None filters are left out of the query."""
        optional_filters = {"academic_term_id": academic_term_id, "expand": expand}
        params: Dict[str, Any] = {name: value for name, value in optional_filters.items() if value}
        endpoint = f"people/{person_id}/enrollments"
        return self._request(endpoint, params)

    def get_course_offering(self, course_offering_id: str) -> Any:
        """Fetch a single course offering by id."""
        endpoint = f"courseofferings/{course_offering_id}"
        return self._request(endpoint)

    def get_academic_terms(self) -> Any:
        """Fetch the list of academic terms."""
        return self._request("academicterms")

    


def _coerce_float(value: Any) -> Optional[float]:
    """Best-effort conversion of `value` to float.

    Returns None for None, for blank strings, and for anything that cannot be
    converted; numbers are converted directly and every other value goes
    through its stripped string form.
    """
    try:
        if value is None:
            return None
        if isinstance(value, (int, float)):
            return float(value)
        text = str(value).strip()
        return float(text) if text != "" else None
    except Exception:
        # Any conversion failure is reported as "no value".
        return None


def _letter_to_gpa_points(letter: Optional[str]) -> Optional[float]:
    """Translate a letter grade into 4.0-scale points.

    Surrounding whitespace and case are ignored. Returns None for empty or
    non-string input and for letters that are not in the table.
    """
    if not letter or not isinstance(letter, str):
        return None
    # Simple 4.0 scale mapping; adjust if Populi provides exact points
    points_by_letter = dict(
        [
            ("A+", 4.0), ("A", 4.0), ("A-", 3.7),
            ("B+", 3.3), ("B", 3.0), ("B-", 2.7),
            ("C+", 2.3), ("C", 2.0), ("C-", 1.7),
            ("D+", 1.3), ("D", 1.0), ("D-", 0.7),
            ("F", 0.0),
        ]
    )
    normalized = letter.strip().upper()
    return points_by_letter.get(normalized)


def _percent_to_gpa(percent: Optional[float]) -> Optional[float]:
    """Map a percentage grade onto whole-number GPA points.

    The input is first passed through `_coerce_float`; None is returned when
    that yields no number.
    """
    score = _coerce_float(percent)
    if score is None:
        return None
    # Approximate conversion: 90+=4.0, 80+=3.0, 70+=2.0, 60+=1.0 else 0.0
    for floor, gpa in ((90, 4.0), (80, 3.0), (70, 2.0), (60, 1.0)):
        if score >= floor:
            return gpa
    return 0.0


# This class runs the whole aggregation process; the cells further down exercise it for testing.

class SubcategoryAggregator:
    # Subcategory ids whose scores are GPA values. _normalize_latest_subcategory_scores
    # copies the raw score into normalized_score for these instead of applying
    # the bell curve.
    # NOTE(review): the ids are hard-coded; confirm they match the rows in the
    # `subcategories` table of the target environment.
    GPA_SUBCATEGORY_IDS = {
        'f50830fe-b820-4223-89e2-e69241b459af',  # practicum grade
        '8d13f1b9-33e1-4a62-be45-488a6834112f',  # spiritual formation grade
        'd1d972a4-2484-4b9a-a53c-0b63bb2e952c',  # overall GPA
    }

    def __init__(self, supabase_client: Client):
        """Store the Supabase client and create the bell-curve calculator.

        Args:
            supabase_client: client used for every table read and write in this class.
        """
        self.supabase = supabase_client
        # Used by _normalize_latest_subcategory_scores for non-GPA subcategories.
        self.bell_curve = BellCurveCalculator()
        logger.info("SubcategoryAggregator initialized")
    
    def get_students(self) -> list[dict]:
        resp = self.supabase.table('students').select('*').execute()
        return resp.data

    def get_single_student(self, student_id: str) -> dict:
        resp = self.supabase.table('students').select('*').eq('id', student_id).execute()
        return resp.data

    def get_subcategories(self) -> list[dict]:
        resp = self.supabase.table('subcategories').select('*').execute()
        return resp.data

    def get_single_subcategory(self, subcategory_id: str) -> dict:
        resp = self.supabase.table('subcategories').select('*').eq('id', subcategory_id).execute()
        return resp.data

    def get_student_submissions(self, student_id: str) -> list[dict]:
        resp = self.supabase.table('event_submissions').select('*').eq('student_id', student_id).eq('approval_stauts', 'approved').execute()
        return resp.data

    def get_student_submission_by_subcategory(self, student_id: str, subcategory_id: str) -> dict:
        resp = self.supabase.table('event_submissions').select('*').eq('student_id', student_id).eq('subcategory_id', subcategory_id).eq('approval_status', 'approved').execute()
        return resp.data

    def aggregate_subcategory_score(self, subcategory_name: str, subcategory_id: str, student_id: str) -> tuple:
        """Compute one student's raw score for one subcategory.

        Routes on `subcategory_name` to the matching aggregation method.

        Returns:
            A `(score, data_points_count)` tuple. For the GPA-style
            subcategories the score comes from `aggregate_gpa` (it may be None)
            and the count is always 1; for every other subcategory the count
            is the number of approved submissions that were aggregated.

        Raises:
            ValueError: if `subcategory_name` has no aggregation rule.
        """
        logger.debug(f"aggregate_subcategory_score start subcategory={subcategory_name} sid={subcategory_id} student={student_id}")

        # GPA-style subcategories are computed from Populi, not from submissions.
        if subcategory_name in ('practicum_grade', 'spiritual_formation_grade', 'class_attendance_grades'):
            return self.aggregate_gpa(subcategory_name, student_id), 1

        # Submission-based subcategories: name -> method that reduces the
        # student's approved submissions to a single score.
        aggregators = {
            'fellow_friday_participation': self.aggregate_involvement_scores,
            'gbe_participation': self.aggregate_involvement_scores,
            'chapel_participation': self.aggregate_involvement_scores,
            'company_team_building': self.aggregate_involvement_scores,
            'credentials_certifications': self.aggregate_professional_development,
            'job_promotion_opportunities': self.aggregate_professional_development,
            'community_service_hours': self.aggregate_service_hours,
            'lions_games_involvement': self.aggregate_lions_games_scores,
            'small_group_involvement': self.aggregate_monthly_checkins,
            'dream_team_involvement': self.aggregate_monthly_checkins,
            'fellow_friday_attendance': self.aggregate_attendance_percentage,
            'gbe_attendance': self.aggregate_attendance_percentage,
            'chapel_attendance': self.aggregate_attendance_percentage,
            'company_community_events': self.aggregate_attendance_percentage,
        }
        aggregate = aggregators.get(subcategory_name)
        if aggregate is None:
            # Fail before querying anything: there is no rule for this name.
            raise ValueError(f"No match found for subcategory_name={subcategory_name}")

        submissions = self.get_student_submission_by_subcategory(student_id, subcategory_id)
        data_points_count = len(submissions)
        return aggregate(submissions), data_points_count

    def aggregate_attendance_percentage(self, student_subcategory_submissions: list[dict]) -> dict:
        #Get total occurences, then divide present count by total count. If no data, they were not present.

        #Get total occurences, then divide present count by total count. If no dat
        #Data structure is in event_submissions.submission_data
        # {"notes": "gbe_participation attendance #3", "status": "present", "submission_type": "attendance"}
        
        total_occurences = len(student_subcategory_submissions)
        present_count = sum(1 for submission in student_subcategory_submissions if submission['submission_data']['status'] == 'present')
        attendance_percentage = (present_count / total_occurences) * 100 if total_occurences > 0 else 0
        return attendance_percentage 


    def aggregate_monthly_checkins(self, student_subcategory_submissions: list[dict]) -> dict:

        #Data Structure:
        #{'notes': 'small_group monthly check #1', 'status': 'involved', 'submission_type': 'small_group'}
        #Alt type for status is 'not_involved'

        #Get total occurences, then divide present count by total count. If no data, they were not present.
        total_occurences = len(student_subcategory_submissions)
        present_count = sum(1 for submission in student_subcategory_submissions if submission['submission_data']['status'] == 'involved')
        monthly_checkin_percentage = (present_count / total_occurences) * 100 if total_occurences > 0 else 0
        return monthly_checkin_percentage


    def aggregate_involvement_scores(self, student_subcategory_submissions: list[dict]) -> dict:

        #Data Structure:
        # {'notes': 'gbe_participation attendance #1', 'points': 1, 'submission_type': 'participation'}

        print(f"Student subcategory submissions: {student_subcategory_submissions}")

        #Get total points
        total_points = sum(submission['submission_data']['points'] for submission in student_subcategory_submissions)
 
        return total_points
        
    def aggregate_service_hours(self, student_subcategory_submissions: list[dict]) -> dict:

        #Data Structure:
        #{'hours': 4, 'description': 'Magni culpo necessitatibus sulum timidus aeneus sit ancilla adstringo corporis.', 'organization': 'Community Garden', 'date_of_service': '2025-10-07', 'submission_type': 'community_service', 'supervisor_name': 'Trevor West', 'supervisor_contact': 'Chad.Zemlak@yahoo.com'}

        #Get total hours, then divide by 12. If no data, they did not meet the cap.
        total_hours = sum(submission['submission_data']['hours'] for submission in student_subcategory_submissions)
        capped_hours = min(total_hours, 12) if total_hours > 0 else 0
        return capped_hours

    def aggregate_professional_development(self, student_subcategory_submissions: list[dict]) -> dict:

        #Data Structure can be either of below, but we only care about the assigned_points for either
        #Job Promotion:
        # {'description': 'Aqua certus undique paens dolorum illo capio video abbas currus adnuo patruus.', 'organization': 'ACU Blueprintprintprintprintprintprintprintprintprintprint', 'assigned_points': 50, 'promotion_title': 'Assistant → Senior Assistant', 'submission_type': 'job_promotion', 'supervisor_name': 'Eugene Kunze', 'date_of_promotion': '2025-04-14', 'supervisor_contact': 'Fred.Hirthe@gmail.com'}
        # Credentials:
        # {'description': 'Synagoga provident audacia patior demergo alii ter aperiam unus desipio.', 'assigned_points': 33, 'credential_name': 'Google Analytics', 'submission_type': 'credentials', 'date_of_credential': '2025-06-14', 'granting_organization': 'Professional Certification Body'}

        #Get total points
        total_points = sum(submission['submission_data']['assigned_points'] for submission in student_subcategory_submissions)
        return total_points

    
    def aggregate_lions_games_scores(self, student_subcategory_submissions: list[dict]) -> dict:

        #Data Structure:
        # {'notes': 'lions games #1', 'assigned_points': 1, 'submission_type': 'lions_games'}

        #Get total points
        total_points = sum(submission['submission_data']['assigned_points'] for submission in student_subcategory_submissions)
        return total_points

    def aggregate_gpa(self, subcategory_name: str, student_uuid: str) -> Optional[float]:
        """Compute a GPA-style score for one student from their Populi enrollments.

        Steps: look up the student's `populi_id` in the `users` table (matched
        on `users.id == student_uuid`), fetch their enrollments from Populi,
        then route on `subcategory_name`:
          - 'practicum_grade'           -> GPA over courses matching "practicum"
          - 'spiritual_formation_grade' -> GPA over courses matching "spiritual" and "formation"
          - 'class_attendance_grades'   -> GPA over all graded enrollments

        Returns:
            The credit-weighted GPA, or None when there is nothing to grade.

        Raises:
            ValueError: unknown `subcategory_name`, no matching user row, or a
                user row without a `populi_id`.
            Exception: whatever the Populi request raises (logged, then re-raised).
        """
        # Validate the routing key first so an unsupported subcategory fails
        # before any database or Populi round trip.
        key = (subcategory_name or '').strip().lower()
        if key not in ('practicum_grade', 'spiritual_formation_grade', 'class_attendance_grades'):
            raise ValueError(f"No match found for subcategory_name={subcategory_name}")

        # Resolve the Populi person id for this student.
        user_data = self.supabase.table('users').select('populi_id').eq('id', student_uuid).limit(1).execute()
        if not user_data.data:
            raise ValueError(f"No user found for student_uuid={student_uuid}")
        populi_id = user_data.data[0].get('populi_id')
        if not populi_id:
            # Without this guard the request would go to "people/None/enrollments".
            raise ValueError(f"No populi_id set for student_uuid={student_uuid}")

        populi = PopuliClient()
        try:
            enrollments_payload = populi.get_enrollments(populi_id, expand='courseoffering')
        except Exception:
            logger.exception("Failed to fetch enrollments from Populi for populi_id=%s", populi_id)
            raise

        # Normalize payload to an array of enrollment rows: accept a bare list,
        # {"data": [...]}, or {"data": {"data": [...]}}.
        if isinstance(enrollments_payload, dict) and 'data' in enrollments_payload:
            enrollments = enrollments_payload.get('data') or []
            if isinstance(enrollments, dict) and 'data' in enrollments:
                enrollments = enrollments.get('data') or []
        else:
            enrollments = enrollments_payload if isinstance(enrollments_payload, list) else []

        # Route by subcategory
        if key == 'practicum_grade':
            return self._compute_course_gpa(enrollments, include_keywords=['practicum'])
        if key == 'spiritual_formation_grade':
            return self._compute_course_gpa(enrollments, include_keywords=['spiritual', 'formation'])
        # key == 'class_attendance_grades'
        # This subcategory is not aptly named at the moment: it only calculates
        # grades (overall GPA), not attendance. The Supabase subcategories and
        # this routing will need adjusting if attendance is added later
        # (see _compute_attendance_percentage).
        return self._compute_overall_gpa(enrollments)

    def _extract_course_meta(self, enrollment: Dict[str, Any]) -> Dict[str, Any]:
        """Look up code, name and credits for the course behind an enrollment.

        Fetches the course offering named by `enrollment['course_offering_id']`
        from Populi (one request per call). Code and name prefer the first
        entry of the offering's `catalog_courses`, falling back to the offering
        itself; credits prefer the enrollment's own value.
        """
        offering = PopuliClient().get_course_offering(enrollment['course_offering_id'])
        listed = offering.get('catalog_courses') or []
        if isinstance(listed, list) and listed:
            catalog = listed[0]
        else:
            catalog = {}

        code = catalog.get('abbrv') or offering.get('abbrv')
        name = catalog.get('name') or offering.get('name')
        credits = enrollment.get('credits') or offering.get('credits')
        return {'code': code, 'name': name, 'credits': credits}

    def _matches_keywords(self, text: Optional[str], keywords: list[str]) -> bool:
        if not text:
            return False
        t = text.lower()
        return all(k.lower() in t for k in keywords)

    def _enrollment_to_gpa(self, enrollment: Dict[str, Any]) -> Optional[float]:
        """Convert one enrollment's grade to GPA points.

        A recognised letter grade (`letter_grade` / `letterGrade`) wins;
        otherwise the numeric grade (`final_grade` / `finalGrade`) is converted
        as a percentage. Returns None when neither yields a value.
        """
        letter_grade = enrollment.get('letter_grade') or enrollment.get('letterGrade')
        from_letter = _letter_to_gpa_points(letter_grade)
        if from_letter is None:
            # No usable letter grade: fall back to the numeric percent.
            # NOTE(review): because of the `or`, a numeric final_grade of 0 is
            # treated as missing and yields None unless finalGrade is set.
            # Confirm whether Populi reports 0 for ungraded enrollments before
            # changing this.
            numeric_grade = enrollment.get('final_grade') or enrollment.get('finalGrade')
            return _percent_to_gpa(numeric_grade)
        return from_letter

    def _compute_course_gpa(self, enrollments: list[Dict[str, Any]], include_keywords: list[str]) -> Optional[float]:
        """Credit-weighted GPA over the graded enrollments whose course matches all keywords.

        A course matches when every keyword occurs (case-insensitively) in its
        "<code> <name>" text. Credits that are missing, zero, or unparseable
        count as 1.0.

        Returns:
            The weighted GPA, or None when no graded enrollment matches. None
            means "no grade for this category", which is not the same as 0.0.
        """
        matching: list[tuple[float, float]] = []  # (gpa_points, credits)
        for en in enrollments:
            # Check the grade first: it is a local computation, whereas the
            # course metadata costs one Populi request per enrollment.
            gpa_points = self._enrollment_to_gpa(en)
            if gpa_points is None:
                continue
            meta = self._extract_course_meta(en)
            course_text = f"{meta.get('code') or ''} {meta.get('name') or ''}".strip()
            if not self._matches_keywords(course_text, include_keywords):
                continue
            credits = _coerce_float(meta.get('credits')) or 1.0
            matching.append((gpa_points, credits))

        if not matching:
            # Nothing matched: the student is not in such a class, so there is no score.
            return None
        total_points = sum(g * c for g, c in matching)
        total_credits = sum(c for _, c in matching)
        # Non-positive total credits also means "no usable grade", not 0.0.
        return float(total_points / total_credits) if total_credits > 0 else None

    def _compute_overall_gpa(self, enrollments: list[Dict[str, Any]]) -> Optional[float]:
        """Credit-weighted GPA over every graded enrollment.

        Enrollments without a grade are skipped before any course lookup.
        Credits that are missing, zero, or unparseable count as 1.0.

        Returns:
            The weighted GPA, or None when there is nothing to grade (no graded
            enrollments, or a non-positive credit total), matching the
            convention of `_compute_course_gpa` that None means "no score".
        """
        samples: list[tuple[float, float]] = []  # (gpa_points, credits)
        for en in enrollments:
            gpa_points = self._enrollment_to_gpa(en)
            if gpa_points is None:
                continue
            meta = self._extract_course_meta(en)
            credits = _coerce_float(meta.get('credits')) or 1.0
            samples.append((gpa_points, credits))
        if not samples:
            # No graded courses at all.
            return None
        total_points = sum(g * c for g, c in samples)
        total_credits = sum(c for _, c in samples)
        return float(total_points / total_credits) if total_credits > 0 else None

    def _compute_attendance_percentage(self, enrollments: list[Dict[str, Any]]) -> float:
        """Attempt to compute attendance percentage across enrollments.
        NOTE: This function is untested as it was not used in the first iteration of the app as of August 2025.
        Tries several shapes commonly seen in Populi payloads.
        Falls back to 0.0 if no attendance data is present.
        """
        total_present = 0
        total_meetings = 0
        for en in enrollments:
            # Common fields if provided
            percent = en.get('attendance_percentage') or en.get('attendancePercent')
            if percent is not None:
                p = _coerce_float(percent)
                if p is not None:
                    total_present += p
                    total_meetings += 100
                    continue
            attendance = en.get('attendance') or {}
            present = attendance.get('present') or attendance.get('present_count')
            meetings = attendance.get('meetings') or attendance.get('total_classes')
            if present is not None and meetings:
                try:
                    total_present += float(present)
                    total_meetings += float(meetings)
                except Exception:
                    pass
        if total_meetings <= 0:
            return None #If no meetings, need to not count this category so return is None, not 0.0
        return float((total_present / total_meetings) * 100.0)

    def _get_latest_scores_for_subcategory(self, subcategory_id: str) -> list[dict]:
        """Fetch latest-day rows for a subcategory from `student_subcategory_scores`.
        Returns rows with fields: id, student_id, score, calculation_date.
        """
        latest_resp = (
            self.supabase
            .table('student_subcategory_scores')
            .select('calculation_date')
            .eq('subcategory_id', subcategory_id)
            .order('calculation_date', desc=True)
            .limit(1)
            .execute()
        )
        if not latest_resp.data:
            return []
        latest_date = latest_resp.data[0]['calculation_date']

        rows_resp = (
            self.supabase
            .table('student_subcategory_scores')
            .select('id, student_id, score, calculation_date')
            .eq('subcategory_id', subcategory_id)
            .eq('calculation_date', latest_date)
            .execute()
        )
        return rows_resp.data or []


    def _normalize_latest_subcategory_scores(self, subcategory_id: str) -> dict:
        """Apply bell-curve normalization for the latest scores of a subcategory.
        For GPA subcategories, sets normalized_score = score (no bell curve).
        For non-GPA subcategories, applies bell curve normalization.
        Returns summary stats.
        """
        rows = self._get_latest_scores_for_subcategory(subcategory_id)
        if not rows:
            return {'normalized': False, 'reason': 'No rows to process', 'count': 0}

        if subcategory_id in getattr(self, 'GPA_SUBCATEGORY_IDS', set()):
            # GPA subcategories: set normalized_score = score (no bell curve)
            updated_count = 0
            for row in rows:
                if row.get('score') is not None:
                    self.supabase.table('student_subcategory_scores').update({
                        'normalized_score': float(row['score'])
                    }).eq('id', row['id']).execute()
                    updated_count += 1
            
            return {
                'normalized': False,
                'reason': 'GPA subcategory - normalized_score set to raw score',
                'count': updated_count,
            }

        # Non-GPA subcategories: apply bell curve normalization
        raw_scores: List[float] = [float(r['score']) for r in rows if r.get('score') is not None]
        if not raw_scores:
            return {'normalized': False, 'reason': 'No scores to normalize', 'count': 0}

        # Ensure bell curve calculator is available
        if not hasattr(self, 'bell_curve'):
            from apex_scoring.bell_curve import BellCurveCalculator
            self.bell_curve = BellCurveCalculator()

        normalized_scores, stats = self.bell_curve.apply_bell_curve_to_scores(raw_scores)

        # Write back normalized_score per row (same ordering)
        for row, norm in zip(rows, normalized_scores):
            self.supabase.table('student_subcategory_scores').update({
                'normalized_score': float(norm)
            }).eq('id', row['id']).execute()

        return {
            'normalized': True,
            'count': len(normalized_scores),
            'raw_stats': stats.get('raw_stats'),
            'normalized_stats': stats.get('normalized_stats'),
        }


    def normalize_all_subcategories_for_latest_day(self) -> dict:
        """Find all subcategories with rows on the latest day and normalize them.
        GPA subcategories get normalized_score = score, others get bell curve normalization."""
        latest_resp = (
            self.supabase
            .table('student_subcategory_scores')
            .select('calculation_date')
            .order('calculation_date', desc=True)
            .limit(1)
            .execute()
        )
        if not latest_resp.data:
            return {}
        latest_date = latest_resp.data[0]['calculation_date']

        sub_resp = (
            self.supabase
            .table('student_subcategory_scores')
            .select('subcategory_id')
            .eq('calculation_date', latest_date)
            .execute()
        )
        sub_ids = sorted({r['subcategory_id'] for r in (sub_resp.data or [])})

        results = {}
        for sid in sub_ids:
            # Process all subcategories (GPA ones will set normalized_score = score)
            results[sid] = self._normalize_latest_subcategory_scores(sid)
        return { 'latest_date': latest_date, 'results': results }


## The cell below is for testing the full aggregator (for a single student or for all students) and the normalization step

In [19]:
from datetime import datetime

# Supabase client shared by the test helpers in this cell, built from the
# SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY environment variables.
supabase_client = Client(os.getenv('SUPABASE_URL'), os.getenv('SUPABASE_SERVICE_ROLE_KEY'))

#Flow to test aggregator for single student

def test_aggregator_for_all_students():
    """Run the single-student aggregation flow for every row of the `students` table."""
    roster = SubcategoryAggregator(supabase_client).get_students()
    for student_id in (row['id'] for row in roster):
        test_aggregator_for_single_student(student_id)

def test_aggregator_for_single_student(
    student_id: str,
    academic_year_start: int = 2025,
    academic_year_end: int = 2026,
    dry_run: bool = False,
):
    """Aggregate every subcategory for one student and store the raw scores.

    For each row of `subcategories`, computes the student's raw score and
    inserts it into `student_subcategory_scores`. Subcategories whose score is
    None are skipped (no row is written).

    Args:
        student_id: id of the student to aggregate.
        academic_year_start: value written to `academic_year_start`.
        academic_year_end: value written to `academic_year_end`.
        dry_run: when True, print each row that would be inserted instead of
            writing it (replaces the former commented-out test prints).

    Raises:
        ValueError: propagated from the aggregator when a subcategory name has
            no aggregation rule.
    """
    aggregator = SubcategoryAggregator(supabase_client)
    for subcategory in aggregator.get_subcategories():
        logger.info(f"Aggregating {subcategory['name']} for {student_id}")
        score, data_points_count = aggregator.aggregate_subcategory_score(subcategory['name'], subcategory['id'], student_id)

        # A None score means "nothing to record" for this subcategory: skip the entry.
        if score is None:
            print(f"No score for {student_id} in {subcategory['name']}")
            continue

        entry = {
            'student_id': student_id,
            'subcategory_id': subcategory['id'],
            'score': score,
            'calculation_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'data_points_count': data_points_count,
            'academic_year_start': academic_year_start,
            'academic_year_end': academic_year_end,
        }

        if dry_run:
            print(f"Subcategory name: {subcategory['name']}")
            print(f"Entry would be: {entry}")
            continue

        supabase_client.table('student_subcategory_scores').insert(entry).execute()
        logger.info(f"Raw score for {student_id} in {subcategory['name']} of {score} was added to the table")

# Normalize a single subcategory (pass the id of the subcategory to process)
def test_normalize_single_subcategory(subcategory_id: str):
    """Normalize the latest scores of one subcategory and print the summary."""
    outcome = SubcategoryAggregator(supabase_client)._normalize_latest_subcategory_scores(subcategory_id)
    print('Single subcategory normalization result:')
    print(outcome)

# Normalize every subcategory that has rows on the latest day
# (GPA subcategories are included: their normalized_score is set to the raw score)
def test_normalize_all_latest_day():
    """Run latest-day normalization for all subcategories and print the summary."""
    outcome = SubcategoryAggregator(supabase_client).normalize_all_subcategories_for_latest_day()
    print('All subcategories normalization for latest day:')
    print(outcome)

# Manual driver: uncomment exactly the step you want to run.
# The aggregator steps insert rows into student_subcategory_scores; the
# normalize steps update normalized_score on existing rows.
# test_aggregator_for_single_student('7630d221-e3aa-4126-a0e3-bee715160247')
# test_aggregator_for_all_students()
# test_normalize_single_subcategory('a3bab151-0ce1-402f-b507-7d6c3489bc8c') #Job promotion
test_normalize_all_latest_day()

2025-08-12 14:51:38,087 [INFO] apex_scoring.bell_curve: Initialized BellCurveCalculator with mean=3.0, std_dev=0.6
2025-08-12 14:51:38,087 [INFO] __main__: SubcategoryAggregator initialized
2025-08-12 14:51:38,418 [INFO] httpx: HTTP Request: GET https://ibucbpftrdxujktphifw.supabase.co/rest/v1/student_subcategory_scores?select=calculation_date&order=calculation_date.desc&limit=1 "HTTP/1.1 200 OK"
2025-08-12 14:51:38,531 [INFO] httpx: HTTP Request: GET https://ibucbpftrdxujktphifw.supabase.co/rest/v1/student_subcategory_scores?select=subcategory_id&calculation_date=eq.2025-08-12 "HTTP/1.1 200 OK"
2025-08-12 14:51:38,628 [INFO] httpx: HTTP Request: GET https://ibucbpftrdxujktphifw.supabase.co/rest/v1/student_subcategory_scores?select=calculation_date&subcategory_id=eq.0ceea111-1485-4a80-98a9-d82f3c12321c&order=calculation_date.desc&limit=1 "HTTP/1.1 200 OK"
2025-08-12 14:51:38,736 [INFO] httpx: HTTP Request: GET https://ibucbpftrdxujktphifw.supabase.co/rest/v1/student_subcategory_scores?

All subcategories normalization for latest day:
{'latest_date': '2025-08-12', 'results': {'0ceea111-1485-4a80-98a9-d82f3c12321c': {'normalized': True, 'count': 22, 'raw_stats': {'count': 22, 'mean': 13.318181818181818, 'std_dev': 6.717591313696542, 'min': 1.0, 'max': 27.0, 'percentiles': {'10th': 4.300000000000001, '25th': 9.25, '50th': 12.5, '75th': 17.0, '90th': 22.700000000000006}}, 'normalized_stats': {'count': 22, 'mean': 2.859090909090909, 'std_dev': 0.5907230476359159, 'min': 1.15, 'max': 3.81, 'percentiles': {'10th': 2.214, '25th': 2.5725, '50th': 2.965, '75th': 3.23, '90th': 3.5130000000000003}}}, '1a83ee45-7869-4f24-a70f-12ff3e1bc243': {'normalized': True, 'count': 22, 'raw_stats': {'count': 22, 'mean': 4.090909090909091, 'std_dev': 18.74690057027389, 'min': 0.0, 'max': 90.0, 'percentiles': {'10th': 0.0, '25th': 0.0, '50th': 0.0, '75th': 0.0, '90th': 0.0}}, 'normalized_stats': {'count': 22, 'mean': 1.2709090909090905, 'std_dev': 0.5540750612992061, 'min': 1.15, 'max': 3.81, '

In [None]:
#Get example return from a monthly checkin
#
# Fetches one student's submissions for several subcategories and prints the
# `submission_data` of the first submission of each, to show what each
# submission type looks like.

student_id = '7630d221-e3aa-4126-a0e3-bee715160247'
small_group_subcategory_id = 'a32c3898-dbf1-4a92-a5db-811dfb6fcd0f'

supabase_client = Client(os.getenv('SUPABASE_URL'), os.getenv('SUPABASE_SERVICE_ROLE_KEY'))

# Build the aggregator once and reuse it for every lookup in this cell
example_aggregator = SubcategoryAggregator(supabase_client)


def _show_first_submission(aggregator, student_id, subcategory_id):
    """Fetch a student's submissions for one subcategory and print the first one.

    Prints the `submission_data` of the first submission, or a short notice
    when the lookup comes back empty, so one missing subcategory does not
    abort the remaining examples with an IndexError.

    Returns whatever `get_student_submission_by_subcategory` returned.
    """
    submissions = aggregator.get_student_submission_by_subcategory(student_id, subcategory_id)
    if not submissions:
        print(f"No submissions found for subcategory {subcategory_id}")
    else:
        print(submissions[0]['submission_data'])
    return submissions


small_group_involvement = _show_first_submission(example_aggregator, student_id, small_group_subcategory_id)

print("Community service hours example")

community_service_subcategory_id = 'bc062d8d-6e16-4f0a-84ca-5fd9d7c10f8c'
service_hours = _show_first_submission(example_aggregator, student_id, community_service_subcategory_id)

print("Particpation points example")

gbe_subcategory_id = '0ceea111-1485-4a80-98a9-d82f3c12321c'
gbe_subcategory_submissions = _show_first_submission(example_aggregator, student_id, gbe_subcategory_id)


print("Professional development categories")
job_promotion_subcategory_id = 'a3bab151-0ce1-402f-b507-7d6c3489bc8c'
credential_subcategory_id = 'efdbc642-a52d-4872-ada5-2687fc03be73'

job_promotion_subcategory_submissions = _show_first_submission(example_aggregator, student_id, job_promotion_subcategory_id)

credential_subcategory_submissions = _show_first_submission(example_aggregator, student_id, credential_subcategory_id)


print("Lions Games example")

lions_games_subcategory_id = '49ccaacd-d437-4421-809a-f957c8b4baf8'
lions_games_subcategory_submissions = _show_first_submission(example_aggregator, student_id, lions_games_subcategory_id)



{'notes': 'small_group monthly check #1', 'status': 'not_involved', 'submission_type': 'small_group'}
Community service hours example
{'hours': 4, 'description': 'Magni culpo necessitatibus sulum timidus aeneus sit ancilla adstringo corporis.', 'organization': 'Community Garden', 'date_of_service': '2025-10-07', 'submission_type': 'community_service', 'supervisor_name': 'Trevor West', 'supervisor_contact': 'Chad.Zemlak@yahoo.com'}
Particpation points example
{'notes': 'gbe_participation attendance #1', 'points': 1, 'submission_type': 'participation'}
Professional development categories
{'description': 'Aqua certus undique paens dolorum illo capio video abbas currus adnuo patruus.', 'organization': 'ACU Apex', 'assigned_points': 50, 'promotion_title': 'Assistant → Senior Assistant', 'submission_type': 'job_promotion', 'supervisor_name': 'Eugene Kunze', 'date_of_promotion': '2025-04-14', 'supervisor_contact': 'Fred.Hirthe@gmail.com'}
{'description': 'Synagoga provident audacia patior dem

## These are cells for testing individual functions/calls

In [12]:
#Test Populi client connection
populi = PopuliClient()

# Test academic terms endpoint
# A single request is made inside the try block and its response is reused for
# both the raw dump and the summary, so a failed request is reported below
# instead of raising out of the cell.
try:
    terms = populi.get_academic_terms()
    print(terms)
    print(f"Academic terms response: {type(terms)}")
    if isinstance(terms, dict) and 'data' in terms:
        print(f"Found {len(terms['data'])} terms")

    # Pick the first two entries whether the response is a bare list or a
    # dict carrying them under 'data'; tolerate a missing or None 'data'.
    if isinstance(terms, list):
        terms_sample = terms[:2]
    elif isinstance(terms, dict):
        terms_sample = (terms.get('data') or [])[:2]
    else:
        terms_sample = []
    print("Terms sample:", terms_sample)
except Exception as e:
    print(f"Populi API test failed: {e}")

{'object': 'list', 'count': 17, 'results': 17, 'results_per_page': None, 'pages': 1, 'page': 1, 'offset': 0, 'has_more': False, 'data': [{'object': 'academic_term', 'id': 287327, 'academic_year_id': 74196, 'name': 'Spring 2026', 'display_name': '2025-2026: Spring 2026', 'start_date': '2026-01-13', 'end_date': '2026-05-08', 'grades_date': '2026-05-22', 'add_drop_time': '2026-01-27T06:00:00+00:00', 'enrollment_start_time': None, 'enrollment_end_time': None, 'max_enrolled_credits': None, 'max_enrolled_hours': None, 'max_audit_credits': None, 'max_audit_hours': None, 'non_standard': False, 'type': 'standard', 'evaluations_available_from': None, 'evaluations_available_to': None, 'evaluations_lock_grades_at': None, 'online_registration_delay_until': None, 'online_registration_randomization_seconds': 0, 'evaluations_lock_grades_until': None, 'evaluations_available_to_faculty': None, 'lms_sync': None, 'external_id': None, 'added_at': '2024-12-16T20:52:09+00:00', 'added_by_id': 24547414, 'start

In [68]:
# Exercise the student_id -> populi_id resolution path through aggregate_gpa
import os

from supabase import Client

supabase = Client(
    os.getenv('SUPABASE_URL'),
    os.getenv('SUPABASE_SERVICE_ROLE_KEY'),
)
aggregator = SubcategoryAggregator(supabase)

# Known student UUID; swap in a real student_id from your DB
test_student_id = '7630d221-e3aa-4126-a0e3-bee715160247'

# Selector for which GPA to aggregate:
#   1 = practicum, 2 = spiritual formation, 3 = overall
case = 3

if case not in (1, 2, 3):
    # Anything other than the three documented selectors
    print("Invalid case")
elif case == 3:
    overall_gpa = aggregator.aggregate_gpa(
        'class_attendance_grades', test_student_id
    )
    print(f"Overall GPA: {overall_gpa}")
elif case == 2:
    spiritual_gpa = aggregator.aggregate_gpa(
        'spiritual_formation_grade', test_student_id
    )
    print(f"Spiritual Formation GPA: {spiritual_gpa}")
else:
    practicum_gpa = aggregator.aggregate_gpa(
        'practicum_grade', test_student_id
    )
    print(f"Practicum GPA: {practicum_gpa}")



Aggregating GPA for class_attendance_grades
Overall GPA: 4.0


In [None]:
#Get example data structures for subcategories
#
# For a single student, looks up submissions in several subcategories and
# prints the `submission_data` of the first submission in each one.

student_id = '7630d221-e3aa-4126-a0e3-bee715160247'
small_group_subcategory_id = 'a32c3898-dbf1-4a92-a5db-811dfb6fcd0f'

supabase_client = Client(os.getenv('SUPABASE_URL'), os.getenv('SUPABASE_SERVICE_ROLE_KEY'))

# One aggregator is enough for every lookup in this cell
example_aggregator = SubcategoryAggregator(supabase_client)


def _show_first_submission(aggregator, student_id, subcategory_id):
    """Fetch a student's submissions for one subcategory and print the first one.

    Prints the `submission_data` of the first submission, or a short notice
    when the lookup comes back empty, so one missing subcategory does not
    abort the remaining examples with an IndexError.

    Returns whatever `get_student_submission_by_subcategory` returned.
    """
    submissions = aggregator.get_student_submission_by_subcategory(student_id, subcategory_id)
    if not submissions:
        print(f"No submissions found for subcategory {subcategory_id}")
    else:
        print(submissions[0]['submission_data'])
    return submissions


small_group_involvement = _show_first_submission(example_aggregator, student_id, small_group_subcategory_id)

print("Community service hours example")

community_service_subcategory_id = 'bc062d8d-6e16-4f0a-84ca-5fd9d7c10f8c'
service_hours = _show_first_submission(example_aggregator, student_id, community_service_subcategory_id)

print("Particpation points example")

gbe_subcategory_id = '0ceea111-1485-4a80-98a9-d82f3c12321c'
gbe_subcategory_submissions = _show_first_submission(example_aggregator, student_id, gbe_subcategory_id)


print("Professional development categories")
job_promotion_subcategory_id = 'a3bab151-0ce1-402f-b507-7d6c3489bc8c'
credential_subcategory_id = 'efdbc642-a52d-4872-ada5-2687fc03be73'

job_promotion_subcategory_submissions = _show_first_submission(example_aggregator, student_id, job_promotion_subcategory_id)

credential_subcategory_submissions = _show_first_submission(example_aggregator, student_id, credential_subcategory_id)


print("Lions Games example")

lions_games_subcategory_id = '49ccaacd-d437-4421-809a-f957c8b4baf8'
lions_games_subcategory_submissions = _show_first_submission(example_aggregator, student_id, lions_games_subcategory_id)



{'notes': 'small_group monthly check #1', 'status': 'not_involved', 'submission_type': 'small_group'}
Community service hours example
{'hours': 4, 'description': 'Magni culpo necessitatibus sulum timidus aeneus sit ancilla adstringo corporis.', 'organization': 'Community Garden', 'date_of_service': '2025-10-07', 'submission_type': 'community_service', 'supervisor_name': 'Trevor West', 'supervisor_contact': 'Chad.Zemlak@yahoo.com'}
Particpation points example
{'notes': 'gbe_participation attendance #1', 'points': 1, 'submission_type': 'participation'}
Professional development categories
{'description': 'Aqua certus undique paens dolorum illo capio video abbas currus adnuo patruus.', 'organization': 'ACU Apex', 'assigned_points': 50, 'promotion_title': 'Assistant → Senior Assistant', 'submission_type': 'job_promotion', 'supervisor_name': 'Eugene Kunze', 'date_of_promotion': '2025-04-14', 'supervisor_contact': 'Fred.Hirthe@gmail.com'}
{'description': 'Synagoga provident audacia patior dem

In [21]:
# Smoke-test the three read helpers on SubcategoryAggregator.
# NOTE: relies on `supabase_client` having been created by an earlier cell.

# Build the aggregator once instead of once per call
read_aggregator = SubcategoryAggregator(supabase_client)
lookup_student_id = '7630d221-e3aa-4126-a0e3-bee715160247'

#Testing get_subcategories
subcategories = read_aggregator.get_subcategories()
print(subcategories[:5])

#Testing get_student_submissions
student_submissions = read_aggregator.get_student_submissions(lookup_student_id)
print(student_submissions[:5])

#Testing get_student_submission_by_subcategory
student_submission = read_aggregator.get_student_submission_by_subcategory(lookup_student_id, 'f50830fe-b820-4223-89e2-e69241b459af')
print(student_submission)

[{'id': '8d13f1b9-33e1-4a62-be45-488a6834112f', 'category_id': 'fe870094-f029-4cd6-9af6-e42c735c4514', 'name': 'spiritual_formation_grade', 'display_name': 'Spiritual Formation Grade', 'data_source': 'populi', 'weight': 1.0, 'created_at': '2025-07-31T19:50:25.698898+00:00'}, {'id': '865e0e15-c14d-4b23-abd2-5f1b6ccf5dbc', 'category_id': 'fe870094-f029-4cd6-9af6-e42c735c4514', 'name': 'chapel_participation', 'display_name': 'Chapel Team Participation', 'data_source': 'student_input', 'weight': 1.0, 'created_at': '2025-07-31T19:50:25.698898+00:00'}, {'id': 'a32c3898-dbf1-4a92-a5db-811dfb6fcd0f', 'category_id': 'fe870094-f029-4cd6-9af6-e42c735c4514', 'name': 'small_group_involvement', 'display_name': 'Small Group Involvement', 'data_source': 'student_input', 'weight': 1.0, 'created_at': '2025-07-31T19:50:25.698898+00:00'}, {'id': 'e0bd5604-0692-42fe-8b4b-7ea2d339abc7', 'category_id': 'fe870094-f029-4cd6-9af6-e42c735c4514', 'name': 'dream_team_involvement', 'display_name': 'Dream Team Invol