# Subcategory Aggregators (nbdev)

Implement and test subcategory aggregation logic here. Export production code with `#| export` cells.


In [None]:
#| default_export aggregators
#| export
import logging
from datetime import datetime
from typing import Optional, Dict, Any

from supabase import Client

# Module-level logger, named after this module; SubcategoryAggregator logs through it.
logger = logging.getLogger(__name__)

class SubcategoryAggregator:
    """Turn a student's ``event_submissions`` rows into per-subcategory scores.

    Every ``aggregate_*`` method reads rows through the Supabase client given
    to the constructor, keeps the rows whose ``submitted_at`` falls in the
    requested year (and, for most methods, whose
    ``submission_data['submission_type']`` matches), and returns a dict with
    the score plus the per-submission details it was computed from.
    ``aggregate_subcategory_score`` picks the right method for a subcategory
    name and returns only the numeric score.

    Rows are handled defensively: a null ``submission_data`` is treated as an
    empty dict, null numeric fields count as 0, and a row whose
    ``submitted_at`` cannot be parsed is logged and skipped instead of
    failing the whole aggregation.

    NOTE(review): rows are matched on the *calendar* year of ``submitted_at``;
    confirm that this is what callers mean by ``academic_year``.
    NOTE(review): the Supabase calls inside these coroutines are not awaited;
    if ``Client`` is the blocking client they will block the event loop.
    """

    # Upper bound on the yearly community-service total that is credited.
    COMMUNITY_SERVICE_CAP = 12
    # Upper bound on the hours credited for one submission.
    # NOTE(review): despite the name this is applied per submission, not per
    # calendar day -- confirm which one is intended.
    DAILY_HOURS_CAP = 8
    # Average ratings in (0, MAX_RATING_SCALE] are rescaled to a 0-100 score.
    MAX_RATING_SCALE = 10

    BINARY_ATTENDANCE_SUBCATEGORIES = {
        'chapel_attendance', 'fellow_friday_attendance', 'gbe_attendance', 'company_community_events'
    }
    STAFF_ASSIGNED_SUBCATEGORIES = {
        'credentials_certifications', 'job_promotion_opportunities'
    }
    BINARY_MONTHLY_CHECK_SUBCATEGORIES = {
        'small_group_involvement', 'dream_team_involvement'
    }
    PERFORMANCE_RATING_SUBCATEGORIES = {
        'chapel_participation', 'company_team_building'
    }

    # submission_type values counted for each staff-assigned subcategory.
    _STAFF_ASSIGNED_SUBMISSION_TYPES = {
        'credentials_certifications': ('credentials',),
        'job_promotion_opportunities': ('job_promotion',),
    }
    # submission_type values counted for each monthly-check subcategory.
    _MONTHLY_CHECK_SUBMISSION_TYPES = {
        'small_group_involvement': ('small_group',),
        'dream_team_involvement': ('dream_team',),
    }

    def __init__(self, supabase_client: Client):
        """Keep *supabase_client* for all later queries."""
        self.supabase = supabase_client
        logger.info("SubcategoryAggregator initialized")

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    async def _get_subcategory_id_by_name(self, name: str) -> Optional[str]:
        """Return the ``id`` of the ``subcategories`` row named *name*, or ``None``."""
        resp = self.supabase.table('subcategories').select('id').eq('name', name).limit(1).execute()
        if resp.data:
            return resp.data[0]['id']
        return None

    async def _fetch_subcategory_submissions(self, student_id: str, subcategory_name: str) -> list:
        """Return the student's submissions for one subcategory.

        Returns an empty list when the subcategory name is unknown or when
        there are no matching rows.
        """
        subcategory_id = await self._get_subcategory_id_by_name(subcategory_name)
        if not subcategory_id:
            return []
        response = (
            self.supabase.table('event_submissions')
            .select('id, submission_data, submitted_at')
            .eq('student_id', student_id)
            .eq('subcategory_id', subcategory_id)
            .execute()
        )
        return response.data or []

    @staticmethod
    def _parse_timestamp(raw: Any) -> Optional[datetime]:
        """Parse an ISO-8601 timestamp, returning ``None`` if it is unusable.

        ``datetime.fromisoformat`` on Python < 3.11 rejects a trailing ``Z``
        and fractional seconds that are not exactly 3 or 6 digits, so both
        are normalised before parsing.
        """
        if isinstance(raw, datetime):
            return raw
        if not isinstance(raw, str):
            return None
        text = raw.strip()
        if text[-1:] in ('Z', 'z'):
            text = text[:-1] + '+00:00'
        head, dot, tail = text.partition('.')
        if dot:
            # Pad/truncate the fractional seconds to exactly six digits and
            # keep whatever follows them (the UTC offset, if any).
            digit_count = len(tail) - len(tail.lstrip('0123456789'))
            fraction = tail[:digit_count][:6].ljust(6, '0')
            text = f"{head}.{fraction}{tail[digit_count:]}"
        try:
            return datetime.fromisoformat(text)
        except ValueError:
            return None

    @staticmethod
    def _to_number(value: Any, default: Optional[float] = 0.0) -> Optional[float]:
        """Convert *value* to ``float``; return *default* for null or invalid input."""
        if value is None:
            return default
        try:
            return float(value)
        except (TypeError, ValueError):
            logger.warning("Ignoring non-numeric value %r in submission data", value)
            return default

    def _rows_in_year(self, rows, academic_year: int, submission_types=None):
        """Yield ``(row, data, day)`` for each row that should be counted.

        *data* is the row's ``submission_data`` (``{}`` when it is null or not
        a dict) and *day* is the date part of ``submitted_at``.  When
        *submission_types* is given, only rows whose
        ``data['submission_type']`` is in it are yielded.  The type filter
        runs before date parsing so unrelated rows never trigger a warning.
        """
        for row in rows:
            data = row.get('submission_data')
            if not isinstance(data, dict):
                data = {}
            if submission_types is not None and data.get('submission_type') not in submission_types:
                continue
            stamp = self._parse_timestamp(row.get('submitted_at'))
            if stamp is None:
                logger.warning(
                    "Skipping submission %s: unparseable submitted_at %r",
                    row.get('id'), row.get('submitted_at'),
                )
                continue
            day = stamp.date()
            if day.year == academic_year:
                yield row, data, day

    def _summarize_presence(self, rows, academic_year: int, submission_types) -> Dict[str, Any]:
        """Count rows whose ``status`` is ``'present'`` against all counted rows.

        A missing ``status`` is treated as ``'absent'``.  Returns the keys
        ``percentage`` (0-100, two decimals), ``present_count``,
        ``total_count`` and ``submissions``.
        """
        present = 0
        entries = []
        for row, data, day in self._rows_in_year(rows, academic_year, submission_types):
            status = data.get('status', 'absent')
            if status == 'present':
                present += 1
            entries.append({'submission_id': row.get('id'), 'date': day.isoformat(), 'status': status})
        total = len(entries)
        pct = (present / total * 100) if total > 0 else 0
        return {'percentage': round(pct, 2), 'present_count': present, 'total_count': total, 'submissions': entries}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def aggregate_subcategory_score(self, student_id: str, subcategory_name: str, academic_year: int) -> float:
        """Return the numeric score of one subcategory for one student and year.

        Dispatches on *subcategory_name* to the matching ``aggregate_*``
        method and extracts its headline figure.  Unknown names are logged
        and score ``0.0``.  Any exception raised while aggregating is logged
        with its traceback and also results in ``0.0`` (best effort: one
        broken subcategory must not abort the caller).
        """
        try:
            if subcategory_name == 'community_service_hours':
                result = await self.aggregate_community_service_hours(student_id, academic_year)
                score = result['capped_hours']
            elif subcategory_name in self.BINARY_ATTENDANCE_SUBCATEGORIES:
                result = await self.aggregate_attendance_percentage(student_id, subcategory_name, academic_year)
                score = result['attendance_percentage']
            elif subcategory_name in self.STAFF_ASSIGNED_SUBCATEGORIES:
                result = await self.aggregate_staff_assigned_points(student_id, subcategory_name, academic_year)
                score = result['total_points']
            elif subcategory_name in self.BINARY_MONTHLY_CHECK_SUBCATEGORIES:
                result = await self.aggregate_binary_monthly_check(student_id, subcategory_name, academic_year)
                score = result['participation_percentage']
            elif subcategory_name in self.PERFORMANCE_RATING_SUBCATEGORIES:
                result = await self.aggregate_performance_ratings(student_id, subcategory_name, academic_year)
                score = result['converted_score']
            elif subcategory_name == 'fellow_friday_participation':
                result = await self.aggregate_points_based_scoring(student_id, subcategory_name, academic_year)
                score = result['total_points']
            elif subcategory_name == 'gbe_participation':
                result = await self.aggregate_attendance_plus_bonus(student_id, subcategory_name, academic_year)
                score = result['total_score']
            else:
                logger.warning("Unknown subcategory type: %s", subcategory_name)
                return 0.0
            # The aggregators may hand back ints (e.g. a cap value or an
            # empty total); honour the declared return type.
            return float(score)
        except Exception as e:
            logger.exception("Error aggregating %s for student %s: %s", subcategory_name, student_id, e)
            return 0.0

    async def aggregate_community_service_hours(self, student_id: str, academic_year: int) -> Dict[str, Any]:
        """Sum the student's ``community_service`` hours for the year.

        All of the student's submissions are fetched (no subcategory filter)
        and narrowed by ``submission_type``.  Each submission is credited at
        most ``DAILY_HOURS_CAP`` hours and the yearly total at most
        ``COMMUNITY_SERVICE_CAP``.

        Returns ``total_hours`` (before the yearly cap), ``capped_hours``,
        ``submission_count``, ``submissions`` and ``cap_applied``.
        """
        response = (
            self.supabase.table('event_submissions')
            .select('id, submission_data, submitted_at')
            .eq('student_id', student_id)
            .execute()
        )
        total_hours = 0
        details = []
        for row, data, day in self._rows_in_year(response.data or [], academic_year, ('community_service',)):
            hours = self._to_number(data.get('hours'))
            credited = min(hours, self.DAILY_HOURS_CAP)
            total_hours += credited
            details.append({
                'submission_id': row.get('id'),
                'date': day.isoformat(),
                'raw_hours': hours,
                'capped_hours': credited,
            })
        return {
            'total_hours': total_hours,
            'capped_hours': min(total_hours, self.COMMUNITY_SERVICE_CAP),
            'submission_count': len(details),
            'submissions': details,
            'cap_applied': total_hours > self.COMMUNITY_SERVICE_CAP,
        }

    async def aggregate_attendance_percentage(self, student_id: str, subcategory_name: str, academic_year: int) -> Dict[str, Any]:
        """Return the share of ``attendance`` submissions marked ``present``.

        Returns ``attendance_percentage`` (0-100), ``present_count``,
        ``total_count`` and ``submissions``; all zero/empty when the
        subcategory is unknown or has no rows.
        """
        rows = await self._fetch_subcategory_submissions(student_id, subcategory_name)
        summary = self._summarize_presence(rows, academic_year, ('attendance',))
        return {
            'attendance_percentage': summary['percentage'],
            'present_count': summary['present_count'],
            'total_count': summary['total_count'],
            'submissions': summary['submissions'],
        }

    async def aggregate_staff_assigned_points(self, student_id: str, subcategory_name: str, academic_year: int) -> Dict[str, Any]:
        """Sum ``assigned_points`` over the subcategory's staff-entered rows.

        Only rows whose ``submission_type`` belongs to the subcategory
        (``credentials`` or ``job_promotion``) are counted; any other
        subcategory name matches nothing.

        Returns ``total_points``, ``submission_count`` and ``submissions``.
        """
        accepted = self._STAFF_ASSIGNED_SUBMISSION_TYPES.get(subcategory_name, ())
        rows = await self._fetch_subcategory_submissions(student_id, subcategory_name)
        total_points = 0
        entries = []
        for row, data, day in self._rows_in_year(rows, academic_year, accepted):
            points = self._to_number(data.get('assigned_points'))
            total_points += points
            entries.append({
                'submission_id': row.get('id'),
                'date': day.isoformat(),
                'points': points,
                'type': data.get('submission_type'),
            })
        return {'total_points': total_points, 'submission_count': len(entries), 'submissions': entries}

    async def aggregate_performance_ratings(self, student_id: str, subcategory_name: str, academic_year: int) -> Dict[str, Any]:
        """Average the ``rating`` values of the subcategory's rows for the year.

        Rows without a usable ``rating`` are ignored; ``submission_type`` is
        not checked.  An average in ``(0, MAX_RATING_SCALE]`` is rescaled to
        0-100; any other average is returned unchanged
        (presumably already on a 0-100 scale -- TODO confirm).

        Returns ``average_rating``, ``converted_score``, ``rating_count`` and
        ``submissions``.
        """
        rows = await self._fetch_subcategory_submissions(student_id, subcategory_name)
        ratings = []
        entries = []
        for row, data, day in self._rows_in_year(rows, academic_year):
            rating = self._to_number(data.get('rating'), default=None)
            if rating is None:
                continue
            ratings.append(rating)
            entries.append({'submission_id': row.get('id'), 'date': day.isoformat(), 'rating': rating})
        avg = sum(ratings) / len(ratings) if ratings else 0
        if 0 < avg <= self.MAX_RATING_SCALE:
            # Equals the former hard-coded "* 10" while MAX_RATING_SCALE is 10.
            converted = avg * (100 / self.MAX_RATING_SCALE)
        else:
            converted = avg
        return {
            'average_rating': round(avg, 2),
            'converted_score': round(converted, 2),
            'rating_count': len(ratings),
            'submissions': entries,
        }

    async def aggregate_binary_monthly_check(self, student_id: str, subcategory_name: str, academic_year: int) -> Dict[str, Any]:
        """Return the share of monthly check-ins marked ``present``.

        Only rows whose ``submission_type`` belongs to the subcategory
        (``small_group`` or ``dream_team``) are counted; any other
        subcategory name matches nothing.

        Returns ``participation_percentage`` (0-100), ``present_count``,
        ``total_count`` and ``submissions``.
        """
        accepted = self._MONTHLY_CHECK_SUBMISSION_TYPES.get(subcategory_name, ())
        rows = await self._fetch_subcategory_submissions(student_id, subcategory_name)
        summary = self._summarize_presence(rows, academic_year, accepted)
        return {
            'participation_percentage': summary['percentage'],
            'present_count': summary['present_count'],
            'total_count': summary['total_count'],
            'submissions': summary['submissions'],
        }

    async def aggregate_points_based_scoring(self, student_id: str, subcategory_name: str, academic_year: int) -> Dict[str, Any]:
        """Sum ``points`` over the subcategory's ``fellow_friday`` submissions.

        Returns ``total_points``, ``submission_count`` and ``submissions``.
        """
        rows = await self._fetch_subcategory_submissions(student_id, subcategory_name)
        total = 0
        entries = []
        for row, data, day in self._rows_in_year(rows, academic_year, ('fellow_friday',)):
            points = self._to_number(data.get('points'))
            total += points
            entries.append({'submission_id': row.get('id'), 'date': day.isoformat(), 'points': points})
        return {'total_points': total, 'submission_count': len(entries), 'submissions': entries}

    async def aggregate_attendance_plus_bonus(self, student_id: str, subcategory_name: str, academic_year: int) -> Dict[str, Any]:
        """Combine attendance percentage with ``gbe_participation`` bonus points.

        The subcategory's rows are fetched once: ``attendance`` rows give the
        percentage and ``gbe_participation`` rows contribute their
        ``bonus_points``.  Every bonus value is added to the total, but only
        positive ones are listed in ``bonus_submissions``.

        Returns ``attendance_percentage``, ``bonus_points``, ``total_score``,
        ``attendance_submissions`` and ``bonus_submissions``.
        """
        rows = await self._fetch_subcategory_submissions(student_id, subcategory_name)
        attendance = self._summarize_presence(rows, academic_year, ('attendance',))
        bonus_points = 0
        bonus_submissions = []
        for row, data, day in self._rows_in_year(rows, academic_year, ('gbe_participation',)):
            bonus = self._to_number(data.get('bonus_points'))
            bonus_points += bonus
            if bonus > 0:
                bonus_submissions.append({'submission_id': row.get('id'), 'date': day.isoformat(), 'bonus_points': bonus})
        total_score = attendance['percentage'] + bonus_points
        return {
            'attendance_percentage': attendance['percentage'],
            'bonus_points': bonus_points,
            'total_score': round(total_score, 2),
            'attendance_submissions': attendance['submissions'],
            'bonus_submissions': bonus_submissions,
        }
