In [1]:
from __future__ import annotations
import requests
from typing import Any, Dict, List, Set, Optional
from datetime import datetime
import json

In [2]:
class Helper:
    """Small formatting utilities shared by the notebook's classes."""

    @staticmethod
    def pretty_json(ip):
        """Serialize ``ip`` to an indented, human-readable JSON string.

        Declared as a static method so it can be called both on the class
        (``Helper.pretty_json(obj)``) and on an instance of any subclass
        (``obj.pretty_json(data)``); without the decorator the instance call
        would pass the instance itself as ``ip`` and fail with ``TypeError``.

        Args:
            ip: Any JSON-serializable object.

        Returns:
            The JSON text, indented by two spaces. Non-ASCII characters are
            kept as-is (``ensure_ascii=False``) rather than escaped.

        Raises:
            TypeError: If ``ip`` contains a value ``json.dumps`` cannot serialize.
        """
        return json.dumps(ip, indent=2, ensure_ascii=False)

In [3]:
class DataGathering(Helper):
    """Fetch a student's full profile and flatten it into one onboarding record.

    The profile is requested once, in the constructor. A failed request does
    not raise there: ``resp_body`` is set to ``None`` and the failure surfaces
    when :meth:`gathering` is called.

    Args:
        student_id: Identifier used in the profile endpoint path.
        preferred_languages: Language code copied into the output record.
        target_roles_raw: Free-text target role(s), copied through unchanged.
        interests_raw: Free-text interests, copied through unchanged.
        onboard_grp: What type of user this is, at a business level
            [Job_hunter, Student, Career_switcher, Professional, Upskill, Explorer].
        onboard_desc: What the user is actually trying to do right now.
    """

    # Endpoint settings, kept at class level so they can be overridden without
    # editing method bodies.
    BASE_URL = "https://eport-data-api-810737581373.asia-southeast1.run.app"
    REQUEST_TIMEOUT_SECONDS = 10
    # Format used to parse ``graduation_date`` strings.
    DATE_FORMAT = "%d/%m/%Y"

    def __init__(self,
                student_id:str,
                preferred_languages:str = "th",
                target_roles_raw:str    = "Data Analyst",
                interests_raw:str       = "ฝึกงาน",
                onboard_grp:str         = "Job_hunter",
                onboard_desc:str        = "เตรียมฝึกงานสายข้อมูล",
    ):
        self.student_id          = student_id
        self.preferred_languages = preferred_languages
        self.target_roles_raw    = target_roles_raw
        self.interests_raw       = interests_raw
        self.onboard_grp         = onboard_grp
        self.onboard_desc        = onboard_desc
        # Best-effort load: remember why it failed instead of discarding the
        # error, and let KeyboardInterrupt / SystemExit propagate.
        self._load_error = None
        try:
            self.resp_body = self._request_full_student_profile()
        except Exception as exc:
            self.resp_body = None
            self._load_error = exc

    def __str__(self):
        """Return a multi-line summary of the caller-supplied inputs."""
        lines = [
            "Input\n",
            f"self.preferred_languages = {self.preferred_languages}\n",
            f"self.target_roles_raw    = {self.target_roles_raw}\n",
            f"self.interests_raw       = {self.interests_raw}\n",
            f"self.onboard_grp         = {self.onboard_grp}\n",
            f"self.onboard_desc        = {self.onboard_desc}\n",
        ]
        return "".join(lines)

    def _request_full_student_profile(self) -> dict:
        """GET the student's full profile and return the decoded JSON body.

        Raises:
            requests.RequestException: On connection errors, timeouts, or a
                non-2xx status (via ``raise_for_status``).
            ValueError: If the response body is not valid JSON.
        """
        url  = f"{self.BASE_URL}/v1/students/{self.student_id}/full-profile"
        resp = requests.get(url, timeout=self.REQUEST_TIMEOUT_SECONDS)
        resp.raise_for_status()
        return resp.json()

    @property
    def _profile(self) -> Dict[str, Any]:
        """Return the ``student_profile`` section, or ``{}`` when it is absent."""
        body = self.resp_body or {}
        return body.get("student_profile") or {}

    @property
    def _skill(self):
        """Return skills as ``"name:level|name:level|"``, or ``"-"`` if none.

        Only the part of ``skill_level`` before the first underscore is kept.
        A repeated skill name keeps its first position and its last level.
        The trailing ``|`` is part of the output format.
        """
        skills = self._profile.get("skills")
        if not skills:
            return "-"
        level_by_name = {}
        for entry in skills:
            level_by_name[entry["skill_name"]] = entry["skill_level"]
        parts = []
        for name, level in level_by_name.items():
            # A missing level is rendered with the same "-" placeholder used
            # elsewhere in this class rather than crashing on ``None.split``.
            level_code = str(level).split("_")[0] if level else "-"
            parts.append(f"{name}:{level_code}|")
        return "".join(parts)

    def _LastDate(self,dates:list)->str:
        """Return the latest date string from ``dates`` (strict parsing).

        Raises:
            ValueError: If ``dates`` is empty or an entry does not match
                ``DATE_FORMAT``.
            TypeError: If an entry is not a string.
        """
        return max(dates, key=lambda d: datetime.strptime(d, self.DATE_FORMAT))

    @property
    def _edu_level_and_major(self):
        """Return ``{"degree", "major"}`` of the latest education entry.

        Uses the same selection as ``_infer_current_status``
        (``_get_latest_education``), so an entry whose graduation date is
        missing, ``"-"`` or unparseable no longer raises here. When every date
        is valid the entry with the maximum date is chosen. Missing values and
        an empty education list yield ``"-"``.
        """
        latest = self._get_latest_education(self._profile.get("education") or [])
        if latest is None:
            return {"degree": "-", "major": "-"}
        return {
            "degree": latest.get("degree", "-"),
            "major" : latest.get("major", "-"),
        }

    def _parse_date_safe(self,d):
        """Parse ``d`` with ``DATE_FORMAT``; return ``None`` if it cannot be parsed."""
        if not d or d == "-":
            return None
        try:
            return datetime.strptime(d, self.DATE_FORMAT)
        except (ValueError, TypeError):
            # TypeError covers non-string values such as numbers.
            return None

    def _get_latest_education(self,education_list:list)->Optional[dict]:
        """Return the most recent education entry, or ``None`` for an empty list.

        Entries without a parseable graduation date rank ahead of dated ones;
        among dated entries the latest date wins. Ties keep the entry that
        appears first in ``education_list``.
        """
        if not education_list:
            return None

        def recency(edu):
            grad_date = self._parse_date_safe(edu.get("graduation_date"))
            # ``datetime.min`` stands in for "no date" so the second tuple
            # element is always comparable.
            return (grad_date is None, grad_date or datetime.min)

        # ``max`` returns the first maximal element, matching a stable
        # descending sort followed by taking index 0.
        return max(education_list, key=recency)

    @property
    def _infer_current_status(self):
        """Classify the student as unknown / student / professional / graduate.

        * no education entries                         -> ``"unknown"``
        * latest entry has no graduation date (or "-") -> ``"student"``
        * otherwise, any experience entries            -> ``"professional"``
        * otherwise                                    -> ``"graduate"``
        """
        education  = self._profile.get("education") or []
        experience = self._profile.get("experience") or []
        if not education:
            return "unknown"
        latest_edu = self._get_latest_education(education)
        grad_date  = latest_edu.get("graduation_date")
        # Rule 1: still studying
        if not grad_date or grad_date == "-":
            return "student"
        # Rules 2 & 3: graduated, with or without work experience
        if experience:
            return "professional"
        return "graduate"

    def gathering(self):
        """Return the flattened onboarding record for this student.

        Raises:
            RuntimeError: If the profile could not be loaded in the
                constructor. The original failure, when recorded, is attached
                as ``__cause__``.
        """
        if not self.resp_body:
            raise RuntimeError(
                f"Student {self.student_id} profile not loaded"
            ) from getattr(self, "_load_error", None)
        # Evaluate once; the property recomputes on every access.
        education = self._edu_level_and_major
        return {
                "student_id"         : self.student_id,
                "preferred_language" : self.preferred_languages,
                "current_status"     : self._infer_current_status,
                "edu_level"          : education["degree"],
                "edu_major"          : education["major"],
                "target_roles_raw"   : self.target_roles_raw,
                "skills_raw"         : self._skill,
                "interests_raw"      : self.interests_raw,
                "onboard_grp"        : self.onboard_grp,
                "onboard_desc"       : self.onboard_desc
            }

In [4]:
# Build the onboarding record for student U-1001 and display it.
dg01 = DataGathering(
    student_id="U-1001",
    preferred_languages="th",
    target_roles_raw="Data Analyst",
    interests_raw="ฝึกงาน",
    onboard_grp="Job_hunter",  # business-level user type
    onboard_desc="เตรียมฝึกงานสายข้อมูล",  # what the user is trying to do right now
)
dg01.gathering()

{'student_id': 'U-1001',
 'preferred_language': 'th',
 'current_status': 'professional',
 'edu_level': 'PhD in Biological Science',
 'edu_major': 'Biological Science',
 'target_roles_raw': 'Data Analyst',
 'skills_raw': 'Data Analysis:L2|Lab Safety:L2|Synthetic Biology:L3|Metabolic Engineering:L3|Bioprocess Optimization:L3|Project Management:L3|Technical Presentation:L3|Documentation:L3|Molecular Biology:L4|Scientific Writing:L4|',
 'interests_raw': 'ฝึกงาน',
 'onboard_grp': 'Job_hunter',
 'onboard_desc': 'เตรียมฝึกงานสายข้อมูล'}

In [5]:
# Build the onboarding record for student U-1002 and display it.
dg02 = DataGathering(
    student_id="U-1002",
    preferred_languages="th",
    target_roles_raw="Data Analyst",
    interests_raw="ฝึกงาน",
    onboard_grp="Job_hunter",  # business-level user type
    onboard_desc="เตรียมฝึกงานสายข้อมูล",  # what the user is trying to do right now
)
dg02.gathering()

{'student_id': 'U-1002',
 'preferred_language': 'th',
 'current_status': 'professional',
 'edu_level': 'BSc in Computer Science',
 'edu_major': 'Computer Science',
 'target_roles_raw': 'Data Analyst',
 'skills_raw': 'Team Leadership:L2|MLOps:L3|Vector Databases:L3|Data Engineering:L3|Cloud Architecture:L3|System Design:L3|LLM Engineering:L4|Python Programming:L4|NLP:L4|RAG Engineering:L4|',
 'interests_raw': 'ฝึกงาน',
 'onboard_grp': 'Job_hunter',
 'onboard_desc': 'เตรียมฝึกงานสายข้อมูล'}

In [6]:
# Build the onboarding record for student U-1003 and display it.
dg03 = DataGathering(
    student_id="U-1003",
    preferred_languages="th",
    target_roles_raw="Data Analyst",
    interests_raw="ฝึกงาน",
    onboard_grp="Job_hunter",  # business-level user type
    onboard_desc="เตรียมฝึกงานสายข้อมูล",  # what the user is trying to do right now
)
dg03.gathering()

{'student_id': 'U-1003',
 'preferred_language': 'th',
 'current_status': 'graduate',
 'edu_level': 'High School',
 'edu_major': 'Science',
 'target_roles_raw': 'Data Analyst',
 'skills_raw': 'Data Visualization:L2|Business Analysis:L2|Presentation Skills:L2|Research Skills:L2|Excel:L3|SQL:L3|Communication Skills:L3|Problem Solving:L3|Teamwork:L3|Critical Thinking:L3|',
 'interests_raw': 'ฝึกงาน',
 'onboard_grp': 'Job_hunter',
 'onboard_desc': 'เตรียมฝึกงานสายข้อมูล'}

In [7]:
# Build the onboarding record for student U-1004 and display it.
dg04 = DataGathering(
    student_id="U-1004",
    preferred_languages="th",
    target_roles_raw="Data Analyst",
    interests_raw="ฝึกงาน",
    onboard_grp="Job_hunter",  # business-level user type
    onboard_desc="เตรียมฝึกงานสายข้อมูล",  # what the user is trying to do right now
)
dg04.gathering()

{'student_id': 'U-1004',
 'preferred_language': 'th',
 'current_status': 'unknown',
 'edu_level': '-',
 'edu_major': '-',
 'target_roles_raw': 'Data Analyst',
 'skills_raw': 'Metabolic Engineering:L3|',
 'interests_raw': 'ฝึกงาน',
 'onboard_grp': 'Job_hunter',
 'onboard_desc': 'เตรียมฝึกงานสายข้อมูล'}

In [8]:
# Build the onboarding record for student U-1005.
# The last recorded run of this cell ended in "profile not loaded", so the
# failure is caught and shown here instead of aborting a Restart & Run All.
dg05 = DataGathering(
    student_id           = "U-1005",
    preferred_languages  = "th",
    target_roles_raw     = "Data Analyst",
    interests_raw        = "ฝึกงาน",
    onboard_grp          = "Job_hunter",          # business-level user type
    onboard_desc         = "เตรียมฝึกงานสายข้อมูล",   # what the user is trying to do right now
)
try:
    print(dg05.gathering())
except RuntimeError as exc:
    # Same "<ExceptionType>: <message>" shape as the uncaught error summary.
    print(f"{type(exc).__name__}: {exc}")

RuntimeError: Student U-1005 profile not loaded