In [None]:
import os
import sys
import json
import pandas as pd

# Root of the prediction module. Every path below is derived from it, so the
# location only has to be changed in one place.
PREDICTION_DIR = '/data/repos/actin-personalization/prediction'

# Work from the module root and put its Python sources first on sys.path so the
# project-local `data` package imported below can be resolved.
os.chdir(PREDICTION_DIR)
sys.path.insert(0, os.path.abspath("src/main/python"))

from data.lookups import lookup_manager

# Load the patient record from the module's test resources. The encoding is
# given explicitly so decoding does not depend on the machine's locale default.
with open(os.path.join(PREDICTION_DIR, "src/test/resources/crc_patient_record.json"), encoding="utf-8") as f:
    patient = json.load(f)

In [None]:
def load_patient_df(patient: dict, reference_year: int = 2025) -> pd.DataFrame:
    """Flatten a patient-record dict into a single-row feature DataFrame.

    The record is read from these sections: ``patient``, ``tumor``,
    ``clinicalStatus``, ``comorbidities``, ``labValues`` and
    ``molecularHistory.molecularTests``. A section that is missing or is JSON
    ``null`` is treated as empty instead of raising.

    Args:
        patient: Decoded patient record (the result of ``json.load``).
        reference_year: Year from which ``birthYear`` is subtracted to obtain
            ``ageAtMetastaticDiagnosis``. Defaults to 2025, the value that was
            previously hard-coded.

    Returns:
        A DataFrame with exactly one row. Its columns are the entries of
        ``lookup_manager.features``; a feature that this function does not
        populate is ``None``, and any value computed here that is not listed
        in ``lookup_manager.features`` is dropped.

    Raises:
        KeyError: If an entry of ``labValues`` has no ``"measurement"`` key.
    """

    def as_dict(value) -> dict:
        # dict.get's default only covers a *missing* key; an explicit JSON null
        # comes back as None and would break the chained .get() calls.
        return value if isinstance(value, dict) else {}

    def as_list(value) -> list:
        return value if isinstance(value, list) else []

    demographics = as_dict(patient.get("patient"))
    tumor = as_dict(patient.get("tumor"))
    clinical_status = as_dict(patient.get("clinicalStatus"))
    comorbidities = as_list(patient.get("comorbidities"))
    molecular_tests = as_list(as_dict(patient.get("molecularHistory")).get("molecularTests"))
    prior_primaries = as_list(tumor.get("priorPrimaries"))

    # Keyed by measurement name; if a measurement occurs more than once the
    # last entry in the list wins.
    lab_values = {lab["measurement"]: lab for lab in as_list(patient.get("labValues"))}

    def lab_value(measurement: str):
        """Return the ``value`` of the given lab measurement, or None if absent."""
        return as_dict(lab_values.get(measurement)).get("value")

    def has_icd(icd_codes: list[str]) -> bool:
        """Return True if any comorbidity ICD code starts with one of ``icd_codes``."""
        prefixes = tuple(icd_codes)  # str.startswith accepts a tuple of prefixes
        return any(
            # Non-string entries (e.g. null) cannot match a prefix and are ignored.
            isinstance(icd, str) and icd.startswith(prefixes)
            for comorbidity in comorbidities
            for icd in as_list(as_dict(comorbidity).get("icdCodes"))
        )

    # Collect *every* event per gene across all tests. Keeping a single variant
    # per gene would let a later variant hide e.g. a V600E reported earlier.
    events_by_gene: dict = {}
    for test in molecular_tests:
        drivers = as_dict(as_dict(test).get("drivers"))
        for variant in as_list(drivers.get("variants")):
            variant = as_dict(variant)
            # A null event is stored as "" so the substring checks stay valid.
            events_by_gene.setdefault(variant.get("gene"), []).append(variant.get("event") or "")

    def has_event(gene: str, marker: str) -> bool:
        """Return True if any recorded event of ``gene`` contains ``marker``."""
        return any(marker in event for event in events_by_gene.get(gene, ()))

    def is_msi_unstable(test) -> bool:
        characteristics = as_dict(as_dict(test).get("characteristics"))
        stability = as_dict(characteristics.get("microsatelliteStability"))
        return bool(stability.get("isUnstable", False))

    # Without a birth year the age is unknown; report None rather than the
    # reference year itself.
    birth_year = demographics.get("birthYear")
    age = reference_year - birth_year if birth_year is not None else None

    patient_dict = {
        "sex": demographics.get("gender"),
        "ageAtMetastaticDiagnosis": age,
        "numberOfPriorTumors": len(prior_primaries),
        "hasDoublePrimaryTumor": any(as_dict(p).get("status") == "ACTIVE" for p in prior_primaries),

        # "primaryTumorType": #TODO #see doids.json (but: no specific code for specific CRC variations?)
        # "primaryTumorTypeLocation": , #TODO
        # "sidedness": , #TODO

        # "anorectalVergeDistanceCategory": #TODO,
        # "mesorectalFasciaIsClear": #TODO
        # "distanceToMesorectalFasciaMm": #TODO,

        # "differentiationGrade": #TODO,
        # "clinicalTnmT": #TODO,
        # "clinicalTnmN": #TODO
        # "clinicalTnmM": #TODO,
        # "pathologicalTnmT": #TODO,
        # "pathologicalTnmN": #TODO,
        # "pathologicalTnmM": #TODO,
        # NOTE(review): both stage features are filled from the same "stage"
        # field - confirm this is the intended approximation.
        "clinicalTumorStage": tumor.get("stage"),
        "pathologicalTumorStage": tumor.get("stage"),

        # "investigatedLymphNodesCountPrimaryDiagnosis": #TODO,
        # "positiveLymphNodesCountPrimaryDiagnosis": #TODO
        # "presentedWithIleus": #TODO,
        # "presentedWithPerforation": #TODO,
        # "extraMuralInvasionCategory": #TODO,
        # "tumorRegression": #TODO
        # "daysBetweenPrimaryAndMetastaticDiagnosis": #TODO,
        "hasLiverOrIntrahepaticBileDuctMetastases": tumor.get("hasLiverLesions"),
        # "numberOfLiverMetastases": #TODO,
        # "maximumSizeOfLiverMetastasisMm": #TODO,
        "hasLymphNodeMetastases": tumor.get("hasLymphNodeLesions"),
        # "investigatedLymphNodesCountMetastaticDiagnosis": #TODO,
        # "positiveLymphNodesCountMetastaticDiagnosis": #TODO,
        # "hasPeritonealMetastases": #TODO,
        "hasBronchusOrLungMetastases": tumor.get("hasLungLesions"),
        "hasBrainMetastases": tumor.get("hasBrainLesions"),
        "hasOtherMetastases": bool(tumor.get("otherLesions")),

        "whoAssessmentAtMetastaticDiagnosis": clinical_status.get("who"),
        # "asaAssessmentAtMetastaticDiagnosis": #TODO,
        "lactateDehydrogenaseAtMetastaticDiagnosis": lab_value("LACTATE_DEHYDROGENASE"),
        "alkalinePhosphataseAtMetastaticDiagnosis": lab_value("ALKALINE_PHOSPHATASE"),
        "leukocytesAbsoluteAtMetastaticDiagnosis": lab_value("LEUKOCYTES_ABS"),
        "carcinoembryonicAntigenAtMetastaticDiagnosis": lab_value("CARCINOEMBRYONIC_ANTIGEN"),
        "albumineAtMetastaticDiagnosis": lab_value("ALBUMIN"),
        "neutrophilsAbsoluteAtMetastaticDiagnosis": lab_value("NEUTROPHILS_ABS"),

        # "hasHadPrimarySurgeryPriorToMetastaticTreatment": #TODO,
        # "hasHadPrimarySurgeryDuringMetastaticTreatment": #TODO,
        # "hasHadGastroenterologySurgeryPriorToMetastaticTreatment": #TODO,
        # "hasHadGastroenterologySurgeryDuringMetastaticTreatment": #TODO,
        # "hasHadHipecPriorToMetastaticTreatment": #TODO,
        # "hasHadHipecDuringMetastaticTreatment": #TODO,
        # "hasHadPrimaryRadiotherapyPriorToMetastaticTreatment": #TODO,
        # "hasHadPrimaryRadiotherapyDuringMetastaticTreatment": #TODO,
        # "hasHadMetastaticSurgery": #TODO,
        # "hasHadMetastaticRadiotherapy": #TODO,

        # "charlsonComorbidityIndex": #TODO,
        "hasAids": has_icd(["B24"]),
        "hasCongestiveHeartFailure": has_icd(["I50"]),
        "hasCollagenosis": has_icd(["M35.9", "M35.8", "L87.1"]),
        "hasCopd": has_icd(["J40", "J42", "J43", "J44"]),
        "hasCerebrovascularDisease": has_icd(["I60", "I61", "I62", "I63", "I64", "I65", "I66", "I67", "I68", "I69"]),
        "hasDementia": has_icd(["F00", "F01", "F02", "F03"]),
        "hasDiabetesMellitus": has_icd(["E10", "E11", "E12", "E13", "E14"]),
        "hasDiabetesMellitusWithEndOrganDamage": has_icd(["E10.2", "E11.2", "E12.2", "E13.2", "E14.2"]),
        # "hasOtherMalignancy": ,
        # "hasOtherMetastaticSolidTumor":  ,
        "hasMyocardialInfarct": has_icd(["I21"]),
        "hasMildLiverDisease": has_icd(["K70"]),
        "hasHemiplegiaOrParaplegia": has_icd(["G81", "G82"]),
        "hasPeripheralVascularDisease": has_icd(["I73"]),
        "hasRenalDisease": has_icd(["N10", "N11", "N12", "N13", "N14", "N15", "N16", "N17", "N18", "N19"]),
        "hasLiverDisease": has_icd(["K71", "K72", "K73", "K74", "K75", "K76"]),
        "hasUlcerDisease": has_icd(["K25", "K26", "K27", "K28"]),

        "hasMsi": any(is_msi_unstable(test) for test in molecular_tests),
        "hasBrafMutation": "BRAF" in events_by_gene,
        "hasBrafV600EMutation": has_event("BRAF", "V600E"),
        "hasRasMutation": any(gene in events_by_gene for gene in ("KRAS", "NRAS", "HRAS")),
        "hasKrasG12CMutation": has_event("KRAS", "G12C"),
    }

    # Project onto the feature list: columns follow lookup_manager.features,
    # and features not populated above become None.
    feature_row = {key: patient_dict.get(key, None) for key in lookup_manager.features}

    return pd.DataFrame([feature_row])

# Turn the loaded patient record into its single-row feature frame.
df_extracted = load_patient_df(patient=patient)

# Final expression of the cell, so the notebook renders it; 5 rows is the
# default that head() uses when no count is given.
df_extracted.head(n=5)