In [1]:
import numpy as np
import pandas as pd

In [22]:
# Load patient master data
pt = pd.read_csv('../data/Train/csv/patients.csv')
co = pd.read_csv('../data/Train/csv/conditions.csv')

In [23]:
# Define the mapping from DESCRIPTION to result DataFrame column names
description_to_column = {
    'Body mass index 30+ - obesity (finding)': 'Obese',
    'Body mass index 40+ - severely obese (finding)': 'Obese',
    'Essential hypertension (disorder)': 'Hypertension',
    'Atrial fibrillation (disorder)': 'Atrial_Fibrillation',
    'Diabetes mellitus type 2 (disorder)': 'Diabetes_Type2',
    'Seizure disorder (disorder)': 'Seizure_Disorder',
    'Major depression single episode (disorder)': 'Depression_Anxiety',
    'Major depressive disorder (disorder)': 'Depression_Anxiety',
    'Pulmonary emphysema (disorder)': 'COPD',
    'Respiratory distress (finding)': 'COPD',
    'Anemia (disorder)': 'Anemia',
    'Chronic kidney disease stage 1 (disorder)': 'Chronic_Kidney_Disease',
    'Chronic kidney disease stage 2 (disorder)': 'Chronic_Kidney_Disease',
    'Chronic kidney disease stage 3 (disorder)': 'Chronic_Kidney_Disease',
    'Chronic kidney disease stage 4 (disorder)': 'Chronic_Kidney_Disease',
    'Chronic kidney disease stage 5 (disorder)': 'Chronic_Kidney_Disease',
    'Laceration - injury (disorder)': 'MildInjury_Pain',
    'Injury of knee (disorder)': 'MildInjury_Pain',
    'Injury of tendon of the rotator cuff of shoulder (disorder)': 'MildInjury_Pain',
    'Concussion injury of brain (disorder)': 'MildInjury_Pain',
    'Injury of neck (disorder)': 'MildInjury_Pain',
    'Childhood asthma (disorder)': 'Asthma',
    'Asthma (disorder)': 'Asthma',
    'Appendicitis (disorder)': 'Appendicitis',
    'Drug overdose': 'Drug_Overdose'
}

# Replace DESCRIPTION in 'co' with corresponding column names and rename to CONDITION_SIMPLE
co['CONDITION_SIMPLE'] = co['DESCRIPTION'].map(description_to_column)

# Filter out any rows in 'co' where 'CONDITION_SIMPLE' is NaN
co = co.dropna(subset=['CONDITION_SIMPLE'])

# Create dummy variables for the new 'CONDITION_SIMPLE' column
conditions = pd.get_dummies(co['CONDITION_SIMPLE'])
co_expanded = co[['PATIENT']].join(conditions)  # Only join the 'PATIENT' column with the dummy variables

# Aggregate conditions by patient
co_grouped = co_expanded.groupby('PATIENT').sum().clip(upper=1).reset_index()
co_grouped

Unnamed: 0,PATIENT,Anemia,Appendicitis,Asthma,Atrial_Fibrillation,COPD,Chronic_Kidney_Disease,Depression_Anxiety,Diabetes_Type2,Drug_Overdose,Hypertension,MildInjury_Pain,Obese,Seizure_Disorder
0,001eaca0-15dc-89b7-e88d-62e3b72e3c12,0,0,0,0,0,0,0,0,0,1,0,1,0
1,0027a2c3-6f86-b683-cac4-5ac409c14b13,0,0,0,0,1,0,0,0,0,0,0,1,0
2,005d13a9-dd3e-0d7b-f9cf-ed71c9595174,0,0,0,0,0,0,0,0,0,1,1,1,1
3,005eb147-c797-8c12-b36d-3ed503bc1acc,0,0,1,0,0,0,0,0,0,0,0,0,0
4,0069ce6b-9027-a5ee-93e4-11fc408d1f27,0,0,0,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4523,ffe9d142-ea7f-ed22-ad6a-20e437b271df,0,0,0,0,1,1,0,0,0,1,0,1,0
4524,ffeafc74-78d4-b2a8-b497-31b0035b9c67,0,0,0,0,0,0,0,0,0,0,1,1,0
4525,ffebe97f-caea-96c4-77c1-cd1131268228,1,0,0,0,0,0,0,0,0,0,1,1,0
4526,ffed5ac4-37d0-f163-5a01-335130f30a90,0,0,0,0,0,0,0,0,0,0,1,0,0


In [24]:

# Process patient information
today = pd.Timestamp('2025-01-01')
pt['Age'] = (today - pd.to_datetime(pt['BIRTHDATE'])).dt.days // 365
pt['Age_Pediatric'] = (pt['Age'] < 18).astype(int)
pt['Age_Elderly'] = (pt['Age'] > 65).astype(int)
pt['Sex_Male'] = (pt['GENDER'] == 'M').astype(int)
pt['Sex_Female'] = (pt['GENDER'] == 'F').astype(int)

# Merge patient info with conditions
pt_renamed = pt.rename(columns={'Id': 'PATIENT'})
patient_conditions = pt_renamed[['PATIENT', 'Age_Pediatric', 'Age_Elderly', 'Sex_Male', 'Sex_Female']]
patient_data = patient_conditions.merge(co_grouped, on='PATIENT', how='left').fillna(0)

# Ensure the result only has the columns you requested
final_columns = [
    'PATIENT', 'Age_Pediatric', 'Age_Elderly', 'Sex_Male', 'Sex_Female',
    'Obese', 'Hypertension', 'Atrial_Fibrillation', 'Diabetes_Type2',
    'Seizure_Disorder', 'Depression_Anxiety', 'COPD', 'Anemia',
    'Chronic_Kidney_Disease', 'MildInjury_Pain', 'Asthma', 'Appendicitis',
    'Drug_Overdose'
]
patient_data = patient_data[final_columns]
patient_data.rename(columns={'PATIENT': 'patient_id'}, inplace=True)


In [47]:
patient_data.head(2)

Unnamed: 0,patient_id,Age_Pediatric,Age_Elderly,Sex_Male,Sex_Female,Obese,Hypertension,Atrial_Fibrillation,Diabetes_Type2,Seizure_Disorder,Depression_Anxiety,COPD,Anemia,Chronic_Kidney_Disease,MildInjury_Pain,Asthma,Appendicitis,Drug_Overdose
0,80708456-1afe-9125-dd92-932ed402cf83,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,7a219b32-8414-b18b-d7f6-0907e0e2b1af,0,0,1,0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


In [46]:
# # Mock patient data and the modifications to the master spec
# patient_data = pd.DataFrame({
#     'patient_id': [1, 2],
#     'age': [25, 70],
#     'sex': ['M', 'F'],
#     'Obese': [1, 0],
#     'Hypertension': [0, 1],
#     'Atrial_Fibrillation': [1, 0]
# })


# Base transition matrix
basis_transition_matrix = {
    "states": [
        "Neutral", "Cardiac Ischaemia", "Sepsis", "Acute Anxiety/Panic",
        "Breathing Difficulty", "Hypovolaemia", "Arrhythmic Flare", "Hypoglycemia",
        "TIA", "Bathroom (harmless)", "White Coat Syndrome (harmless)", "STEMI (crisis)",
        "Septic Shock (crisis)", "Compromised Airway (crisis)", "Haemorrhagic Shock (crisis)",
        "Stroke (crisis)", "Death"
    ],
    "transition_matrix": [
        [0.70, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.05, 0.05, 0, 0, 0, 0, 0, 0],
        [0.50, 0.30, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.20, 0, 0, 0, 0, 0],
        [0.15, 0, 0.45, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.40, 0, 0, 0, 0],
        [0.80, 0, 0, 0.20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0.50, 0, 0, 0, 0.35, 0, 0, 0, 0, 0, 0, 0, 0.15, 0, 0, 0, 0],
        [0.35, 0, 0, 0, 0, 0.40, 0, 0, 0, 0, 0, 0, 0, 0, 0.25, 0, 0],
        [0.65, 0, 0, 0, 0, 0, 0.20, 0, 0, 0, 0, 0.15, 0, 0, 0, 0, 0],
        [0.75, 0, 0, 0, 0, 0, 0, 0.17, 0, 0, 0, 0, 0, 0.08, 0, 0, 0],
        [0.70, 0, 0, 0, 0, 0, 0, 0, 0.10, 0, 0, 0, 0, 0, 0, 0.20, 0],
        [1.00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1.00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0.80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.20],
        [0, 0, 0.90, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.10],
        [0, 0, 0, 0, 0, 0, 0, 0.50, 0, 0, 0, 0, 0, 0, 0, 0, 0.50],
        [0, 0, 0, 0, 0, 0.70, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.30],
        [0, 0, 0, 0, 0, 0, 0, 0, 0.85, 0, 0, 0, 0, 0, 0, 0, 0.15],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.00]
    ]
}


# Define the MASTER_SPEC as described in the user's scenario
MASTER_SPEC = {
    "Age_Pediatric": {"risk_modifiers": {"sepsis_warning": 1.0, "pre_mi_warning": 0.5}},
    "Age_Elderly": {"risk_modifiers": {"pre_mi_warning": 1.5, "sepsis_warning": 1.5}},
    "Sex_Male": {"risk_modifiers": {"pre_mi_warning": 1.2}},
    "Sex_Female": {"risk_modifiers": {"pre_mi_warning": 1.0}},
    "Obese": {"risk_modifiers": {"pre_mi_warning": 2.0, "stemi_crisis": 1.5, "sepsis_warning": 1.0}},
    "Underweight": {"risk_modifiers": {"infection_warning": 1.3}},
    "Hypertension": {"risk_modifiers": {"pre_mi_warning": 2.0, "stemi_crisis_if_in_pre_mi_warning": 1.5}},
    "Atrial_Fibrillation": {"risk_modifiers": {"pre_mi_warning": 1.2, "stroke_crisis": 2.0}},
    "Diabetes_Type2": {"risk_modifiers": {"hypoglycemia_warning": 1.5}},
    "Seizure_Disorder": {"risk_modifiers": {"seizure_crisis": 1.5}},
    "Depression_Anxiety": {"risk_modifiers": {"panic_warning": 2.0}},
    "COPD": {"risk_modifiers": {"breathing_difficulty_warning": 1.5, "compromised_airway_crisis_if_in_breathing_difficulty": 1.3}},
    "Hemophilia": {"risk_modifiers": {"hypovolemia_warning": 2.0, "hemorrhagic_crisis_if_in_hypovolemia": 2.0}},
    "Anemia": {"risk_modifiers": {"hemorrhagic_crisis_if_in_hypovolemia": 1.5, "breathing_difficulty_warning": 1.2}},
    "Chronic_Kidney_Disease": {"risk_modifiers": {"sepsis_warning": 1.2, "hemorrhagic_crisis_if_in_hypovolemia": 1.2}},
    "MildInjury_Pain": {},
    "ChestPain": {"risk_modifiers": {"cardiac_ischaemia_warning": 0.8}},
    "Asthma": {"risk_modifiers": {"breathing_difficulty_warning": 1.0}},
    "Appendicitis": {"risk_modifiers": {"sepsis_warning": 1.3}},
    "Drug_Overdose": {"risk_modifiers": {"breathing_difficulty_warning": 1.5}},
}

state_mapping = {
    "sepsis_warning": "Sepsis",
    "pre_mi_warning": "Cardiac Ischaemia",
    "stemi_crisis": "STEMI (crisis)",
    "hypoglycemia_warning": "Hypoglycemia",
    "seizure_crisis": "TIA",
    "panic_warning": "Acute Anxiety/Panic",
    "breathing_difficulty_warning": "Breathing Difficulty",
    "compromised_airway_crisis_if_in_breathing_difficulty": "Compromised Airway (crisis)",
    "hypovolemia_warning": "Hypovolaemia",
    "hemorrhagic_crisis_if_in_hypovolemia": "Haemorrhagic Shock (crisis)",
    "stroke_crisis": "Stroke (crisis)"
}


def create_patient_specific_matrices(basis_transition_matrix, patient_data, master_spec, state_mapping):
    states = basis_transition_matrix['states']
    base_matrix = np.array(basis_transition_matrix['transition_matrix'])
    
    patient_matrices = {}
    
    for index, patient in patient_data.iterrows():
        #print(patient)
        patient_matrix = np.copy(base_matrix)
        
        for characteristic, details in master_spec.items():
            #print(characteristic, details)
            if patient.get(characteristic, 0) == 1 and "risk_modifiers" in details:
                #print(1)
                for modified_state, modifier in details['risk_modifiers'].items():
                    #print(modified_state, modifier)
                    if modified_state in state_mapping and state_mapping[modified_state] in states:
                        #print(2)
                        from_state_index = states.index(state_mapping[modified_state])
                        #print(from_state_index)
                        for to_state_index, _ in enumerate(states):
                            #print('current:', patient_matrix[to_state_index, from_state_index])
                            patient_matrix[to_state_index, from_state_index] *= modifier
                        
        #print(patient_matrix)
        #break
        
        patient_matrices[patient['patient_id']] = patient_matrix
    
    return patient_matrices


# Applying the function with the mapping
patient_matrices = create_patient_specific_matrices(basis_transition_matrix, patient_data, MASTER_SPEC, state_mapping)
patient_matrices


{'80708456-1afe-9125-dd92-932ed402cf83': array([[0.7  , 0.03 , 0.025, 0.025, 0.025, 0.025, 0.025, 0.025, 0.025,
         0.05 , 0.05 , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.5  , 0.36 , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
         0.   , 0.   , 0.2  , 0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.15 , 0.   , 0.45 , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
         0.   , 0.   , 0.   , 0.4  , 0.   , 0.   , 0.   , 0.   ],
        [0.8  , 0.   , 0.   , 0.2  , 0.   , 0.   , 0.   , 0.   , 0.   ,
         0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ],
        [0.5  , 0.   , 0.   , 0.   , 0.35 , 0.   , 0.   , 0.   , 0.   ,
         0.   , 0.   , 0.   , 0.15 , 0.   , 0.   , 0.   , 0.   ],
        [0.35 , 0.   , 0.   , 0.   , 0.   , 0.4  , 0.   , 0.   , 0.   ,
         0.   , 0.   , 0.   , 0.   , 0.   , 0.25 , 0.   , 0.   ],
        [0.65 , 0.   , 0.   , 0.   , 0.   , 0.   , 0.2  , 0.   , 0.   ,
         0.   , 0.   , 0.15 , 0.   , 0.   , 0.   , 0.   , 0.