In [49]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
PATIENT_LIST = [
    775,  787,  788, 1123, 1169, 1170, 1171, 1172, 1173, 1983, 2110, 2195,
    2955, 2956, 2957, 2958, 2959, 2960, 2961, 2962, 2963, 3081, 3229, 3318, 3432
]

### LOADING

In [51]:
from ai_cdss.services.data import DataLoader
loader = DataLoader(PATIENT_LIST)

session = loader.load_session_data()
patient = loader.load_patient_data()
protocol = loader.load_protocol_data()
timeseries = loader.load_timeseries_data()

Database engine created successfully
Data successfully saved to rgs_interaction.csv
Database engine closed
query_timeseries_v3.sql
Database engine created successfully
Data successfully saved to rgs_timeseries.csv
Database engine closed
Function 'fetch_timeseries_data' executed in 9.8165 seconds


### PROCESSING

In [None]:
from ai_cdss.services.processing import ClinicalProcessor, ProtocolProcessor, TimeseriesProcessor
deficiency = ClinicalProcessor().process(patient)

In [None]:
from ai_cdss.services.processing import ProtocolProcessor
protocol_processor = ProtocolProcessor()
protocol_mapped = ProtocolProcessor().process(protocol)

In [None]:
from ai_cdss.services.processing import TimeseriesProcessor
timeseries_processor = TimeseriesProcessor()
timeseries = TimeseriesProcessor().process(timeseries)

In [None]:
from ai_cdss.services.scoring import compute_adherence
session_adh = compute_adherence(session)

ppf

In [13]:
from ai_cdss.services.processing import compute_ppf
ppf_long, contrib_long = compute_ppf(deficiency, protocol_mapped)

In [72]:
import pandas as pd
index = pd.MultiIndex.from_product([patient.index, protocol.PROTOCOL_ID], names=["PATIENT_ID", "PROTOCOL_ID"])
scoring_df = pd.DataFrame(index=index).reset_index()

usage

In [67]:
usage_df = (
    session
    .groupby(["PATIENT_ID", "PROTOCOL_ID"])["SESSION_ID"]
    .agg(USAGE="count")
    .reindex(index, fill_value=0)
    .reset_index()
)

subset

In [65]:
adh_df = session_adh.groupby(["PATIENT_ID", "PROTOCOL_ID"])["ADHERENCE_EWMA"].last().reset_index()
dm_df = timeseries.groupby(["PATIENT_ID", "PROTOCOL_ID"])["PARAMETER_VALUE_EWMA"].last().reset_index()

In [66]:
adh_df

Unnamed: 0,PATIENT_ID,PROTOCOL_ID,ADHERENCE_EWMA
0,775,206,0.853333
1,775,208,0.606097
2,775,209,0.280108
3,775,214,0.394473
4,775,220,0.435940
...,...,...,...
190,3318,211,1.000000
191,3318,214,0.999219
192,3318,219,1.000000
193,3318,221,1.000000


merge

In [69]:
dfs = [adh_df, dm_df, ppf_long, contrib_long, usage_df]

In [73]:
from functools import reduce

def merge_two_dfs(left, right):
    return pd.merge(left, right, on=["PATIENT_ID", "PROTOCOL_ID"], how="left")

scoring_df = reduce(merge_two_dfs, [scoring_df] + dfs)

In [74]:
scoring_df

Unnamed: 0,PATIENT_ID,PROTOCOL_ID,ADHERENCE_EWMA,PARAMETER_VALUE_EWMA,PPF,CONTRIB,USAGE
0,2110,214,,,0.527599,"[0.007430817164102516, 0.04953878109401672, 0....",0
1,2110,223,1.0,0.025068,0.543780,"[0.007110315816810179, 0.047402105445401146, 0...",7
2,2110,208,1.0,0.212500,0.473059,"[0.010409440728729657, 0.13879254304972863, 0....",2
3,2110,204,,,0.616753,"[0.006689881882458735, 0.1337976376491746, 0.0...",0
4,2110,205,1.0,,0.619409,"[0.00846463576614398, 0.16929271532287943, 0.0...",2
...,...,...,...,...,...,...,...
770,3432,228,,,0.544448,"[0.0, 0.0, 0.0, 0.11534015913609853, 0.2691270...",0
771,3432,232,,,0.531499,"[0.0, 0.0, 0.0, 0.15944962970727955, 0.3720491...",0
772,3432,230,,,0.587271,"[0.0, 0.0, 0.0, 0.18964040409231936, 0.3318707...",0
773,3432,233,,,0.568347,"[0.0, 0.0, 0.0, 0.1770448668945585, 0.30982851...",0


score

In [23]:
weights = [1, 1, 1]
scoring_df['SCORE'] = (
    scoring_df['ADHERENCE_EWMA'] * weights[0] +
    scoring_df['PARAMETER_VALUE_EWMA'] * weights[1] +
    scoring_df['PPF'] * weights[2]
)

In [24]:
protocols_ranked = (
    scoring_df.groupby('PATIENT_ID')
    .apply(lambda x: x.nlargest(10, 'SCORE'))
    .reset_index(drop=True)
)

  .apply(lambda x: x.nlargest(10, 'SCORE'))


schedule

In [25]:
import numpy as np
def schedule(df, days_per_week=7, prescriptions_per_day=5):
    """
    Generates a weekly schedule for each patient by distributing their top recommended protocols across the week.
    Ensures that:
    1. The same protocol is not scheduled twice in a single day.
    2. The total number of prescriptions is exactly `days_per_week * prescriptions_per_day`.
    
    Args:
    df (pd.DataFrame): Long format DataFrame with columns ['PATIENT_ID', 'PROTOCOL_ID'].
    days_per_week (int): Number of days in the schedule (default: 7).
    prescriptions_per_day (int): Number of protocols per day (default: 5).
    
    Returns:
    pd.DataFrame: A DataFrame where each row corresponds to a (PATIENT_ID, PROTOCOL_ID) pair,
                and the 'DAYS' column contains a list of day indexes (1-based) for when the protocol should be played.
    """
    total_prescriptions = days_per_week * prescriptions_per_day
    schedule_dict = {}

    for patient_id, group in df.groupby("PATIENT_ID"):
        protocols = group["PROTOCOL_ID"].tolist()

        # Expand protocol list to ensure at least `total_prescriptions`
        expanded_protocols = (protocols * ((total_prescriptions // len(protocols)) + 1))[:total_prescriptions]

        # Shuffle protocols for distribution across days
        np.random.shuffle(expanded_protocols)

        # Assign protocols to days ensuring no duplicates in a single day
        patient_schedule = {protocol: [] for protocol in protocols}
        day_protocols = [[] for _ in range(days_per_week)]
        
        for i, protocol in enumerate(expanded_protocols):
            day_idx = i % days_per_week
            if protocol not in day_protocols[day_idx]:  # Ensure no duplicate protocol on the same day
                day_protocols[day_idx].append(protocol)
                patient_schedule[protocol].append(day_idx + 1)  # Use 1-based indexing for days

        schedule_dict[patient_id] = patient_schedule

    # Convert to long format DataFrame
    structured_schedule = []
    for patient_id, protocols in schedule_dict.items():
        for protocol_id, days in protocols.items():
            structured_schedule.append({"PATIENT_ID": patient_id, "PROTOCOL_ID": protocol_id, "DAYS": days})

    schedule_df = pd.DataFrame(structured_schedule)
    df["DAYS"] = schedule_df.DAYS
    
    return df

In [26]:
prescriptions = schedule(protocols_ranked)

### UPDATING

In [28]:
marginal = (
    scoring_df
    .groupby("PATIENT_ID")["SCORE"]
    .mean()
)

In [29]:
below_mean = lambda x: x < x.mean()
interchange_mask = (
    prescriptions.groupby('PATIENT_ID')['SCORE']
    .transform(below_mean)
)
prescriptions["INTERCHANGE"] = interchange_mask

In [30]:
protocol_usage = usage_df[usage_df.PATIENT_ID == 775]
protocol_usage.index = protocol_usage.PROTOCOL_ID

In [31]:
from ai_cdss.services.scoring import compute_protocol_similarity
protocol_sim = compute_protocol_similarity(protocol)

In [33]:
def matrix_to_xy(df, columns=None, reset_index=False):
    bool_index = np.triu(np.ones(df.shape)).astype(bool)
    xy = (
        df.where(bool_index).stack().reset_index()
        if reset_index
        else df.where(bool_index).stack()
    )
    if reset_index:
        xy.columns = columns or ["row", "col", "val"]
    return xy

In [34]:
protocol_sim_long = matrix_to_xy(protocol_sim, columns=["PROTOCOL_ID", "PROTOCOL_SIM", "FCM"], reset_index=True)

In [None]:
# For a patient we want to substitute protocols below marginal value for highest therapeutic benefit with lowest usage

In [35]:
def find_substitute(protocol, protocol_sim, protocol_usage):
    # Exclude the current protocol
    protocols = protocol_sim.columns.drop(protocol)
    
    # Get usage and similarity data for other protocols
    usage = protocol_usage[protocols]
    sim = protocol_sim.loc[protocol, protocols]
    
    # Find the minimum usage value
    min_usage = usage.min()
    # Get candidates with the lowest usage
    candidates = usage[usage == min_usage].index
    
    # Among these candidates, select the one with highest similarity
    max_sim = sim[candidates].max()
    final_candidates = sim[sim == max_sim].index
    
    # Return the first candidate (or handle ties)
    return final_candidates[0] if not final_candidates.empty else None

In [36]:
protocol_usage = usage_df[usage_df.PATIENT_ID == 775]
protocol_usage.index = protocol_usage.PROTOCOL_ID
protocol_usage = protocol_usage.USAGE

In [38]:
def get_usage(session, patient_id):
    patient_sessions = session[session.PATIENT_ID == patient_id]
    patient_sessions.index = patient_sessions.PROTOCOL_ID
    return patient_sessions.USAGE

In [40]:
find_substitute(214, protocol_sim, get_usage(usage_df, 775))

223

In [None]:
def substitute_protocol(row):
    if row["INTERCHANGE"]:
        return find_substitute(
            row["PROTOCOL_ID"], 
            protocol_sim, 
            get_usage(usage_df, 775)
        )
    return row["PROTOCOL_ID"]

prescriptions["NEW_PROTOCOL_ID"] = prescriptions.apply(substitute_protocol, axis=1)

In [42]:
prescriptions

Unnamed: 0,PATIENT_ID,PROTOCOL_ID,ADHERENCE_EWMA,PARAMETER_VALUE_EWMA,PPF,CONTRIB,USAGE,SCORE,DAYS,INTERCHANGE,NEW_PROTOCOL_ID
0,775,222,0.999770,1.000000,0.632038,"[0.022302696143133578, 0.07434232047711194, 0....",10,2.631808,"[4, 2, 3]",False,222
1,775,206,0.853333,1.000000,0.528646,"[0.013972368519644753, 0.13972368519644754, 0....",12,2.381980,"[6, 7, 1]",False,206
2,775,224,1.000000,0.673047,0.632038,"[0.022302696143133578, 0.07434232047711194, 0....",11,2.305085,"[2, 4, 5]",False,224
3,775,208,0.606097,0.994970,0.484665,"[0.02088597029033065, 0.13923980193553767, 0.0...",13,2.085731,"[1, 5, 2, 7]",True,219
4,775,214,0.394473,0.950000,0.617411,"[0.014909525935815035, 0.04969841978605012, 0....",5,1.961884,"[1, 7, 3]",True,223
...,...,...,...,...,...,...,...,...,...,...,...
245,3432,219,,,0.719866,"[0.0076298266056033055, 0.15259653211206595, 0...",0,,"[3, 4, 5]",False,219
246,3432,209,,,0.724764,"[0.006030146672597829, 0.12060293345195647, 0....",0,,"[2, 7, 3]",False,209
247,3432,206,,,0.613381,"[0.00635159662385283, 0.12703193247705646, 0.0...",0,,"[5, 3, 7]",False,206
248,3432,226,,,0.780724,"[0.005855029580538683, 0.11710059161077356, 0....",0,,"[5, 6, 7]",False,226
