In [1]:
import os

# Cache
from joblib import Memory

# Data
import numpy as np
import pandas as pd

# Stats
import scipy.stats as stats
from scipy.stats import spearmanr
from scipy.stats import linregress
from scipy.signal import savgol_filter
from scipy.optimize import curve_fit
import pingouin as pg

import statsmodels.api as sm
import statsmodels.formula.api as smf

from sklearn.metrics import r2_score

# CDSS
from ai_cdss.data_loader import DataLoader
from ai_cdss.data_processor import DataProcessor
from ai_cdss.constants import *

from cdss_utils import expand_session, check_session, safe_merge, filter_study_range, get_confidence_intervals, generate_patient_protocol_report

# Plotting
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.gridspec import GridSpec


In [12]:
# Disk-backed joblib cache rooted at the relative directory "cache_dir";
# verbose=0 keeps joblib from printing cache messages.
memory = Memory(verbose=0, location="cache_dir")

@memory.cache
def load_session_cached(patient_list):
    """Load session data for ``patient_list`` and run it through ``check_session``.

    The data is fetched with the module-level ``loader`` and the checked
    result is memoized by ``memory`` (joblib), so repeated calls with the
    same ``patient_list`` are served from the on-disk cache.

    NOTE(review): the global ``loader`` is not an argument, so it is
    presumably not part of the cache key -- clear the cache directory after
    changing the loader configuration (e.g. ``rgs_mode``). TODO confirm.
    """
    raw_sessions = loader.load_session_data(patient_list=patient_list)
    return check_session(raw_sessions)

@memory.cache
def load_timeseries_cached(patient_list):
    """Load timeseries data for ``patient_list``, memoized on disk.

    The data is fetched with the module-level ``loader`` and returned
    unchanged; ``memory`` (joblib) caches the result per ``patient_list``.

    NOTE(review): the global ``loader`` is not an argument, so it is
    presumably not part of the cache key -- clear the cache directory after
    changing the loader configuration (e.g. ``rgs_mode``). TODO confirm.
    """
    raw_timeseries = loader.load_timeseries_data(patient_list=patient_list)
    return raw_timeseries

# --- Configuration -----------------------------------------------------------
# Flags and sizes below are not read in this cell; presumably later cells use them.
SAVE_DATA = True
EXPAND = False

n = 12
days = 7
protocols_per_day = 5

# Passed to DataLoader / DataProcessor further down.
rgs_mode = "app"
scoring_weights = [1, 1, 1]
ewma_alpha = 0.2

# Patient IDs of the NEST data set.
nest_patient = [
    204, 775, 787, 788,
    1123, 1169, 1170, 1171, 1172, 1173,
    1983, 2110, 2195,
    2955, 2956, 2957, 2958, 2959, 2960, 2961, 2962, 2963,
    3081, 3229, 3318, 3432,
]

# --- Loader / processor ------------------------------------------------------
loader = DataLoader(rgs_mode=rgs_mode)
processor = DataProcessor(weights=scoring_weights, alpha=ewma_alpha)

# --- Load --------------------------------------------------------------------
# Session and timeseries loads go through the joblib-cached wrappers.
session = load_session_cached(nest_patient)
timeseries = load_timeseries_cached(nest_patient)

# NOTE(review): in the output recorded for this cell, this call raised a
# SchemaError (non-nullable series 'PPF' contains null values), so the
# statements after it did not run in that execution.
ppf = loader.load_ppf_data(patient_list=nest_patient)

protocol_similarity = loader.load_protocol_similarity()
protocol_metrics = loader.load_protocol_init()

# --- Process -----------------------------------------------------------------
# Keep only sessions that have an ADHERENCE value.
session = session.dropna(subset=["ADHERENCE"])

# Aggregate the timeseries, then order the rows by the BY_PPST keys.
ts = processor.aggregate_dms_by_time(timeseries).sort_values(by=BY_PPST)

# --- Merge -------------------------------------------------------------------
# Inner join of sessions with the aggregated timeseries on the BY_PPS keys.
nest_data = safe_merge(
    session,
    ts,
    on=BY_PPS,
    how="inner",
    left_name="session",
    right_name="ts",
)

Database engine created successfully


INFO:ai_cdss.processing:Session data cleaned. Final shape: (2911, 34)
INFO:ai_cdss.data_loader:Session data loaded successfully.
INFO:cdss_utils:No patients without prescription found.
INFO:cdss_utils:No sessions without adherence found.
INFO:cdss_utils:Session data cleaned. Final shape: (2911, 34)


Database engine closed
Patient registered but no data yet.
Sessions in session table but not in recording table
Patient registered but no data yet.
Sessions in session table but not in recording table
Database engine created successfully
Database engine closed
Database engine created successfully
Database engine closed


  ppf_data = pd.concat([ppf_data, missing_combinations], ignore_index=True)


SchemaError: error in check_types decorator of function 'load_ppf_data': non-nullable series 'PPF' contains null values:
775   NaN
776   NaN
777   NaN
778   NaN
779   NaN
780   NaN
781   NaN
782   NaN
783   NaN
784   NaN
785   NaN
786   NaN
787   NaN
788   NaN
789   NaN
790   NaN
791   NaN
792   NaN
793   NaN
794   NaN
795   NaN
796   NaN
797   NaN
798   NaN
799   NaN
800   NaN
801   NaN
802   NaN
803   NaN
804   NaN
805   NaN
Name: PPF, dtype: float64