# import statements and upload data

In [8]:
import pandas as pd
import numpy as np
# import torch
# import torch.optim as optim
# from torch import nn
# from torch.utils.data import Dataset, DataLoader
# import shap

# from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report, mean_squared_error
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler 

# data = pd.read_csv("../clean_data/nafl/combined.large.nafl.csv")

In [50]:
# create the X and Y datasets

# data = data.drop(columns='DaysUntilFirstProgression')
data = data.drop(columns='Outcome')
data = data.drop(columns='Censored')

# Y = data[['StudyID', 'Outcome']]
Y = data[['StudyID', 'DaysUntilFirstProgression']]
X = data.drop(columns='DaysUntilFirstProgression')
X = X.drop(columns=['mean_BMI_category', 'last_BMI_category'])

X = X.set_index('StudyID')
Y = Y.set_index('StudyID')

# get all features that start with Lab
lab_feat = [feat for feat in X.columns if 'Lab' in feat]
numerical_feat = ['mean_BMI', 'last_BMI', 'FirstNAFL.Age.90']
numerical_feat.extend(lab_feat)

In [55]:
# check if GPU is enabled
device = "cuda" if torch.cuda.is_available() else "cpu" # need to define device since python can use both cpu and gpu
print(f"Using {device} device")
print(f"Shape of X: {X.shape}. Shape of Y: {Y.shape}.")

Using cuda device
Shape of X: (11890, 2895). Shape of Y: (11890, 1).


# setup the model using saved weights

In [18]:
# curate the dataset
class MAFLDDataset(Dataset): # must contain init, len, and getitem
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# dataset = MAFLDDataset(X_torch, Y_torch)
# train_loader = DataLoader(dataset, batch_size=64, shuffle=True) # batch size 64

In [19]:
# define by subclassing nn.Module and initialize the neural network layers in __init__.
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__() # inherit init from parent class
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(X.shape[1], 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1), # no activation follows this layer
        )
        # self.sigmoid = nn.Sigmoid() # remove if predicting non-binary outcome

    def forward(self, x):
        pred = self.linear_relu_stack(x)
        return pred

In [20]:
# create an instance of NeuralNetwork, move to device, print its structure
model = NeuralNetwork().to(device)
model.load_state_dict(torch.load("numeric_nn_scaled_x_and_y.pth"))

# define loss function and optimizer
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3) # start with this baseline learning rate

# scale the data

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [22]:
# write a function to only standardize the numerical columns and reattach to the rest of the dataframe
scaler = StandardScaler()

def standardize_numerical(dataframe, num_feat=numerical_feat, training_set=True):
    """
    dataframe: Pandas DataFrame

    Returns: a processed DataFrame where the numerical features have been standardized and the categorical features remain the same.
    """
    if training_set:
        scaled = scaler.fit_transform(dataframe[num_feat])
    else:
        scaled = scaler.transform(dataframe[num_feat])
        
    scaled_df = pd.DataFrame(scaled, columns=num_feat, index=dataframe.index)
    cat = dataframe.drop(columns=num_feat)
    processed = pd.concat([scaled_df, cat], axis=1)

    return processed

In [23]:
# standardize our features
X_train_scaled = standardize_numerical(X_train, training_set=True)
X_test_scaled = standardize_numerical(X_test, training_set=False)

In [24]:
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.to_numpy().reshape(-1, 1))

# get the features from lin/log reg model

In [26]:
neg_feat_lin_reg = ['MedType_Code_HCPCS_J0456', 'MedType_Code_EPIC-MED_17380', 'MedType_Code_EPIC-MED_26226', 'MedType_Code_EPIC-MED_6004080010', 'Gender_Legal_Sex_Unknown-U', 'MedType_Code_HCPCS_J2720', 'MedType_Code_HCPCS_J2590', 'MedType_Code_EPIC-MED_29132', 'MedType_Code_EPIC-MED_18302', 'MedType_Code_EPIC-PRC_47501945']
pos_feat_lin_reg = ['MedType_Code_EPIC-PRC_47561667', 'MedType_Code_EPIC-MED_98249', 'MedType_Code_HCPCS_J1756', 'MedType_Code_EPIC-MED_6677', 'MedType_Code_HCPCS_J1453', 'MedType_Code_HCPCS_J7507', 'MedType_Code_EPIC-PRC_77100001', 'MedType_Code_HCPCS_J2185', 'MedType_Code_HCPCS_C9113', 'MedType_Code_EPIC-MED_21063']

In [27]:
neg_feat_log_reg = ['MedType_Code_HCPCS_J0456', 'MedType_Code_EPIC-MED_17380', 'MedType_Code_EPIC-MED_26226', 'MedType_Code_EPIC-MED_6004080010', 'Gender_Legal_Sex_Unknown-U', 'MedType_Code_HCPCS_J2720', 'MedType_Code_HCPCS_J2590', 'MedType_Code_EPIC-MED_29132', 'MedType_Code_EPIC-MED_18302', 'MedType_Code_EPIC-PRC_47501945']
pos_feat_log_reg = ['MedType_Code_EPIC-PRC_47561667', 'MedType_Code_EPIC-MED_98249', 'MedType_Code_HCPCS_J1756', 'MedType_Code_EPIC-MED_6677', 'MedType_Code_HCPCS_J1453', 'MedType_Code_HCPCS_J7507', 'MedType_Code_EPIC-PRC_77100001', 'MedType_Code_HCPCS_J2185', 'MedType_Code_HCPCS_C9113', 'MedType_Code_EPIC-MED_21063']

# build shap explainer

In [25]:
def model_wrapper(array):
    ''' Wrapper around the torch model() function to pass into shap explainer.
        array: Pandas DataFrame
        Returns: torch
    '''
    if isinstance(array, pd.DataFrame):
        array = array.to_numpy()
    array = torch.tensor(array.astype('float32')).to(device)
    # array = array.to(device)
    model.eval()
    y_hat_test_scaled = model(array)
    y_hat_test_scaled = y_hat_test_scaled.cpu().detach()
    y_pred_rescaled = scaler_y.inverse_transform(y_hat_test_scaled.numpy())
    return y_pred_rescaled

feature_names = X.columns

In [30]:
X_test_scaled.shape

(3567, 2896)

In [31]:
X_train_scaled.shape

(8323, 2896)

In [None]:
explainer = shap.DeepExplainer(model, torch.tensor(X_train_scaled.to_numpy().astype(np.float32)).to(device))

In [None]:
shap_values = explainer.shap_values(torch.tensor(X_test_scaled.to_numpy().astype(np.float32)).to(device))

In [None]:
# Violin plot! Each of the 17 features are represented by a distribution of SHAP scores across 246 test samples
for i in range(5):
  shap.plots.violin(shap_values[:,:,i], features=features_test_df, feature_names=feature_names, plot_type='layered_violin')

# getting human-readable names from the shap features

In [16]:
shap_positive = ['Lab_19153-6',
 'Lab_2093-3',
 'MedType_Code_EPIC-MED_10328',
 'Lab_3094-0',
 'Lab_2089-1',
 'Lab_1968-7',
 'Lab_2695-5',
 'MedType_Code_EPIC-MED_10012',
 'MedType_Code_EPIC-MED_10368',
 'MedType_Code_EPIC-MED_27698']

shap_negative = ['Lab_777-3',
 'Lab_2336-6',
 'Lab_4679-7',
 'Lab_2502-3',
 'Lab_2284-8',
 'Lab_789-8',
 'Code_Z12.5',
 'MedType_Code_LMR_576',
 'MedType_Code_EPIC-MED_693',
 'Lab_XC5-9']

shap_top_10 = ['Lab_4679-7',
 'Lab_14338-8',
 'Lab_2132-9',
 'Lab_6768-6',
 'Code_Z23',
 'Lab_6690-2',
 'Lab_2093-3',
 'MedType_Code_EPIC-MED_10328',
 'Lab_13457-7',
 'Lab_2571-8']

In [9]:
# setup the translate function
med_df = pd.read_csv("/nobackup/users/ericason/mlhc-final-project/data/NAFLpatients_Jan2025request/Med_all.use.final.txt", delimiter="\t", header=0)
lab_df = pd.read_csv("/nobackup/users/ericason/mlhc-final-project/data/NAFLpatients_Jan2025request/Lab_all.use.final.txt", delimiter="\t", header=0)
dia_df = pd.read_csv("/nobackup/users/ericason/mlhc-final-project/data/NAFLpatients_Jan2025request/Dia_all.use.final.txt", delimiter="\t", header=0)

med_codes = "MedType_Code_" + med_df["Code_Type"] + "_" + med_df["Code"]
med_codes_df = pd.concat([med_codes, med_df["Medication"]], axis=1)
med_codes_df.columns = ["Code", "Medication"]
med_codes_df = med_codes_df.drop_duplicates() # drop duplicate codes and medications

lab_codes = "Lab_" + lab_df["Loinc_Code"]
lab_codes_df = pd.concat([lab_codes, lab_df["Test_Description"]], axis=1)
lab_codes_df.columns = ["Code", "Lab Test"]
lab_codes_df = lab_codes_df.drop_duplicates() # drop duplicate codes and medications

dia_codes = "Code_" + dia_df["Code"]
dia_codes_df = pd.concat([dia_codes, dia_df["Diagnosis_Name"]], axis=1)
dia_codes_df.columns = ["Code", "Diagnosis"]
dia_codes_df = dia_codes_df.drop_duplicates() # drop duplicate codes and medications

  lab_df = pd.read_csv("/nobackup/users/ericason/mlhc-final-project/data/NAFLpatients_Jan2025request/Lab_all.use.final.txt", delimiter="\t", header=0)


In [95]:
def translate_codes(input):
    """
    Given a list of various codified features, return a list in the same order with human-readable names.
    input: list
    Returns: list
    """
    output = []

    for code in input:
        if 'Med' in code:
            translation = translate_helper(code, med_codes_df)
        elif 'Lab' in code:
            translation = translate_helper(code, lab_codes_df)
        elif 'Code' in code:
            translation = translate_helper(code, dia_codes_df)
        else:
            translation = "Unknown"

        output.append({'code': code, 'description': translation})

    return pd.DataFrame(output)
            
def translate_helper(code, df):
    foo = df[df['Code'] == code] # .drop_duplicates(subset='Code', keep='first')
    return foo.iloc[:, 1]

In [110]:
foo = ['Code_R53.83',
 'Code_R79.89',
 'Lab_2078-4',
 'Lab_3094-0',
 'Code_E11.9',
 'Lab_2132-9',
 'Lab_2571-8',
 'Code_M79.672',
 'Code_175',
 'Code_R53.81']

bar = translate_codes(foo)
bar

Unnamed: 0,code,description
0,Code_R53.83,"1109 Other fatigue Name: Diagnosis, dtype: object"
1,Code_R79.89,"1044 Other specified abnormal findings of blood chemistry Name: Diagnosis, dtype: object"
2,Lab_2078-4,"18089 URINE CHLORIDE Urine (Test:el:5200015097) 31321 Urine Chloride (Test:mcsq-vcl) 596463 Urine Chloride (Test:bcvcl) Name: Lab Test, dtype: object"
3,Lab_3094-0,"21 Plasma Urea Nitrogen (Test:mcsq-pbun) 143 Urea Nitrogen (Test:bcpbun) 356 BUN Blood (Test:el:5200010415) 11923 Urea Nitrogen (Test:mcsq-bun) 14265 BUN Blood (Test:el:5200002387) 50628 Urea Nitrogen (Test:mcsq-bun1) 439305 BUN (Test:el:5200002391) 1073258 BUN (Test:mcsq-bun7) Name: Lab Test, dtype: object"
4,Code_E11.9,"1934 Type 2 diabetes mellitus without complications Name: Diagnosis, dtype: object"
5,Lab_2132-9,"59 Vitamin B12 (Test:mcsq-b12) 370 VITAMIN B12 Blood (Test:el:5200001783) 15350 Vitamin B12 (Test:bcb12) Name: Lab Test, dtype: object"
6,Lab_2571-8,"5 Triglycerides (Test:mcsq-trig) 417 TRIGLYCERIDES (Test:el:5200014087) 2152 Triglycerides (Test:bctrig) 133664 Triglycerides (Test:mcsq-trig1) 9668952 MAYO TRIGLYCERIDES (Test:mcsq-matrig) 11464383 MAYO TRIGLYCERIDES (Test:el:5200008546) Name: Lab Test, dtype: object"
7,Code_M79.672,"3403 Pain in left foot Name: Diagnosis, dtype: object"
8,Code_175,"5227 Percutaneous Cardiovascular Procedures W/O Ami (APR v30) Name: Diagnosis, dtype: object"
9,Code_R53.81,"1016 Other malaise Name: Diagnosis, dtype: object"


In [128]:
translate_codes(['Lab_4679-7'])

Unnamed: 0,code,description
0,Lab_4679-7,"280 Retic (Test:mhsq-retic) 13889 RETIC Blood (Test:el:5200012192) 14235 RETIC (%) Blood (Test:el:5200012188) 22963 Retic (Test:bhretic) 497891 RETIC (%) (Test:mhsq-retcp) 1917552 RETIC (%) (Test:bhretcp) Name: Lab Test, dtype: object"


In [84]:
pd.set_option('display.max_colwidth', 3000)

In [102]:
[Other fatigue, Other specified abnormal findings of blood chemistry, Urine Chloride, Plasma Urea Nitrogen, Type 2 diabetes mellitus without complications, Vitamin B12, Triglycerides, Pain in left foot, Percutaneous Cardiovascular Procedures, Other malaise]




0                                          641    Meperidine (Pf) 100 mg/ml Injection Solution
Name: Medication, dtype: object
1                                          1413    Insulin Lispro 100 Unit/ml Subcutaneous Pen
Name: Medication, dtype: object
2                                              13837    Allergy, unspecified, initial encounter
Name: Diagnosis, dtype: object
3                                                                    96906    Acne, unspecified
Name: Diagnosis, dtype: object
4                                 53923    Major Pancreas, Liver And Shunt Procedures (APR v30)
Name: Diagnosis, dtype: object
5    2637    Other specified disorders involving the immune mechanism, not elsewhere classified
Name: Diagnosis, dtype: object
6                                                                 1285    Diazepam 5 mg Tablet
Name: Medication, dtype: object
7                                                            207453    Hypersomnia, unspecified
Name: Diagnosis

In [None]:
[Meperidine (Pf) 100 mg/ml Injection Solution, Insulin Lispro 100 Unit/ml Subcutaneous Pen, Allergy, unspecified, initial encounter, Acne, unspecified, Major Pancreas, Liver And Shunt Procedures, Other specified disorders involving the immune, Diazepam 5 mg Tablet, Hypersomnia, unspecified, Pneumococcal Polysac Vaccine 23-V 2 Yrs/>subq/, Abnormal findings on diagnostic imaging of liver and biliary tract
]




In [22]:
shap_positive_med = [x for x in shap_positive if 'Med' in x]
shap_positive_lab = [x for x in shap_positive if 'Lab' in x]
shap_positive_dia = [x for x in shap_positive if 'Code' in x]

shap_negative_med = [x for x in shap_negative if 'Med' in x]
shap_negative_lab = [x for x in shap_negative if 'Lab' in x]
shap_negative_dia = [x for x in shap_negative if 'Code' in x]

In [4]:
med_df = pd.read_csv("/nobackup/users/ericason/mlhc-final-project/data/NAFLpatients_Jan2025request/Med_all.use.final.txt", delimiter="\t", header=0)
med_df.head()

Unnamed: 0,StudyID,Medication,Code_Type,Code,Quantity,Inpatient_Outpatient,Additional_Info,Med.Age.90,Med.daysfrom_firstNAFL,Med.before.ICD11
1,1,Lorazepam 0.5 mg Tablet,EPIC-MED,4572,,Outpatient,DOSE=0.5 MG; FREQ=BID; ROUTE=Oral,82,-904,y
2,1,Oxycodone-Acetaminophen 5 mg-325 mg Tablet,EPIC-MED,5940,,Outpatient,,82,-904,y
3,1,Amlodipine (norvasc) - LMR 953,LMR,953,,Outpatient,,81,-1463,y
4,1,Amlodipine 2.5 mg Tablet,EPIC-MED,9070,,Outpatient,DOSE=2.5 MG; FREQ=Daily; ROUTE=Oral,81,-1165,y
5,1,Amlodipine (norvasc) - LMR 953,LMR,953,,Outpatient,,80,-1616,y


In [6]:
med_codes = "MedType_Code_" + med_df["Code_Type"] + "_" + med_df["Code"]
med_codes_df = pd.concat([med_codes, med_df["Medication"]], axis=1)
med_codes_df.columns = ["Code", "Medication"]
med_codes_df.head()
med_codes_df = med_codes_df.drop_duplicates() # drop duplicate codes and medications
med_codes_df.shape

(18820, 2)

In [7]:
med_codes_df[med_codes_df['Code'].isin(shap_positive_med)] # most positive descriptions

Unnamed: 0,Code,Medication
184,MedType_Code_EPIC-MED_2007,"Cyanocobalamin (Vit B-12) 1,000 mcg/ml Injecti..."
219,MedType_Code_EPIC-MED_10328,Iopamidol 76 % Intravenous Solution


In [27]:
buz = med_codes_df[med_codes_df['Code'].isin(shap_negative_med)] # most positive descriptions

In [28]:
buz.iloc[:, 1].to_list()

['Flovent Hfa 110 mcg/Actuation Aerosol Inhaler',
 'Venlafaxine Er 37.5 mg Capsule,Extended Release 24 Hr']

In [9]:
lab_df = pd.read_csv("/nobackup/users/ericason/mlhc-final-project/data/NAFLpatients_Jan2025request/Lab_all.use.final.txt", delimiter="\t", header=0)
lab_df.head()

  lab_df = pd.read_csv("/nobackup/users/ericason/mlhc-final-project/data/NAFLpatients_Jan2025request/Lab_all.use.final.txt", delimiter="\t", header=0)


Unnamed: 0,StudyID,Group_Id,Loinc_Code,Test_Id,Test_Description,Result,Abnormal_Flag,Reference_Units,Reference_Range,Toxic_Range,Specimen_Type,Specimen_Text,Correction_Flag,Test_Status,Lab.Age.90,Lab.daysfrom_firstNAFL,Lab.before.ICD11
1,1,GLU,2345-7,SQ-PGLU,Plasma Glucose (Test:mcsq-pglu),108.00000,,mg/dL,70-110,,,,,,84,-104,y
2,1,PLT,777-3,SQ-PLT,PLT (Test:mhsq-plt),172.00000,,K/uL,150-400,,,,,,82,-938,y
3,1,UA-COLOR,5778-6,SQ-UCOLOR,UA-Color (Test:mhsq-ucolor),YELLOW,,,Yellow,,,,,,84,-104,y
4,1,WBC,6690-2,SQ-WBC,WBC (Test:mhsq-wbc),7.84000,,K/uL,4.5-11.0,,,,,,82,-904,y
5,1,TRIG,2571-8,SQ-TRIG,Triglycerides (Test:mcsq-trig),85.00000,,mg/dL,40-150,,,,,,83,-524,y


In [10]:
lab_codes = "Lab_" + lab_df["Loinc_Code"]
lab_codes_df = pd.concat([lab_codes, lab_df["Test_Description"]], axis=1)
lab_codes_df.columns = ["Code", "Lab Test"]
lab_codes_df.head()
lab_codes_df = lab_codes_df.drop_duplicates() # drop duplicate codes and medications
lab_codes_df.shape

(851, 2)

In [33]:
lab_codes_df[lab_codes_df['Code'] == 'Lab_19153-6']

Unnamed: 0,Code,Lab Test
1848,Lab_19153-6,TOTAL VOLUME (Test:el:5200009536)
15597,Lab_19153-6,TOTAL VOLUME (Test:bcmtv2)
15648,Lab_19153-6,TOTAL VOLUME (Test:bcmtv8)
30432,Lab_19153-6,TOTAL VOLUME (Test:mcsq-mtv8)
221467,Lab_19153-6,TOTAL VOLUME (Test:el:5200009497)
243425,Lab_19153-6,TOTAL VOLUME (Test:mcsq-mtv1)
272678,Lab_19153-6,TOTAL VOLUME Urine (Test:el:5200009507)
415540,Lab_19153-6,TOTAL VOLUME (Test:mcsq-mtv2)
654174,Lab_19153-6,TOTAL VOLUME (Test:mcsq-mtv29)
909320,Lab_19153-6,TOTAL VOLUME (Test:mcsq-mtv16)


In [17]:
foo = lab_codes_df[lab_codes_df['Code'].isin(shap_positive_lab)] # most positive descriptions

In [18]:
foo_first = foo.drop_duplicates(subset='Code', keep='first')

In [19]:
foo_first

Unnamed: 0,Code,Lab Test
21,Lab_3094-0,Plasma Urea Nitrogen (Test:mcsq-pbun)
24,Lab_1920-8,Transaminase-SGOT (Test:mcsq-sgot)
42,Lab_2093-3,Cholesterol (Test:mcsq-chol)
280,Lab_4679-7,Retic (Test:mhsq-retic)
833,Lab_2532-0,Lactic Dehydrogenase (Test:mcsq-ldh)
36148,Lab_14338-8,PREALBUMIN (Test:el:5200011054)


In [26]:
bar = lab_codes_df[lab_codes_df['Code'].isin(shap_negative_lab)] # most positive descriptions
bar_first = bar.drop_duplicates(subset='Code', keep='first')
bar_first['Lab Test'].to_list()

['Plasma Anion GAP (Test:mcsq-panion)',
 'Vitamin B12 (Test:mcsq-b12)',
 'Total Protein (Test:mcsq-tp)',
 'Urine Creatinine (Test:mcsq-vcre)',
 'Urine Microalbumin (Test:mcsq-malb)',
 'CALCULATED LDL Blood (Test:el:5200003273)',
 'Urine Urea Nitrogen (Test:mcsq-vun)',
 'Osmolality (Test:bcosm)']

In [11]:
dia_df = pd.read_csv("/nobackup/users/ericason/mlhc-final-project/data/NAFLpatients_Jan2025request/Dia_all.use.final.txt", delimiter="\t", header=0)
dia_df.head()

Unnamed: 0,StudyID,Diagnosis_Name,Code_Type,Code,Diagnosis_Flag,Inpatient_Outpatient,Dia.Age.90,Dia.daysfrom_firstNAFL,Dia.before.ICD11
1,1,Generalized anxiety disorder,ICD10,F41.1,,Outpatient,81,-1435,y
2,1,Generalized anxiety disorder,ICD10,F41.1,Primary,Outpatient,86,567,y
3,1,Unspecified symptoms and signs involving the g...,ICD10,R39.9,Primary,Outpatient,84,-97,y
4,1,Essential (primary) hypertension,ICD10,I10,,Outpatient,82,-1009,y
5,1,"Disorientation, unspecified",ICD10,R41.0,,Outpatient,84,-104,y


In [13]:
dia_codes = "Code_" + dia_df["Code"]
dia_codes_df = pd.concat([dia_codes, dia_df["Diagnosis_Name"]], axis=1)
dia_codes_df.columns = ["Code", "Diagnosis"]
dia_codes_df.head()
dia_codes_df = dia_codes_df.drop_duplicates() # drop duplicate codes and medications
dia_codes_df.shape

(26359, 2)

In [23]:
bax = dia_codes_df[dia_codes_df['Code'].isin(shap_negative_dia)] # most positive descriptions
bax_first = bax.drop_duplicates(subset='Code', keep='first')
bax_first

Unnamed: 0,Code,Diagnosis


In [24]:
baz = dia_codes_df[dia_codes_df['Code'].isin(shap_positive_dia)] # most positive descriptions
baz_first = baz.drop_duplicates(subset='Code', keep='first')
baz_first

Unnamed: 0,Code,Diagnosis
38,Code_Z23,Encounter for immunization
588,Code_N20.0,Calculus of kidney


In [3]:
import pickle
file_path = 'results/coxph_shap_values_scaledx.pkl'
with open(file_path, 'rb') as file:
    foo = pickle.load(file)