In [1]:
import pandas as pd
import numpy as np
import time
import sys
import os
import opticl
import utils_gastric as gi

# Optimization modelling
from pyomo import environ
from pyomo.environ import *

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
# df_train = pd.read_csv('processed-data/gastric_train2008.csv')
# df_test = pd.read_csv('processed-data/gastric_test2008.csv')

# # Also load tox_summary, which reports quantiles for various toxicities as candidate upper bounds
tox_summary = pd.read_csv('processed-data/gastric_toxicity_summary.csv')
tox_summary

Unnamed: 0,outcome,quantile_0.0,quantile_0.1,quantile_0.2,quantile_0.3,quantile_0.4,quantile_0.5,quantile_0.6,quantile_0.7,quantile_0.8,quantile_0.9,quantile_1.0
0,Neutro4,0.0,0.0,0.0,0.0252,0.0436,0.075,0.111,0.15,0.222,0.354532,0.719
1,Thrombo4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0214,0.04325,0.65768
2,Anemia4,0.0,0.0,0.0,0.0,0.0,0.0,0.000269,0.013713,0.02,0.037966,0.11
3,Lympho4,0.0,0.0,0.001926,0.00525,0.007936,0.009619,0.014011,0.018752,0.024456,0.032566,0.161807
4,GINONV_34,0.0,0.0,0.022,0.0344,0.05,0.068,0.09,0.106,0.138,0.1968,0.36
5,ALLERGY_34,0.0,0.0,0.0,0.000229,0.002501,0.005054,0.008763,0.014992,0.022031,0.035833,0.11709
6,AUDITORY_34,0.0,0.0,0.0,0.000756,0.002409,0.003745,0.005021,0.007165,0.011387,0.014675,0.069323
7,CARDIO_34,0.0,0.0,0.001143,0.01103,0.017628,0.022596,0.029498,0.040237,0.05,0.065682,0.28
8,COAGULATION_34,0.0,0.0,0.0,0.0,0.006606,0.011474,0.018295,0.025396,0.031145,0.039933,0.093381
9,CONSTITUTIONAL_34,0.0,0.0,0.02,0.04,0.0506,0.066598,0.076095,0.081818,0.1,0.18,0.41


In [3]:
# outcomes = gi.outcomes
# X_train = df_train[gi.X_cols+gi.T_cols]
# X_test = df_test[gi.X_cols+gi.T_cols]

# freq = np.sum(X_train>0, axis=0)
# cols_sub = freq[freq >= 2].index

# X_train = X_train.loc[:,cols_sub]
# X_test = X_test.loc[:,cols_sub]
# for o in outcomes:
#     data_train = X_train.drop(['Pub_Year'], inplace=False, axis=1)
#     data_train[o] = df_train[o]
#     data_train.to_csv(f'processed-data/data_train_{o}.csv', index=False)
#     data_test = X_test.drop(['Pub_Year'], inplace=False, axis=1)
#     data_test[o] = df_test[o]
#     data_test.to_csv(f'processed-data/data_test_{o}.csv', index=False)

In [4]:
outcomes = gi.outcomes
datasets_train = {}
datasets_test = {}

# Training datasets
for o in outcomes:
    data = pd.read_csv(f'processed-data/data_train_{o}.csv')
    y = data[o]
    X = data.drop([o], inplace=False, axis=1)
    datasets_train[o] = (X, y)
    
# Testing datasets
for o in outcomes:
    data = pd.read_csv(f'processed-data/data_test_{o}.csv')
    y = data[o]
    X = data.drop([o], inplace=False, axis=1)
    datasets_test[o] = (X, y)

In [5]:
constraints_embed = gi.outcomes[:-1]
ub_quantile = 'quantile_0.7'
alg_dict = {'cart': None, 'rf': None, 'linear': None}
outcomes = gi.outcomes
gr=False
bs = 0

tox_constrain = tox_summary.loc[tox_summary.loc[:,'outcome'].isin(constraints_embed),['outcome',ub_quantile]]
outcome_list = {outcome: {'lb':None, 'ub':tox_constrain[tox_constrain['outcome'] == outcome].iloc[0, 1], 'objective_weight':0,'group_models':gr,
'task_type': 'continuous', 'alg_list':alg_dict, 'bootstrap_iterations':bs,
                                   'X_train':datasets_train[o][0], 'y_train':datasets_train[o][1], 'X_test':datasets_test[o][0], 'y_test':datasets_test[o][1],
                                   'dataset_path': f'processed-data/data_train_{outcome}.csv'} for outcome in tox_constrain['outcome']}
outcome_list['OS'] = {'lb':None, 'ub':None, 'objective_weight':1,'group_models':gr,
'task_type': 'continuous', 'alg_list':alg_dict, 'bootstrap_iterations':bs,
                                   'X_train':datasets_train['OS'][0], 'y_train':datasets_train['OS'][1], 'X_test':datasets_test['OS'][0], 'y_test':datasets_test['OS'][1],
                                   'dataset_path':'processed-data/data_train_OS.csv'}

viol_rule = 0.5

gr=False
bs = 0

print("Algorithms = %s" % alg_dict)
print("Bootstrap iterations = %d" % bs)
print("Violation rule = %s" % str(viol_rule))
code_version = 'AAAI-23_CHEMOexample'

version = 'vAAAI-23_CHEMOexample'

Algorithms = {'cart': None, 'rf': None, 'linear': None}
Bootstrap iterations = 0
Violation rule = 0.5


In [6]:
performance = opticl.train_ml_models(outcome_list, version)
if not os.path.exists('results'):
    os.makedirs('results')
performance.to_csv('results/%s_performance.csv' % (code_version))

print("\nPreparing model master")
if viol_rule == 'average':
    gr_method = 'average'
    max_viol = None
    print("Group method = %s" % (gr_method))
    gr_string = 'average'
else: 
    gr_method = 'violation'
    max_viol = float(viol_rule)
    print("Group method = %s (violation limit = %.2f)" % (gr_method, max_viol))
    gr_string = 'violation_%.2f' % max_viol

Learning a model for Neutro4
No bootstrap - training on full training data
training Neutro4 with cart
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = cart, metric = None
saving... results/cart_Neutro4_trained.pkl
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 2.677540495706479
Train R2: 0.6245307850448949
-------------------testing evaluation-----------------------
Test MSE: 14.39376716117995
Test R2: 0.2610949348332674

training Neutro4 with rf
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = rf_shallow, metric = None
saving... results/rf_shallow_Neutro4_trained.pkl
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 2.5598351472863423
Train R2: 0.641036505439486
-------------------testing evaluation-----------------------
Test M


training DLT_PROP with rf
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = rf_shallow, metric = None
saving... results/rf_shallow_DLT_PROP_trained.pkl
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 2.5598351472863423
Train R2: 0.641036505439486
-------------------testing evaluation-----------------------
Test MSE: 13.488176012634048
Test R2: 0.3075835211177125

training DLT_PROP with linear
------------- Initialize grid  ----------------
------------- Running model  ----------------
Algorithm = linear, metric = None
saving... results/linear_DLT_PROP_trained.pkl
------------- Model evaluation  ----------------
-------------------training evaluation-----------------------
Train MSE: 3.975810409525942
Train R2: 0.4424755047892708
-------------------testing evaluation-----------------------
Test MSE: 12.836334118654728
Test R2: 0.3410458712971851

Learning a 

In [7]:
patient_ID = 0

pt = datasets_test['OS'][0].loc[patient_ID, :]

features = pt.keys()

mm = opticl.initialize_model_master(outcome_list)
mm.loc[outcomes,'group_method'] = gr_method
mm.loc[outcomes,'max_violation'] = max_viol
mm.loc[outcomes, 'trust_region'] = False
mm.loc[outcomes, 'var_features'] = [[i for i in gi.X_cols if i in features]]
mm.loc[outcomes, 'contex_features'] = [{i:pt[i] for i in gi.T_cols if i in features}]
model_master = opticl.model_selection(mm, performance)

if not os.path.exists('experiments'):
    print('Creating folder...')
    os.makedirs('experiments')
model_master.to_csv('experiments/model_master_%s.csv' % (code_version), index = True)

opticl.check_model_master(model_master)
model_master

                                                               model  \
Neutro4            {'results/rf/vAAAI-23_CHEMOexample_Neutro4_mod...   
GINONV_34          {'results/rf/vAAAI-23_CHEMOexample_GINONV_34_m...   
CONSTITUTIONAL_34  {'results/rf/vAAAI-23_CHEMOexample_CONSTITUTIO...   
INFECTION_34       {'results/rf/vAAAI-23_CHEMOexample_INFECTION_3...   
OTHER_34           {'results/rf/vAAAI-23_CHEMOexample_OTHER_34_mo...   
DLT_PROP           {'results/rf/vAAAI-23_CHEMOexample_DLT_PROP_mo...   
OS                 {'results/rf/vAAAI-23_CHEMOexample_OS_model.cs...   

                         task objective    lb        ub  \
Neutro4            continuous         0  None      0.15   
GINONV_34          continuous         0  None     0.106   
CONSTITUTIONAL_34  continuous         0  None  0.081818   
INFECTION_34       continuous         0  None     0.075   
OTHER_34           continuous         0  None  0.100811   
DLT_PROP           continuous         0  None  0.637304   
OS        

Unnamed: 0,model,task,objective,lb,ub,features,var_features,contex_features,group_models,group_method,ensemble_weights,max_violation,trust_region,dataset_path,clustering_model,enlargement,SCM_counterfactuals
Neutro4,{'results/rf/vAAAI-23_CHEMOexample_Neutro4_mod...,continuous,0,,0.15,"Index(['Asia', 'N_Patient', 'FRAC_MALE', 'AGE_...","[Asia, N_Patient, FRAC_MALE, AGE_MED, Prior_Pa...","{'Capecitabine_Ind': 1.0, 'Carboplatin_Ind': 0...",False,violation,,0.5,False,processed-data/data_train_Neutro4.csv,,[0],
GINONV_34,{'results/rf/vAAAI-23_CHEMOexample_GINONV_34_m...,continuous,0,,0.106,"Index(['Asia', 'N_Patient', 'FRAC_MALE', 'AGE_...","[Asia, N_Patient, FRAC_MALE, AGE_MED, Prior_Pa...","{'Capecitabine_Ind': 1.0, 'Carboplatin_Ind': 0...",False,violation,,0.5,False,processed-data/data_train_GINONV_34.csv,,[0],
CONSTITUTIONAL_34,{'results/rf/vAAAI-23_CHEMOexample_CONSTITUTIO...,continuous,0,,0.081818,"Index(['Asia', 'N_Patient', 'FRAC_MALE', 'AGE_...","[Asia, N_Patient, FRAC_MALE, AGE_MED, Prior_Pa...","{'Capecitabine_Ind': 1.0, 'Carboplatin_Ind': 0...",False,violation,,0.5,False,processed-data/data_train_CONSTITUTIONAL_34.csv,,[0],
INFECTION_34,{'results/rf/vAAAI-23_CHEMOexample_INFECTION_3...,continuous,0,,0.075,"Index(['Asia', 'N_Patient', 'FRAC_MALE', 'AGE_...","[Asia, N_Patient, FRAC_MALE, AGE_MED, Prior_Pa...","{'Capecitabine_Ind': 1.0, 'Carboplatin_Ind': 0...",False,violation,,0.5,False,processed-data/data_train_INFECTION_34.csv,,[0],
OTHER_34,{'results/rf/vAAAI-23_CHEMOexample_OTHER_34_mo...,continuous,0,,0.100811,"Index(['Asia', 'N_Patient', 'FRAC_MALE', 'AGE_...","[Asia, N_Patient, FRAC_MALE, AGE_MED, Prior_Pa...","{'Capecitabine_Ind': 1.0, 'Carboplatin_Ind': 0...",False,violation,,0.5,False,processed-data/data_train_OTHER_34.csv,,[0],
DLT_PROP,{'results/rf/vAAAI-23_CHEMOexample_DLT_PROP_mo...,continuous,0,,0.637304,"Index(['Asia', 'N_Patient', 'FRAC_MALE', 'AGE_...","[Asia, N_Patient, FRAC_MALE, AGE_MED, Prior_Pa...","{'Capecitabine_Ind': 1.0, 'Carboplatin_Ind': 0...",False,violation,,0.5,False,processed-data/data_train_DLT_PROP.csv,,[0],
OS,{'results/rf/vAAAI-23_CHEMOexample_OS_model.cs...,continuous,1,,,"Index(['Asia', 'N_Patient', 'FRAC_MALE', 'AGE_...","[Asia, N_Patient, FRAC_MALE, AGE_MED, Prior_Pa...","{'Capecitabine_Ind': 1.0, 'Carboplatin_Ind': 0...",False,violation,,0.5,False,processed-data/data_train_OS.csv,,[0],


In [8]:
def init_conceptual_model(pt, contex_vars):
    model = ConcreteModel('chemo')

    N = list(pt.keys())

    ########### STEP 1: Define Decision Variables ###########
    # Create x variable, and initialize empty y for outcome values
    model.x = Var(N, domain=Reals)

    # Restrict some x to be binary
    x_binary = [i for i in N if '_Ind' in i]
    for i in x_binary:
        model.x[i].domain = Binary

    ########### STEP 2: Define Objective function ###########
        def obj_function(model):
            return 0
    model.OBJ = Objective(rule=obj_function, sense=minimize)

    ###### STEP 3: Add (optionally) any known constraints #####
    def constraint_rule1(model):
        return sum(model.x[i] for i in x_binary) <= 3
    model.Constraint1 = Constraint(rule=constraint_rule1)

    ###### STEP 4: Fix (optionally) any non-optimization variables #####
    def constraint_rule2(model, i):
        return model.x[i] == pt[i]
    model.Constraint2 = Constraint(contex_vars, rule=constraint_rule2)

    return model

In [9]:
conceptual_model = init_conceptual_model(pt, [i for i in gi.X_cols if i in pt.keys()])
final_model = opticl.optimization_MIP(conceptual_model, model_master)
opt = SolverFactory('glpk')
print('Solving...')
results = opt.solve(final_model) 
print('Done!')

Embedding constraints for Neutro4
Adding single model.
Embedding constraints for GINONV_34
Adding single model.
Embedding constraints for CONSTITUTIONAL_34
Adding single model.
Embedding constraints for INFECTION_34
Adding single model.
Embedding constraints for OTHER_34
Adding single model.
Embedding constraints for DLT_PROP
Adding single model.
Embedding objective function for OS
Adding single model.
Solving...
Done!
