# This code uses a specific lymph version!


## Model Setup
built on lymph 1.0.0.clin-trial

In [1]:
import numpy as np
import pandas as pd
from scipy.special import factorial
import scipy as sp
import emcee                      # inference and backends for sample storage
from multiprocessing import Pool  # for parallelization of the inference
import lymph

# Spread graph handed to lymph: the primary tumor has an edge to every modelled
# LNL, and the LNLs are chained I -> II -> III -> IV -> V. Levels V and VII have
# no outgoing edges.
graph = {
    ('tumor', 'primary')  : ['I','II', 'III', 'IV','V','VII'], 
    ('lnl'  , 'I') :       ['II'],
    ('lnl'  , 'II') :       ['III'], 
    ('lnl'  , 'III'):       ['IV'], 
    ('lnl'  , 'IV') :       ['V'],
    ('lnl'  , 'V') :       [],
    ('lnl'  , 'VII') :       []
    
}
# Midline model with binary LNL states and max_time = 10 (the same value as
# `max_t` used for the time priors further down).
# NOTE(review): the exact meaning of use_central / use_midext_evo /
# marginalize_unknown is defined by the pinned lymph version -- confirm there.
model = lymph.models.Midline(graph_dict= graph,tumor_state = 1, unilateral_kwargs={'allowed_states':[0,1], 'max_time':10}, use_central = True, use_midext_evo = False, marginalize_unknown= False)
# Modality 'max_llh' is registered with specificity = sensitivity = 1
model.set_modality('max_llh',spec = 1,sens = 1)

# Time prior with p(early) = 0.3
def binom_pmf(k: np.ndarray, n: int, p: float):
    """Evaluate the binomial probability mass function.

    Args:
        k: Number(s) of successes at which to evaluate the PMF.
        n: Number of trials.
        p: Success probability of a single trial.

    Returns:
        The probability of observing `k` successes in `n` trials.

    Raises:
        ValueError: If `p` is smaller than 0 or larger than 1.
    """
    if p < 0. or p > 1.:
        raise ValueError("Binomial prob must be btw. 0 and 1")
    n_choose_k = factorial(n) / (factorial(k) * factorial(n - k))
    success_term = p**k
    failure_term = (1. - p)**(n - k)
    return n_choose_k * success_term * failure_term

def late_binomial(support: np.ndarray, p: float = 0.5) -> np.ndarray:
    """Binomial PMF over `support`, parametrized by the success probability `p`.

    The number of trials is read from the last entry of `support`.
    """
    n_trials = support[-1]
    return binom_pmf(support, n=n_trials, p=p)

# Number of time steps of the priors; the support is 0..max_t (max_t + 1 values)
max_t = 10
# 'early': fixed binomial prior with success probability 0.3
model.set_distribution('early',sp.stats.binom.pmf(np.arange(max_t+1), max_t, 0.3))
# 'late': the function itself is passed instead of an array.
# NOTE(review): presumably lymph evaluates it on its own time support and
# exposes `p` as the `late_p` parameter set further below -- confirm.
model.set_distribution('late', late_binomial)

load samples

In [2]:
import h5py
# Read the complete "chain" dataset from the HDF5 file into memory
with h5py.File("trial_samples_central.h5", "r") as f:
    samples1 = f["chain"][...]
# Collapse all leading axes so that every row is one sample and the columns are
# the entries of the last axis (one per parameter)
samples1 = samples1.reshape(-1, samples1.shape[-1])
print(samples1.shape)

(2160, 18)


former way to thin the original samples

In [5]:
# tau = 2000
# burnin = int(2 * np.max(tau))
# thin = int(0.5 * np.min(tau))
# samples1 = backend.get_chain(discard=burnin, flat=True, thin=thin)
# print("burn-in: {0}".format(burnin))
# print("thin: {0}".format(thin))
# print("flat chain shape: {0}".format(samples1.shape))
# print(backend.get_chain().shape)


In [3]:
# Mean of every sample column, taken over all flattened samples
sampled_mean = samples1.mean(axis = 0)
# Parameter names in the column order of the sample array: the position in
# this tuple is the column index the value is read from.
param_names = (
    'mixing',
    'ipsi_primarytoI_spread',
    'ipsi_primarytoII_spread',
    'ipsi_primarytoIII_spread',
    'ipsi_primarytoIV_spread',
    'ipsi_primarytoV_spread',
    'ipsi_primarytoVII_spread',
    'contra_primarytoI_spread',
    'contra_primarytoII_spread',
    'contra_primarytoIII_spread',
    'contra_primarytoIV_spread',
    'contra_primarytoV_spread',
    'contra_primarytoVII_spread',
    'ItoII_spread',
    'IItoIII_spread',
    'IIItoIV_spread',
    'IVtoV_spread',
    'late_p',
)
params = {name: sampled_mean[column] for column, name in enumerate(param_names)}
model.set_params(**params)
model.get_params()

{'midext_prob': 0.0,
 'ipsi_primarytoI_spread': 0.026599934089507535,
 'ipsi_primarytoII_spread': 0.3754362312489512,
 'ipsi_primarytoIII_spread': 0.07350634235991671,
 'ipsi_primarytoIV_spread': 0.009868764752471882,
 'ipsi_primarytoV_spread': 0.01608922143844808,
 'ipsi_primarytoVII_spread': 0.021790771223072873,
 'contra_primarytoI_spread': 0.0032833634932873815,
 'contra_primarytoII_spread': 0.025330185925201906,
 'contra_primarytoIII_spread': 0.0023198951662066233,
 'contra_primarytoIV_spread': 0.0028514226257283703,
 'contra_primarytoV_spread': 0.000656088933696782,
 'contra_primarytoVII_spread': 0.006324800116350196,
 'mixing': 0.22533811234978024,
 'ItoII_spread': 0.7470325433932157,
 'IItoIII_spread': 0.1444848004577465,
 'IIItoIV_spread': 0.16715051321273394,
 'IVtoV_spread': 0.17189301394039708,
 'late_p': 0.36914158720690937}

In [4]:
# Modality referenced by the `diagnose` dictionaries below: specificity 1, sensitivity 0.81
model.set_modality('treatment_diagnose', spec = 1, sens = 0.81)

In [5]:
# Both CSV files carry a three-row column header (read as a column MultiIndex)
dataset_full = pd.read_csv("data/cleaned.csv", header=[0,1,2]) #import data
dataset_USZ =  pd.read_csv("data/cleanedUSZ.csv", header=[0,1,2]) #import data

# Only the USZ data is used from here on
maxllh =  dataset_USZ['max_llh']
t_stage = dataset_USZ['info']
# Per side: drop the listed sub-levels and unmodelled levels, then select the six
# modelled LNLs in the order used throughout this notebook
ipsi = maxllh.loc[:,'ipsi'].drop(['IIa','IIb','VIII','Ib','IX','VI','X','Ia'],axis = 1)[['I','II','III','IV','V','VII']]
contra = maxllh.loc[:,'contra'].drop(['IIa','IIb','VIII','Ib','IX','VI','X','Ia'],axis = 1)[['I','II','III','IV','V','VII']]
# Two-level headers (side, LNL) so the columns stay distinguishable after the concat.
# Note that the chained assignment below also binds the name `header`.
ipsi_header = header = pd.MultiIndex.from_product([ ['ipsi'], ['I','II','III','IV','V','VII']], names=['', ''])
contra_header = pd.MultiIndex.from_product([['contra'], ['I','II','III','IV','V','VII']], names=['', ''])
ipsi.columns = ipsi_header
contra.columns = contra_header

# Column order: the 'info' columns first, then ipsi I..VII, then contra I..VII.
# The combination analysis below indexes the rows by position and relies on it.
dataset_analyze = pd.concat([t_stage,ipsi,contra],axis = 1)


## Let's take a look at some examples


reduce the samples to the needed amount

In [7]:
from sparing_scripts import sample_from_flattened

# Reduce the flattened chain to 216 samples with a step size of 10; the shape
# printed above is (2160, 18), and 2160 / 10 = 216. The selection logic itself
# lives in sparing_scripts.
samples_reduced = sample_from_flattened(samples1, num_samples = 216, spaced = True, step_size = 10)

Note: right now there are two versions of `levels to spare`. The old version excludes LNLs from the nCTV until excluding the next LNL would result in a risk above the threshold. Then it checks whether the upper bound of the 95% CI exceeds the threshold; if yes, the next LNL is also irradiated. The new version instead always checks whether the upper bound of the 95% CI exceeds the threshold; if yes, we stop excluding LNLs. In most cases both versions give the same result. However, in 13 of all combinations there was a slight difference, where the new version includes one more LNL.

In [8]:
from sparing_scripts import risk_sampled, levels_to_spare_old, levels_to_spare, ci_single

# Example diagnosis: only ipsilateral level II is positive under the
# 'treatment_diagnose' modality, every other level is negative.
diagnose = {"ipsi": {'treatment_diagnose':{
        "I": 0,
        "II": 1,
        "III": 0,
        "IV": 0,
        "V": 0,
        "VII": 0
    }},
    "contra": {'treatment_diagnose':{
        "I": 0,
        "II": 0,
        "III": 0,
        "IV": 0,
        "V": 0,
        "VII": 0
    }}}
# Risks for an early T-stage tumor with midline extension, given this diagnosis
sampled_risks, risk = risk_sampled(samples = samples_reduced, model = model, t_stage = 'early', given_diagnoses= diagnose,central = None, midline_extension= True)     
# Old sparing rule; the first argument (0.10) is presumably the risk threshold
# described in the note above -- confirm in sparing_scripts
spared_lnls, total_risk, ranked_combined, treated_lnls, treated_array, treated_ipsi, treated_contra, sampled_total_risks = levels_to_spare_old(0.10, model, risk, sampled_risks)
print(treated_lnls)
print(total_risk*100)
print(spared_lnls)
# Interval of the sampled total risks, scaled by 100 like the total risk printed above
ci_single(sampled_total_risks)*100

[('contra II', 0.06700558976741193), ('ipsi III', 0.09544252013137532), ('ipsi II', 1.0000000000000004)]
7.803191450136049
[('contra V', 0.0027096298969616446), ('contra IV', 0.002879082831950057), ('contra I', 0.0028877936682658477), ('contra VII', 0.006292845036804311), ('ipsi IV', 0.009386054215524034), ('ipsi V', 0.01086744558213284), ('contra III', 0.012683190788655168), ('ipsi VII', 0.01439047847542773), ('ipsi I', 0.02018212044831703)]


array([6.63026685, 9.14726159])

In [10]:
# Same diagnosis as in the previous cell, but without midline extension and
# evaluated with the new sparing routine (called with ci = False)
sampled_risks, risk = risk_sampled(samples = samples_reduced, model = model, t_stage = 'early', given_diagnoses= diagnose,central = None, midline_extension= False)     
spared_lnls, total_risk, ranked_combined, treated_lnls, treated_array, treated_ipsi, treated_contra, sampled_total_risks = levels_to_spare(0.10, model, risk, sampled_risks, ci = False)
print(treated_lnls)
print(total_risk*100)
print(spared_lnls)
# Interval of the sampled total risks, scaled by 100 like the total risk printed above
ci_single(sampled_total_risks)*100

[('ipsi III', 0.08275283447364559), ('ipsi II', 1.0000000000000002)]
6.629766388350969
[('contra V', 0.00042763173316175606), ('contra I', 0.0009891023055616907), ('contra IV', 0.0014467123203597062), ('contra III', 0.0015009689751277794), ('contra VII', 0.0036182001687979333), ('ipsi IV', 0.007896204657586742), ('ipsi V', 0.009609714097105536), ('ipsi VII', 0.012807244016318392), ('contra II', 0.013067004381272224), ('ipsi I', 0.018191576282428446)]


array([5.75407088, 7.54938061])

## Combination analysis

Here we produce all possible combinations of diagnoses to compute the risks

In [10]:
from collections import Counter
from collections import defaultdict


# Group the patient rows by their complete observation pattern
data = np.array(dataset_analyze)

# pattern (row as a tuple) -> positions of all rows showing that pattern
entry_combinations_with_indexes = defaultdict(list)
for row_number, observed in enumerate(data):
    entry_combinations_with_indexes[tuple(observed)].append(row_number)

# Three parallel lists in order of first appearance of each pattern
USZ_combinations = list(entry_combinations_with_indexes.keys())
USZ_indexes = list(entry_combinations_with_indexes.values())
USZ_counts = [len(row_numbers) for row_numbers in USZ_indexes]

lnls = ['I','II', 'III', 'IV','V', 'VII']
# Layout of every pattern tuple: [0] T-stage, [1] midline extension,
# [2..7] ipsilateral levels, [8..13] contralateral levels (both in `lnls` order).
t_stage = [pattern[0] for pattern in USZ_combinations]
midline_extension = [pattern[1] for pattern in USZ_combinations]
# The comparison with True is kept explicit: only entries equal to True count
# as involved, any other value is skipped.
invovlvement_ipsi_USZ = [
    [level for offset, level in enumerate(lnls) if pattern[offset + 2] == True]
    for pattern in USZ_combinations
]
invovlvement_contra_USZ = [
    [level for offset, level in enumerate(lnls) if pattern[offset + 8] == True]
    for pattern in USZ_combinations
]

first only check the USZ dataset for relevant entries

In [12]:
from sparing_scripts import analysis_treated_lnls_combinations_old, count_number_treatments, analysis_treated_lnls_combinations
# Run the new-version treatment analysis on every pattern observed in the USZ
# data, with a threshold of 0.10. The call returns ten objects, unpacked here.
usz_treated_lnls_no_risk, usz_treated_lnls_all, usz_treatment_array, usz_top3_spared, usz_total_risks, usz_treated_ipsi, usz_treated_contra, usz_sampled_risks_array, usz_lnls_ranked, cis = analysis_treated_lnls_combinations(combinations = USZ_combinations, model = model, samples = samples_reduced, threshold = 0.10)
# NOTE(review): presumably counts how often each distinct treated-LNL set
# occurs; the length below would then be the number of distinct sets -- confirm
usz_set_counts = count_number_treatments(usz_treated_lnls_no_risk)
len(usz_set_counts)

41

In [13]:
from sparing_scripts import ci_multiple
# Store the treatment array of the USZ patterns on disk
df = pd.DataFrame(usz_treatment_array)
df.to_csv('treatment_array_new_dataset.csv')
# Interval bounds per pattern; indexed below as ci.T[0] (lower) and ci.T[1] (upper)
ci = ci_multiple(usz_sampled_risks_array)
# Share of patients per pattern. The denominator is derived from the counts
# themselves (every patient row belongs to exactly one pattern) rather than
# hard-coded, so the table stays correct if the dataset changes size.
n_patients = sum(USZ_counts)
data_export_usz = pd.DataFrame({'Percentage of patients': np.array(USZ_counts)/n_patients,
                                'T-stage': t_stage,
                                'Midline Extension': midline_extension,
                                'Involvement Ipsi' : invovlvement_ipsi_USZ,
                                'Involvement Contra': invovlvement_contra_USZ,
                                'Treated Ipsi':  usz_treated_ipsi,
                                'Treated Contra': usz_treated_contra,
                                'risk': usz_total_risks,
                                'lower bound': ci.T[0],
                                'upper bound': ci.T[1],
                                'top 3 spared lnls risk': usz_top3_spared

})
# data_export_usz.to_csv('analyzed_usz_data_new_dataset.csv', sep = ';', index = False)
# data_export_usz.sort_values(by = 'Percentage of patients', ascending = False, inplace = True)
data_export_usz

Unnamed: 0,Percentage of patients,T-stage,Midline Extension,Involvement Ipsi,Involvement Contra,Treated Ipsi,Treated Contra,risk,lower bound,upper bound,top 3 spared lnls risk
0,0.048780,late,True,[II],[],"[III, II]",[II],0.078032,0.066303,0.091473,"[(ipsi I, 0.02018212044831703), (ipsi VII, 0.0..."
1,0.010453,early,False,[II],[II],"[III, II]","[III, II]",0.076328,0.062081,0.097589,"[(ipsi I, 0.020745164441888973), (contra I, 0...."
2,0.003484,late,True,"[I, II, III, IV, VII]","[I, II, III, IV]","[V, I, II, III, IV, VII]","[I, II, III, IV]",0.061390,0.034468,0.095027,"[(contra V, 0.049917866263852986), (contra VII..."
3,0.003484,late,True,"[II, III, IV, VII]",[],"[V, VII, II, III, IV]",[II],0.066456,0.055051,0.077370,"[(ipsi I, 0.028686386918113108), (contra III, ..."
4,0.010453,early,False,"[II, VII]",[],"[III, II, VII]",[],0.063806,0.054496,0.073761,"[(ipsi I, 0.021186904157757556), (contra II, 0..."
...,...,...,...,...,...,...,...,...,...,...,...
72,0.003484,early,False,"[II, IV]",[],"[V, III, II, IV]",[],0.062058,0.052172,0.072464,"[(ipsi I, 0.022471174486049225), (ipsi VII, 0...."
73,0.006969,late,False,"[II, III, V]",[],"[IV, II, III, V]",[],0.075202,0.062002,0.087899,"[(ipsi I, 0.02735384799828265), (ipsi VII, 0.0..."
74,0.003484,late,True,"[II, III]","[II, III, IV]","[I, IV, II, III]","[V, II, III, IV]",0.066285,0.054837,0.080623,"[(ipsi VII, 0.02303066023513181), (ipsi V, 0.0..."
75,0.003484,late,False,"[II, V]",[],"[IV, III, II, V]",[],0.066396,0.054162,0.077747,"[(ipsi I, 0.024086145154078375), (ipsi VII, 0...."


now let's go for all possible combinations

In [None]:
from sparing_scripts import change_base

def produce_combinations_list(array):
    """Translate rows of 0/1 flags into diagnosis-pattern tuples.

    Args:
        array: Iterable of rows. In every row the first entry encodes the
            T-stage and all remaining entries are boolean flags.

    Returns:
        A list with one tuple per row. The first element is 'early' when the
        first entry equals 0 and 'late' otherwise; every further element is
        False when the entry equals 0 and True otherwise.
    """
    combinations_list = []
    for entry in array:
        combination = tuple(
            # `not (cell == 0)` yields a plain Python bool even for numpy scalars
            ('early' if cell == 0 else 'late') if position == 0 else not (cell == 0)
            for position, cell in enumerate(entry)
        )
        combinations_list.append(combination)
    return combinations_list

# One row per integer 0 .. 2**14 - 1, holding the 14 base-2 digits returned by
# `change_base`. The 14 positions are read further below as: T-stage, midline
# extension, six ipsilateral and six contralateral levels.
combination_array = np.zeros((2**14,14))
for i in range(2**14):
    combination_array[i] = [
        int(digit) for digit in change_base(i, 2, length=14)
    ]

# Convert the 0/1 rows into ('early'|'late', bool, bool, ...) tuples
all_combinations = produce_combinations_list(combination_array)

here we do it with the new version

In [None]:
import multiprocessing as mp

# Function to process a chunk of combinations
def process_combinations(chunk):
    """Run the new-version treatment analysis on one chunk of combinations.

    Reads the notebook globals `samples_reduced` and `model`.
    """
    return analysis_treated_lnls_combinations(chunk, samples_reduced, model)

# Divide the combinations into chunks. One core is left free, but the worker
# count is clamped to at least 1: on a single-core machine `cpu_count() - 1`
# is 0, which would break both the division below and `mp.Pool(0)`.
num_cores = max(1, mp.cpu_count() - 1)
# `range` rejects a step of 0, so keep at least one combination per chunk
chunk_size = max(1, len(all_combinations) // num_cores)
chunks = [all_combinations[i:i + chunk_size] for i in range(0, len(all_combinations), chunk_size)]

# Use multiprocessing to process the chunks
with mp.Pool(num_cores) as pool:
    results = pool.map(process_combinations, chunks)

# Combine the results from all chunks: after the transpose every name holds a
# tuple with one entry per chunk
treated_lnls_no_risk, treated_lnls_all, treatment_array, top3_spared, total_risks, treated_ipsi, treated_contra, sampled_risks_array, lnls_ranked, cis = zip(*results)

# Flatten the results back into one entry per combination
treated_lnls_no_risk = [item for sublist in treated_lnls_no_risk for item in sublist]
treated_lnls_all = [item for sublist in treated_lnls_all for item in sublist]
treatment_array = np.vstack(treatment_array)
top3_spared = [item for sublist in top3_spared for item in sublist]
total_risks = np.concatenate(total_risks)
treated_ipsi = [item for sublist in treated_ipsi for item in sublist]
treated_contra = [item for sublist in treated_contra for item in sublist]
sampled_risks_array = np.vstack(sampled_risks_array)
lnls_ranked = [item for sublist in lnls_ranked for item in sublist]
# Every chunk's interval result is indexed as [0] = lower and [1] = upper bounds
cis_lower = [item[0] for item in cis]
cis_upper = [item[1] for item in cis]
flat_lower = [item for sublist in cis_lower for item in sublist]
flat_upper = [item for sublist in cis_upper for item in sublist]

In [38]:
# Risks without any given diagnosis (given_diagnoses = None), computed once for
# each of the four T-stage / midline-extension scenarios
sampled_risks_early_no_ext, mean_risk_early_no_ext = risk_sampled(samples_reduced, model, 'early', midline_extension = False, given_diagnoses = None) 
sampled_risks_early_ext, mean_risk_early_ext = risk_sampled(samples_reduced, model, 'early', midline_extension = True, given_diagnoses = None)
sampled_risks_late_no_ext, mean_risk_late_no_ext = risk_sampled(samples_reduced, model, 'late', midline_extension = False, given_diagnoses = None)
sampled_risks_late_ext, mean_risk_late_ext = risk_sampled(samples_reduced, model, 'late', midline_extension = True, given_diagnoses = None)



In [39]:
#generate state list
# All 2**14 binary vectors of length 14; the lexsort puts the rows into
# lexicographic order (first column is the most significant key)
state_list = np.array(np.meshgrid(*[[0, 1]] * 14)).T.reshape(-1, 14)
state_list = state_list[np.lexsort(np.fliplr(state_list).T)]
# Flatten each mean-risk array to one dimension
mean_risk_early_noext_flat = mean_risk_early_no_ext.reshape(-1)
mean_risk_early_ext_flat = mean_risk_early_ext.reshape(-1)
mean_risk_late_noext_flat = mean_risk_late_no_ext.reshape(-1)
mean_risk_late_ext_flat = mean_risk_late_ext.reshape(-1)
#combine them
# Block order: early/no extension, early/extension, late/no extension,
# late/extension. Dividing by 4 weights the four scenarios equally.
# NOTE(review): this order must match the row order of `all_combinations` -- confirm.
full_risks = np.hstack([mean_risk_early_noext_flat, mean_risk_early_ext_flat, mean_risk_late_noext_flat, mean_risk_late_ext_flat])/4

In [40]:
lnls = ['I','II', 'III', 'IV','V', 'VII']
# Layout of every combination tuple: [0] T-stage, [1] midline extension,
# [2..7] ipsilateral levels, [8..13] contralateral levels (both in `lnls` order).
t_stage = [pattern[0] for pattern in all_combinations]
midline_extension = [pattern[1] for pattern in all_combinations]
# Collect, per combination, the names of the levels whose flag equals True
invovlvement_ipsi = [
    [level for offset, level in enumerate(lnls) if pattern[offset + 2] == True]
    for pattern in all_combinations
]
invovlvement_contra = [
    [level for offset, level in enumerate(lnls) if pattern[offset + 8] == True]
    for pattern in all_combinations
]

In [None]:
# Result table with one row per combination (new sparing version). The column
# labelled 'Percentage of patients' is filled with the model-based `full_risks`.
data_export = pd.DataFrame({'Percentage of patients': full_risks,
                                'T-stage': t_stage,
                                'Midline Extension': midline_extension,
                                'Involvement Ipsi' : invovlvement_ipsi,
                                'Involvement Contra': invovlvement_contra,
                                'Treated Ipsi':  treated_ipsi,
                                'Treated Contra': treated_contra,
                                'risk': total_risks,
                                'lower bound': flat_lower,
                                'upper bound': flat_upper,
                                'top 3 spared lnls risk': top3_spared,
                                'lnls ranked': lnls_ranked
})
# data_export.to_csv('lymph_1_midline_full_table_new_code.csv', sep = ';', index = True)

In [None]:
# NOTE(review): this cell repeats an earlier cell of this notebook verbatim.
#generate state list
# All 2**14 binary vectors of length 14, sorted lexicographically
state_list = np.array(np.meshgrid(*[[0, 1]] * 14)).T.reshape(-1, 14)
state_list = state_list[np.lexsort(np.fliplr(state_list).T)]
# Flatten each mean-risk array to one dimension
mean_risk_early_noext_flat = mean_risk_early_no_ext.reshape(-1)
mean_risk_early_ext_flat = mean_risk_early_ext.reshape(-1)
mean_risk_late_noext_flat = mean_risk_late_no_ext.reshape(-1)
mean_risk_late_ext_flat = mean_risk_late_ext.reshape(-1)
#combine them
# Four scenario blocks concatenated and weighted equally (division by 4)
full_risks = np.hstack([mean_risk_early_noext_flat, mean_risk_early_ext_flat, mean_risk_late_noext_flat, mean_risk_late_ext_flat])/4

In [None]:
lnls = ['I','II', 'III', 'IV','V', 'VII']
# Layout of every combination tuple: [0] T-stage, [1] midline extension,
# [2..7] ipsilateral levels, [8..13] contralateral levels (both in `lnls` order).
t_stage = [pattern[0] for pattern in all_combinations]
midline_extension = [pattern[1] for pattern in all_combinations]
# Collect, per combination, the names of the levels whose flag equals True
invovlvement_ipsi = [
    [level for offset, level in enumerate(lnls) if pattern[offset + 2] == True]
    for pattern in all_combinations
]
invovlvement_contra = [
    [level for offset, level in enumerate(lnls) if pattern[offset + 8] == True]
    for pattern in all_combinations
]

In [None]:
# NOTE(review): this cell repeats an earlier cell of this notebook verbatim.
# Result table with one row per combination (new sparing version).
data_export = pd.DataFrame({'Percentage of patients': full_risks,
                                'T-stage': t_stage,
                                'Midline Extension': midline_extension,
                                'Involvement Ipsi' : invovlvement_ipsi,
                                'Involvement Contra': invovlvement_contra,
                                'Treated Ipsi':  treated_ipsi,
                                'Treated Contra': treated_contra,
                                'risk': total_risks,
                                'lower bound': flat_lower,
                                'upper bound': flat_upper,
                                'top 3 spared lnls risk': top3_spared,
                                'lnls ranked': lnls_ranked
})
# data_export.to_csv('lymph_1_midline_full_table_new_code.csv', sep = ';', index = True)

In [None]:
import ast
# Reload a previously exported table instead of recomputing it.
# NOTE(review): the `to_csv` calls for this file name are commented out in this
# notebook, so the file must already exist on disk.
data_export = pd.read_csv('lymph_1_midline_full_table_new_code.csv', sep = ';',index_col = 0)

# Convert the 'top 3 spared lnls risk' column entries from string to list
# (`ast.literal_eval` only evaluates Python literals). The other list-valued
# columns are not converted here and stay strings after the reload.
data_export['top 3 spared lnls risk'] = data_export['top 3 spared lnls risk'].apply(ast.literal_eval)

old_version

In [45]:
import multiprocessing as mp

# Function to process a chunk of combinations
def process_combinations(chunk):
    """Run the old-version treatment analysis on one chunk of combinations.

    Reads the notebook globals `samples_reduced` and `model`.
    """
    return analysis_treated_lnls_combinations_old(chunk, samples_reduced, model)

# Divide the combinations into chunks. One core is left free, but the worker
# count is clamped to at least 1: on a single-core machine `cpu_count() - 1`
# is 0, which would break both the division below and `mp.Pool(0)`.
num_cores = max(1, mp.cpu_count() - 1)
# `range` rejects a step of 0, so keep at least one combination per chunk
chunk_size = max(1, len(all_combinations) // num_cores)
chunks = [all_combinations[i:i + chunk_size] for i in range(0, len(all_combinations), chunk_size)]

# Use multiprocessing to process the chunks
with mp.Pool(num_cores) as pool:
    results = pool.map(process_combinations, chunks)

# Combine the results from all chunks; the old analysis returns eight objects
# (no ranked-LNL list and no intervals)
old_treated_lnls_no_risk, old_treated_lnls_all, old_treatment_array, old_top3_spared, old_total_risks, old_treated_ipsi, old_treated_contra, old_sampled_risks_array = zip(*results)


# Flatten the results back into one entry per combination
old_treated_lnls_no_risk = [item for sublist in old_treated_lnls_no_risk for item in sublist]
old_treated_lnls_all = [item for sublist in old_treated_lnls_all for item in sublist]
old_treatment_array = np.vstack(old_treatment_array)
old_top3_spared = [item for sublist in old_top3_spared for item in sublist]
old_total_risks = np.concatenate(old_total_risks)
old_treated_ipsi = [item for sublist in old_treated_ipsi for item in sublist]
old_treated_contra = [item for sublist in old_treated_contra for item in sublist]
old_sampled_risks_array = np.vstack(old_sampled_risks_array)


In [None]:
# Result table of the old sparing version; same layout as `data_export` but
# without the interval bounds and the ranked-LNL column
data_export_old = pd.DataFrame({'Percentage of patients': full_risks,
                                'T-stage': t_stage,
                                'Midline Extension': midline_extension,
                                'Involvement Ipsi' : invovlvement_ipsi,
                                'Involvement Contra': invovlvement_contra,
                                'Treated Ipsi':  old_treated_ipsi,
                                'Treated Contra': old_treated_contra,
                                'risk': old_total_risks,
                                'top 3 spared lnls risk': old_top3_spared,
})
# data_export_old.to_csv('lymph_1_midline_full_table_old_code.csv', sep = ';', index = True)

In [None]:
# Boolean mask of the rows where old and new version disagree on the treated
# levels of either side.
# NOTE(review): if `data_export` was reloaded from CSV, its 'Treated ...'
# columns may hold strings rather than lists, which would make this
# element-wise comparison differ everywhere -- verify.
unequal_indices = ((data_export_old['Treated Ipsi'] != data_export['Treated Ipsi']) | (data_export_old['Treated Contra'] != data_export['Treated Contra']))
data_export.loc[unequal_indices]

Unnamed: 0,Percentage of patients,T-stage,Midline Extension,Involvement Ipsi,Involvement Contra,Treated Ipsi,Treated Contra,risk,lower bound,upper bound,top 3 spared lnls risk,lnls ranked
8579,1.09712e-08,late,False,"[IV, V]","[V, VII]","[I, III, II, IV, V]","[II, IV, V, VII]",0.039771,0.024026,0.07954,"[(ipsi VII, 0.023331219264256325), (contra III...","[(contra I, 0.001309072629178759), (contra III..."
8707,2.589906e-07,late,False,[III],"[V, VII]","[I, IV, II, III]","[II, IV, V, VII]",0.051892,0.035446,0.085536,"[(ipsi VII, 0.020839115355941837), (ipsi V, 0....","[(contra I, 0.0012401274991280047), (contra II..."
9090,7.348945e-07,late,False,"[III, IV, V]",[V],"[I, II, III, IV, V]","[II, IV, V]",0.047262,0.029919,0.088479,"[(ipsi VII, 0.023844022858435666), (contra III...","[(contra I, 0.0013236194799362716), (contra VI..."
9091,2.256146e-08,late,False,"[III, IV, V]","[V, VII]","[I, II, III, IV, V]","[II, IV, V, VII]",0.044042,0.02609,0.089937,"[(ipsi VII, 0.025254577021408965), (contra III...","[(contra I, 0.001362441319108081), (contra III..."
9602,1.915659e-06,late,False,"[II, IV, V]",[V],"[I, III, II, IV, V]","[II, IV, V]",0.045762,0.028984,0.084292,"[(ipsi VII, 0.02324492147828613), (contra III,...","[(contra I, 0.001306521612964222), (contra VII..."
9603,6.128137e-08,late,False,"[II, IV, V]","[V, VII]","[I, III, II, IV, V]","[II, IV, V, VII]",0.042858,0.025659,0.086275,"[(ipsi VII, 0.024738313333915865), (contra III...","[(contra I, 0.001347687283539596), (contra III..."
9730,0.0001071213,late,False,"[II, III]",[V],"[I, IV, II, III]","[II, IV, V]",0.055447,0.038675,0.086678,"[(ipsi VII, 0.020328694799464867), (ipsi V, 0....","[(contra I, 0.0012259671883361732), (contra VI..."
9859,2.550575e-07,late,False,"[II, III, V]","[V, VII]","[I, IV, II, III, V]","[II, IV, V, VII]",0.041335,0.024692,0.083593,"[(ipsi VII, 0.02403209669871501), (contra III,...","[(contra I, 0.0013288089111187311), (contra II..."
9986,3.352292e-05,late,False,"[II, III, IV]",[V],"[I, V, II, III, IV]","[II, IV, V]",0.044175,0.028272,0.081724,"[(ipsi VII, 0.022609269255236077), (contra III...","[(contra I, 0.0012893984724390664), (contra VI..."
9987,1.102958e-06,late,False,"[II, III, IV]","[V, VII]","[I, V, II, III, IV]","[II, IV, V, VII]",0.041283,0.024792,0.083523,"[(ipsi VII, 0.024048338204647267), (contra III...","[(contra I, 0.0013289073929502369), (contra II..."


In [64]:
# The same differing rows, shown as computed by the old version
data_export_old.loc[unequal_indices]

Unnamed: 0,Percentage of patients,T-stage,Midline Extension,Involvement Ipsi,Involvement Contra,Treated Ipsi,Treated Contra,risk,top 3 spared lnls risk
8579,1.09712e-08,late,False,"[IV, V]","[V, VII]","[III, II, IV, V]","[II, IV, V, VII]",0.080568,"[(ipsi I, 0.023518627836490686), (ipsi VII, 0...."
8707,2.589906e-07,late,False,[III],"[V, VII]","[I, IV, II, III]","[IV, V, VII]",0.090305,"[(contra II, 0.021383033141971704), (ipsi VII,..."
9090,7.348945e-07,late,False,"[III, IV, V]",[V],"[I, II, III, IV, V]","[IV, V]",0.090523,"[(contra II, 0.024453612089688912), (ipsi VII,..."
9091,2.256146e-08,late,False,"[III, IV, V]","[V, VII]","[I, II, III, IV, V]","[IV, V, VII]",0.090033,"[(contra II, 0.025933860291638983), (ipsi VII,..."
9602,1.915659e-06,late,False,"[II, IV, V]",[V],"[I, III, II, IV, V]","[IV, V]",0.09289,"[(contra II, 0.02383567141891073), (ipsi VII, ..."
9603,6.128137e-08,late,False,"[II, IV, V]","[V, VII]","[I, III, II, IV, V]","[IV, V, VII]",0.092555,"[(contra II, 0.025391958255796564), (ipsi VII,..."
9730,0.0001071213,late,False,"[II, III]",[V],"[I, IV, II, III]","[IV, V]",0.097422,"[(contra II, 0.020853956964067905), (ipsi VII,..."
9859,2.550575e-07,late,False,"[II, III, V]","[V, VII]","[I, IV, II, III, V]","[IV, V, VII]",0.090239,"[(contra II, 0.024668441449258026), (ipsi VII,..."
9986,3.352292e-05,late,False,"[II, III, IV]",[V],"[I, V, II, III, IV]","[IV, V]",0.090478,"[(contra II, 0.023155732577190856), (ipsi VII,..."
9987,1.102958e-06,late,False,"[II, III, IV]","[V, VII]","[I, V, II, III, IV]","[IV, V, VII]",0.090154,"[(contra II, 0.024646484358220436), (ipsi VII,..."


comparison

In [27]:
from sparing_scripts import risk_sampled, levels_to_spare_old, levels_to_spare, ci_single

# Example where both versions differ (cf. row 8707 of the tables above):
# ipsilateral III and contralateral V and VII positive, late T-stage,
# no midline extension
diagnose = {"ipsi": {'treatment_diagnose':{
        "I": 0,
        "II": 0,
        "III": 1,
        "IV": 0,
        "V": 0,
        "VII": 0
    }},
    "contra": {'treatment_diagnose':{
        "I": 0,
        "II": 0,
        "III": 0,
        "IV": 0,
        "V": 1,
        "VII": 1
    }}}
sampled_risks, risk = risk_sampled(samples = samples_reduced, model = model, t_stage = 'late', given_diagnoses= diagnose,central = None, midline_extension= False)     
# Old sparing rule
spared_lnls, total_risk, ranked_combined, treated_lnls, treated_array, treated_ipsi, treated_contra, sampled_total_risks = levels_to_spare_old(0.10, model, risk, sampled_risks)
print(treated_lnls)
print(total_risk*100)
print(spared_lnls)
# Interval of the sampled total risks, scaled by 100 like the total risk printed above
ci_single(sampled_total_risks)*100

[('ipsi I', 0.023081628360334616), ('ipsi IV', 0.0662893072766804), ('contra IV', 0.2627987721919866), ('ipsi II', 0.7180653085834683), ('ipsi III', 0.9999999999999996), ('contra V', 0.9999999999999997), ('contra VII', 0.9999999999999997)]
9.030513330383124
[('contra I', 0.0012401274991280047), ('contra III', 0.01273980440309123), ('ipsi V', 0.01818739316093681), ('ipsi VII', 0.020839115355941837), ('contra II', 0.021383033141971704)]


array([ 6.97344703, 12.32359009])

In [28]:
# Same patient as in the previous cell, evaluated with the new sparing routine
# (called with ci = True)
sampled_risks, risk = risk_sampled(samples = samples_reduced, model = model, t_stage = 'late', given_diagnoses= diagnose,central = None, midline_extension= False)     
spared_lnls, total_risk, ranked_combined, treated_lnls, treated_array, treated_ipsi, treated_contra, sampled_total_risks = levels_to_spare(0.10, model, risk, sampled_risks, ci = True)
print(treated_lnls)
print(total_risk*100)
print(spared_lnls)
# Interval of the sampled total risks, scaled by 100 like the total risk printed above
ci_single(sampled_total_risks)*100

[('contra II', 0.021383033141971704), ('ipsi I', 0.023081628360334616), ('ipsi IV', 0.0662893072766804), ('contra IV', 0.2627987721919866), ('ipsi II', 0.7180653085834683), ('ipsi III', 0.9999999999999996), ('contra V', 0.9999999999999997), ('contra VII', 0.9999999999999997)]
5.189151867385004
[('contra I', 0.0012401274991280047), ('contra III', 0.01273980440309123), ('ipsi V', 0.01818739316093681), ('ipsi VII', 0.020839115355941837)]


array([3.54457241, 8.55360095])

## Repetition for central tumors

## New central

In [None]:
from sparing_scripts import risk_sampled, levels_to_spare, ci_single

# Same diagnosis as in the comparison above, now evaluated with central = True;
# no midline_extension argument is passed in this case
diagnose = {"ipsi": {'treatment_diagnose':{
        "I": 0,
        "II": 0,
        "III": 1,
        "IV": 0,
        "V": 0,
        "VII": 0
    }},
    "contra": {'treatment_diagnose':{
        "I": 0,
        "II": 0,
        "III": 0,
        "IV": 0,
        "V": 1,
        "VII": 1
    }}}
sampled_risks, risk = risk_sampled(samples = samples_reduced, model = model, t_stage = 'late', given_diagnoses= diagnose,central = True)     
spared_lnls, total_risk, ranked_combined, treated_lnls, treated_array, treated_ipsi, treated_contra, sampled_total_risks = levels_to_spare(0.10, model, risk, sampled_risks, ci = True)
print(treated_lnls)
print(total_risk*100)
print(spared_lnls)
# Interval of the sampled total risks, scaled by 100 like the total risk printed above
ci_single(sampled_total_risks)*100

[('contra IV', 0.05004813899049851), ('ipsi IV', 0.054878036344685056), ('contra III', 0.10166949825871423), ('contra II', 0.4691304590630866), ('ipsi II', 0.6429899580924838), ('ipsi III', 0.9999999999999998), ('contra V', 0.9999999999999999), ('contra VII', 0.9999999999999999)]
6.639454931936219
[('ipsi V', 0.015057652311357403), ('contra I', 0.016134436601919648), ('ipsi VII', 0.01787572757738947), ('ipsi I', 0.019300828124780605)]


array([5.35419414, 8.08110324])

In [None]:
# Central tumors: 13 binary positions instead of 14 (T-stage plus six
# ipsilateral and six contralateral levels; no midline-extension flag)
combination_array_central = np.zeros((2**13,13))
for i in range(2**13):
    combination_array_central[i] = [
        int(digit) for digit in change_base(i, 2, length=13)
    ]

# Convert the 0/1 rows into ('early'|'late', bool, bool, ...) tuples
all_combinations_central = produce_combinations_list(combination_array_central)

In [None]:
# Keep the result of the analysis: the original cell discarded it and then
# referenced `treated_lnls_no_risk_central`, which was never defined (NameError).
# NOTE(review): this serial run uses the full chain `samples1`, whereas the
# parallel cell below uses `samples_reduced` -- confirm which one is intended.
central_results = analysis_treated_lnls_combinations(combinations = all_combinations_central, samples = samples1, model = model, threshold = 0.10,central = True)
# The first returned object is the list of treated LNLs without risks, which is
# the same object that is passed to `count_number_treatments` in the USZ analysis
treated_lnls_no_risk_central = central_results[0]
set_counts_central = count_number_treatments(treated_lnls_no_risk_central)

In [None]:
import multiprocessing as mp

# Function to process a chunk of combinations
def process_combinations(chunk):
    """Run the new-version treatment analysis for central tumors on one chunk.

    Reads the notebook globals `samples_reduced` and `model`.
    """
    return analysis_treated_lnls_combinations(chunk, samples_reduced, model, central = True)

# Divide the combinations into chunks. Two cores are left free, but the worker
# count is clamped to at least 1: with one or two cores `cpu_count() - 2` is
# <= 0, which would break both the division below and `mp.Pool`.
num_cores = max(1, mp.cpu_count() - 2)
# `range` rejects a step of 0, so keep at least one combination per chunk
chunk_size = max(1, len(all_combinations_central) // num_cores)
chunks = [all_combinations_central[i:i + chunk_size] for i in range(0, len(all_combinations_central), chunk_size)]

# Use multiprocessing to process the chunks
with mp.Pool(num_cores) as pool:
    results = pool.map(process_combinations, chunks)

# Combine the results from all chunks: after the transpose every name holds a
# tuple with one entry per chunk. These names overwrite the results of the
# non-central analysis.
treated_lnls_no_risk, treated_lnls_all, treatment_array, top3_spared, total_risks, treated_ipsi, treated_contra, sampled_risks_array, lnls_ranked, cis = zip(*results)

# Flatten the results back into one entry per combination
treated_lnls_no_risk = [item for sublist in treated_lnls_no_risk for item in sublist]
treated_lnls_all = [item for sublist in treated_lnls_all for item in sublist]
treatment_array = np.vstack(treatment_array)
top3_spared = [item for sublist in top3_spared for item in sublist]
total_risks = np.concatenate(total_risks)
treated_ipsi = [item for sublist in treated_ipsi for item in sublist]
treated_contra = [item for sublist in treated_contra for item in sublist]
sampled_risks_array = np.vstack(sampled_risks_array)
lnls_ranked = [item for sublist in lnls_ranked for item in sublist]
# Every chunk's interval result is indexed as [0] = lower and [1] = upper bounds
cis_lower = [item[0] for item in cis]
cis_upper = [item[1] for item in cis]
flat_lower = [item for sublist in cis_lower for item in sublist]
flat_upper = [item for sublist in cis_upper for item in sublist]

In [None]:
# Risks without any given diagnosis for central tumors, one call per T-stage
sampled_risks_early, mean_risk_early = risk_sampled(samples_reduced, model, 'early', central = True, given_diagnoses = None) 
sampled_risks_late, mean_risk_late = risk_sampled(samples_reduced, model, 'late', central = True, given_diagnoses = None)

In [None]:
# Generate every binary state over 13 positions (2**13 rows), then sort the
# rows lexicographically with the first column as the most significant key.
state_list = np.array(np.meshgrid(*[[0, 1]] * 13)).T.reshape(-1, 13)
state_list = state_list[np.lexsort(np.fliplr(state_list).T)]
# Flatten the central mean risks returned by `risk_sampled(..., central=True)`
# into 1-D arrays. The previous version read `mean_risk_*_no_ext` here, which
# belongs to a different scenario and silently discarded the central values.
mean_risk_early = mean_risk_early.reshape(-1)
mean_risk_late = mean_risk_late.reshape(-1)
# Concatenate early and late risks and halve them (equal weight per T-stage)
full_risks = np.hstack([mean_risk_early, mean_risk_late]) / 2

In [None]:
# Decode every central combination into its T-stage and the names of the
# involved ipsi-/contralateral LNLs. Indexing used below: position 0 is the
# T-stage, positions 1-6 are the ipsilateral levels and positions 7-12 the
# contralateral levels, both in the order of `lnls`.
#
# The loop runs over `all_combinations_central` (not `all_combinations`) so the
# rows line up with the treatment results, which were computed from that list.
# NOTE: the misspelled `invovlvement_*` names are kept because the export cell
# reads them.
lnls = ['I','II', 'III', 'IV','V', 'VII']
t_stage = []
invovlvement_ipsi = []
invovlvement_contra = []
for diagnose_type in all_combinations_central:
    involved_ipsi = []
    involved_contra = []
    t_stage.append(diagnose_type[0])
    for lnl_looper, involved_level in enumerate(lnls):
        # `== True` is kept on purpose: only entries equal to True count as involved
        if diagnose_type[lnl_looper + 1] == True:
            involved_ipsi.append(involved_level)
        if diagnose_type[lnl_looper + 7] == True:
            involved_contra.append(involved_level)
    invovlvement_ipsi.append(involved_ipsi)
    invovlvement_contra.append(involved_contra)

In [None]:
# Collect one column per quantity; every entry must hold one value per combination
export_columns = {
    'Percentage of patients': full_risks,
    'T-stage': t_stage,
    'Involvement Ipsi': invovlvement_ipsi,
    'Involvement Contra': invovlvement_contra,
    'Treated Ipsi': treated_ipsi,
    'Treated Contra': treated_contra,
    'risk': total_risks,
    'lower bound': flat_lower,
    'upper bound': flat_upper,
    'top 3 spared lnls risk': top3_spared,
    'lnls ranked': lnls_ranked,
}
data_export = pd.DataFrame(export_columns)
# Write the table as a semicolon-separated file, including the row index
data_export.to_csv(
    'lymph_1_midline_full_table_central_new_code.csv',
    sep=';',
    index=True,
)

## Old Central

In [None]:
# Fill a (2**13, 13) array whose row k holds the digits of k in base 2,
# as produced by `change_base(k, 2, length=13)`.
n_digits_central = 13
n_rows_central = 2 ** n_digits_central
combination_array_central = np.zeros((n_rows_central, n_digits_central))
for row_index in range(n_rows_central):
    digits = change_base(row_index, 2, length=n_digits_central)
    combination_array_central[row_index] = [int(digit) for digit in digits]

# Convert the digit array into the list of combinations used by the analysis
all_combinations_central = produce_combinations_list(combination_array_central)

In [None]:
import multiprocessing as mp
from sparing_scripts import analysis_treated_lnls_combinations_central_old

# Pool worker: analyse one chunk of combinations with the old central routine
def process_combinations(chunk):
    """Run the old central treatment analysis on a single chunk of combinations.

    The chunk is forwarded as the first positional argument, followed by the
    global ``model`` and ``samples_reduced`` (in that order). The helper's
    return value is passed back unchanged.
    """
    chunk_result = analysis_treated_lnls_combinations_central_old(
        chunk, model, samples_reduced
    )
    return chunk_result

# Divide the combinations into chunks.
# Two cores are left free, but the worker count is clamped to at least one:
# on machines with two or fewer cores `cpu_count() - 2` is <= 0, which would
# break both the floor division below and `mp.Pool`.
num_cores = max(1, mp.cpu_count() - 2)
# With fewer combinations than workers the floor division yields 0, and
# `range(..., step=0)` raises; clamp the chunk size to at least one item.
chunk_size = max(1, len(all_combinations_central) // num_cores)
chunks = [
    all_combinations_central[start:start + chunk_size]
    for start in range(0, len(all_combinations_central), chunk_size)
]

# Use multiprocessing to process the chunks; `pool.map` keeps the chunk order,
# so the flattened results below stay aligned with `all_combinations_central`.
with mp.Pool(num_cores) as pool:
    results = pool.map(process_combinations, chunks)

# Combine the results from all chunks: each worker returns an 8-tuple, so
# transposing gives one tuple of per-chunk pieces for every output.
(
    treated_lnls_no_risk_central,
    treated_lnls_all_central,
    treatment_array_central,
    top3_spared_central,
    total_risks_central,
    treated_ipsi_central,
    treated_contra_central,
    sampled_risks_array_central,
) = zip(*results)

# Flatten the per-chunk pieces back into one entry per combination
treated_lnls_no_risk_central = [item for sublist in treated_lnls_no_risk_central for item in sublist]
treated_lnls_all_central = [item for sublist in treated_lnls_all_central for item in sublist]
treatment_array_central = np.vstack(treatment_array_central)
top3_spared_central = [item for sublist in top3_spared_central for item in sublist]
total_risks_central = np.concatenate(total_risks_central)
treated_ipsi_central = [item for sublist in treated_ipsi_central for item in sublist]
treated_contra_central = [item for sublist in treated_contra_central for item in sublist]
sampled_risks_array_central = np.vstack(sampled_risks_array_central)

In [None]:
# Translate every central combination into the names of its involved LNLs.
# Positions 1-6 of a combination are read as the ipsilateral levels and
# positions 7-12 as the contralateral levels, both in the order of `lnls`.
lnls = ['I', 'II', 'III', 'IV', 'V', 'VII']
involvement_ipsi_central = []
involvement_contra_central = []
for diagnose_type in all_combinations_central:
    # `== True` is kept deliberately so only entries equal to True are counted
    ipsi_names = [
        level for pos, level in enumerate(lnls, start=1)
        if diagnose_type[pos] == True
    ]
    contra_names = [
        level for pos, level in enumerate(lnls, start=7)
        if diagnose_type[pos] == True
    ]
    involvement_ipsi_central.append(ipsi_names)
    involvement_contra_central.append(contra_names)

In [None]:
# Dump the raw treatment array first (default comma separator, index included)
df = pd.DataFrame(treatment_array_central)
df.to_csv('central_treatment_array_full_trial.csv')

# Build the summary table; the T-stage is the first entry of every combination
t_stage_column = np.array(all_combinations_central)[:, 0]
export_columns_central = {
    'T-stage': t_stage_column,
    'Involvement Ipsi': involvement_ipsi_central,
    'Involvement Contra': involvement_contra_central,
    'Treated Ipsi': treated_ipsi_central,
    'Treated Contra': treated_contra_central,
    'risk': total_risks_central,
    'top 3 spared lnls risk': top3_spared_central,
}
data_export = pd.DataFrame(export_columns_central)
# Semicolon-separated output without the row index
data_export.to_csv(
    'lymph_1_midline_full_table_central_old_code.csv',
    sep=';',
    index=False,
)