In [1]:
import numpy as np
import lymph
import pandas as pd
import scipy as sp
from scipy.special import factorial
import matplotlib.pyplot as plt
import emcee                      # inference and backends for sample storage
from multiprocessing import Pool  # for parallelization of the inference


dataset_full = pd.read_csv("lynference/data/cleaned.csv", header=[0,1,2]) #import data
dataset_USZ =  pd.read_csv("lynference/data/cleanedUSZ.csv", header=[0,1,2]) #import data


maxllh =  dataset_USZ['max_llh']
t_stage = dataset_USZ['info']
ipsi = maxllh.loc[:,'ipsi'].drop(['IIa','IIb','VIII','Ib','IX','VI','X','Ia'],axis = 1)[['I','II','III','IV','V','VII']]
contra = maxllh.loc[:,'contra'].drop(['IIa','IIb','VIII','Ib','IX','VI','X','Ia'],axis = 1)[['I','II','III','IV','V','VII']]
ipsi_header = header = pd.MultiIndex.from_product([ ['ipsi'], ['I','II','III','IV','V','VII']], names=['', ''])
contra_header = pd.MultiIndex.from_product([['contra'], ['I','II','III','IV','V','VII']], names=['', ''])
ipsi.columns = ipsi_header
contra.columns = contra_header

dataset_analyze = pd.concat([t_stage,ipsi,contra],axis = 1)

dataset_full

Unnamed: 0_level_0,FNA,FNA,FNA,FNA,FNA,FNA,FNA,FNA,FNA,FNA,...,diagnostic_consensus,diagnostic_consensus,diagnostic_consensus,diagnostic_consensus,diagnostic_consensus,diagnostic_consensus,diagnostic_consensus,diagnostic_consensus,info,info
Unnamed: 0_level_1,contra,contra,contra,contra,contra,contra,contra,contra,contra,contra,...,contra,contra,contra,contra,contra,contra,ipsi,contra,tumor,tumor
Unnamed: 0_level_2,I,Ia,Ib,II,IIa,IIb,III,IV,V,VI,...,Ib,II,III,IV,V,VII,I,I,t_stage,midline_extension
0,,,,False,,,False,,,,...,,,,,,,,,late,True
1,,,,True,True,,,,,,...,,,,,,,,,early,False
2,,,,,,,,,,,...,,,,,,,,,late,True
3,,,,,,,,,,,...,,,,,,,,,late,True
4,,,,,,,,,,,...,,,,,,,,,early,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,,,,,,,,,,,...,False,False,False,False,False,,False,False,early,False
546,,,,,,,,,,,...,False,False,False,False,False,,False,False,late,True
547,,,,,,,,,,,...,False,False,False,False,False,,False,False,early,False
548,,,,,,,,,,,...,False,False,False,False,False,,False,False,early,False


In [2]:
graph = {
    ('tumor', 'primary')  : ['I','II', 'III', 'IV','V', 'VII'],
    ('lnl'  , 'I') :        [],
    ('lnl'  , 'II') :       ['I','III','V'], 
    ('lnl'  , 'III'):       ['IV','V'], 
    ('lnl'  , 'IV') :       [],
    ('lnl'  , 'V') :        [],
    ('lnl'  , 'VII') :      [],
}

model = lymph.MidlineBilateral(graph = graph,use_mixing= True, trans_symmetric =True)
model.modalities = {'CT': [0.76, 0.81],
                    'MRI': [0.63, 0.81],
                    'PET': [0.86, 0.79],
                    'FNA': [0.98, 0.80],
                    'diagnostic_consensus': [0.86, 0.81],
                    'pathology': [1.0, 1.0],
                    'pCT': [0.86, 0.81],
                    'max_llh': [1.0, 1.0]
                    }


# Time prior with p(early) = 0.3
def binom_pmf(k: np.ndarray, n: int, p: float):
    """Binomial PMF"""
    if p > 1. or p < 0.:
        raise ValueError("Binomial prob must be btw. 0 and 1")
    q = (1. - p)
    binom_coeff = factorial(n) / (factorial(k) * factorial(n - k))
    return binom_coeff * p**k * q**(n - k)

def parametric_binom_pmf(n: int):
    """Return a parametric binomial PMF"""
    def inner(t, p):
        """Parametric binomial PMF"""
        return binom_pmf(t, n, p)
    return inner

max_t = 10
model.diag_time_dists["early"] = sp.stats.binom.pmf(np.arange(max_t+1), max_t, 0.3)
model.diag_time_dists["late"] = parametric_binom_pmf(max_t)
model.patient_data = dataset_full

In [3]:
backend = emcee.backends.HDFBackend(filename = "lynference/models/samples.hdf5")
samples = backend.get_chain(flat = True)
model.check_and_assign(samples.mean(axis = 0))
model.modalities = {'max_llh_diagnose' : [1,0.81]}

In [128]:
def levels_to_spare(threshold, model, risks):
    """Computes which LNLs to irradiate given the threshold, model and the risk of each state.

    Args:
        threshold (float): Risk threshold we want to apply
        model (lymph.Unilateral): lymph.unilateral object with fully analyzed patients
        risks (ndarray): Array with the risk of each state

    Returns:
        _type_: _description_
    """
    state_list = model.noext.ipsi.state_list
    lnls = ['I','II', 'III', 'IV','V', 'VII']
    overall_risk_ipsi = {}
    overall_risk_contra = {}
    for index, lnl in enumerate(lnls):
        overall_risk_ipsi[lnl] = risks[np.where((state_list[:,index] == 1))[0]].sum()
        overall_risk_contra[lnl] = risks.T[np.where((state_list[:,index] == 1))[0]].sum()

    combined_dict = {f'ipsi {key}': value for key, value in overall_risk_ipsi.items()}
    combined_dict.update({f'contra {key}': value for key, value in overall_risk_contra.items()})
    ranked_combined = sorted(combined_dict.items(), key = lambda item: item[1])
    total_risk_new = 0
    looper = 1
    treated_array = np.ones(12)
    contra_lnl_indices = []
    ipsi_lnl_indices = []
    treated_ipsi = []
    treated_contra = []
    while total_risk_new < threshold:
        total_risk = total_risk_new
        if ipsi_lnl_indices != []:
            treated_array[ipsi_lnl_indices] = 0
        if contra_lnl_indices != []:
            treated_array[np.array(contra_lnl_indices)+6] = 0
        lnls_of_interest = ranked_combined[0:looper]
        lnls_of_interest_names = [t[0] for t in lnls_of_interest]
        contra_lnl_indices = []
        ipsi_lnl_indices = []
        for i,lnl_looper in enumerate(lnls_of_interest_names):
            contra_lnl_indices.append(np.where(np.array(lnls) == lnls_of_interest_names[i].split()[1])[0][0]) if lnl_looper.split()[0] == 'contra' else ipsi_lnl_indices.append(np.where(np.array(lnls) == lnls_of_interest_names[i].split()[1])[0][0])
        indices_list_contra = []
        indices_list_ipsi = []
        for index in contra_lnl_indices:
            condition_contra = (state_list[:, index] == 1)
            indices_contra = np.where(condition_contra)[0]
            indices_list_contra.extend(indices_contra)
            unique_contra = np.unique(indices_list_contra)
        for index in ipsi_lnl_indices:
            condition_ipsi = (state_list[:, index] == 1)
            indices_ipsi = np.where(condition_ipsi)[0]
            indices_list_ipsi.extend(indices_ipsi)
            unique_ipsi = np.unique(indices_list_ipsi)    
        if len(ipsi_lnl_indices) == 0:
            total_risk_new = risks.T[unique_contra].sum()
        elif len(contra_lnl_indices) == 0:
            total_risk_new = risks.T[unique_ipsi].sum()
        else:
            total_risk_new = 0
            total_risk_new += risks[unique_ipsi].sum()
            total_risk_new += risks.T[unique_contra][:,[np.setdiff1d(np.array(range(64)),unique_ipsi)]].sum()
        spared_lnls = lnls_of_interest[:-1]
        treated_lnls = ranked_combined[looper-1:]
        looper += 1
        # print(total_risk_new)
    for to_treat in treated_lnls:
        if to_treat[0].split()[0] == 'ipsi':
            treated_ipsi.append(to_treat[0].split()[1])
        else: 
            treated_contra.append(to_treat[0].split()[1])
    return spared_lnls, total_risk, ranked_combined, treated_lnls, treated_array, treated_ipsi, treated_contra

In [132]:
diagnose = {'max_llh_diagnose':{
    "ipsi": {
        "I": 0,
        "II": 1,
        "III": 1,
        "IV": 0,
        "V": 0,
        "VII": 0,
    },
    "contra": {
        "I": 0,
        "II": 0,
        "III": 0,
        "IV": 0,
        "V": 0,
        "VII": 0,
    }
}}
risk = model.risk(given_params = samples.mean(axis = 0), t_stage = 'late', given_diagnoses = diagnose,midline_extension=True) 
spared_lnls, total_risk, ranked_combined, treated_lnls, treated_array, treated_ipsi, treated_contra = levels_to_spare(0.10, model, risk)
print(treated_lnls)
print(total_risk)
spared_lnls

[('ipsi IV', 0.05261170572428035), ('contra II', 0.07324496418473062), ('ipsi II', 0.9999999999999999), ('ipsi III', 0.9999999999999999)]
0.09210693760145781


[('contra V', 0.0023016528638010393),
 ('contra I', 0.00306588162903435),
 ('contra IV', 0.003582285928790788),
 ('contra VII', 0.007380262777922642),
 ('contra III', 0.013145615542405105),
 ('ipsi VII', 0.01700921797206342),
 ('ipsi V', 0.023726098426372393),
 ('ipsi I', 0.0270270886338693)]

In [86]:
thin = 100
sampled_risks = np.zeros(shape=(len(samples[::thin]),64,64), dtype=float)
for i, sample in enumerate(np.random.permutation(samples[::thin])):
    sampled_risks[i] = model.risk(given_params = sample, t_stage = 'late', given_diagnoses = diagnose,midline_extension=True) 
spared_lnls2, total_risk2, ranked_combined2, treated_lnls2, treated_array2 = levels_to_spare(0.10, model, risk)
(total_risk-total_risk2)*100

0.0

In [17]:
import numpy as np
from scipy import stats
# Sample data (replace this with your list of values)
data = [0.0033651325742671545,0.0006296642905592731,0.0038544965474826554,0.027808747252694443,0.06140695725329742,0.057461732311052394,0.004550020642171693,0.024459000763246697]


# Calculate the mean and standard error of the mean (SEM)
mean = np.mean(data)
sem = stats.sem(data)

# Define the confidence level (e.g., 95%)
confidence_level = 0.95

# Calculate the margin of error
margin_of_error = sem * stats.t.ppf((1 + confidence_level) / 2, len(data) - 1)

# Calculate the confidence interval
confidence_interval = (mean - margin_of_error, mean + margin_of_error)

print(f"Mean: {mean:.2f}")
print(f"Standard Error of the Mean: {sem:.2f}")
print(f"95% Confidence Interval: ({confidence_interval[0]:.2f}, {confidence_interval[1]:.2f})")


Mean: 0.02
Standard Error of the Mean: 0.01
95% Confidence Interval: (0.00, 0.04)


## Combination analysis

In [144]:
from collections import Counter
from collections import defaultdict


# Sample array with different entry combinations
data = np.array(dataset_analyze)

entry_combinations_with_indexes = defaultdict(list)
for index, row in enumerate(data):
    combination = tuple(row)
    entry_combinations_with_indexes[combination].append(index)
USZ_counts = []
USZ_combinations = []
USZ_indexes = []
# Print the most common combinations, their USZ_counts, and indexes
for combination, indexes in entry_combinations_with_indexes.items():
    count = len(indexes)
    USZ_indexes.append(indexes)
    # print(f"Combination: {combination}, Count: {count}, Indexes: {indexes}")
    USZ_counts.append(count)
    USZ_combinations.append(combination)

lnls = ['I','II', 'III', 'IV','V', 'VII']
t_stage = []
midline_extension = []
invovlvement_ipsi_USZ = []
invovlvement_contra_USZ = []
for diagnose_type in USZ_combinations:
    involved_ipsi = []
    involved_contra = []
    t_stage.append(diagnose_type[0])
    midline_extension.append(diagnose_type[1])
    for lnl_looper, involved_level in enumerate(lnls):
        if diagnose_type[lnl_looper +2] == True:
            involved_ipsi.append(involved_level) 
        if diagnose_type[lnl_looper +8] == True:
            involved_contra.append(involved_level)
    invovlvement_ipsi_USZ.append(involved_ipsi)
    invovlvement_contra_USZ.append(involved_contra)

In [134]:
def analysis_treated_lnls(combinations):
    treatment_array = np.zeros((len(combinations),12))
    top3_spared = []
    diagnose_looper = {'max_llh_diagnose':{
        "ipsi": {
            "I": 0,
            "II": 0,
            "III": 0,
            "IV": 0,
            "V": 0,
            "VII": 0,
        },
        "contra": {
            "I": 0,
            "II": 0,
            "III": 0,
            "IV": 0,
            "V": 0,
            "VII": 0,
        }
    }}
    treated_lnls_all = []
    treated_lnls_no_risk = []
    total_risks = np.zeros(len(combinations))
    treated_ipsi_all = []
    treated_contra_all = []
    for index, pattern in enumerate(combinations):
        treated_looper = set()
        stage = pattern[0]
        midline_extension = pattern[1]
        counter_ipsi = 0
        for lnl_ipsi, status in diagnose_looper['max_llh_diagnose']['ipsi'].items():
            diagnose_looper['max_llh_diagnose']['ipsi'][lnl_ipsi] = pattern[2+counter_ipsi]
            counter_ipsi += 1
        counter_contra = 0
        for lnl_contra, status in diagnose_looper['max_llh_diagnose']['contra'].items():
            diagnose_looper['max_llh_diagnose']['contra'][lnl_contra] = pattern[8+counter_contra]
            counter_contra += 1
        risk = model.risk(given_params = samples.mean(axis = 0), t_stage = stage, given_diagnoses = diagnose_looper,midline_extension=midline_extension)     
        spared_lnls, total_risk, ranked_combined, treated_lnls, treated_array, treated_ipsi, treated_contra =levels_to_spare(0.10, model, risk)
        for i in treated_lnls:
            treated_looper.add(i[0])
        treated_lnls_all.append(treated_lnls)
        treated_lnls_no_risk.append(treated_looper)
        treatment_array[index] = treated_array
        total_risks[index] = total_risk
        top3_spared.append(spared_lnls[::-1][:3])
        treated_ipsi_all.append(treated_ipsi)
        treated_contra_all.append(treated_contra)
    return treated_lnls_no_risk, treated_lnls_all, treatment_array, top3_spared, total_risks, treated_ipsi_all, treated_contra_all

def count_numnber_treatments(treated_lnls_no_risk):
    set_counts = {}
    # Iterate through the list and update the counts in the dictionary
    for value in treated_lnls_no_risk:
        frozen_set = frozenset(value)  # Convert the set to a frozenset
        if frozen_set in set_counts:
            set_counts[frozen_set] += 1
        else:
            set_counts[frozen_set] = 1
    return set_counts


In [135]:
usz_treated_lnls_no_risk, usz_treated_lnls_all, usz_treatment_array, usz_top3_spared, usz_total_risks, usz_treated_ipsi, usz_treated_contra = analysis_treated_lnls(USZ_combinations)
usz_set_counts = count_numnber_treatments(usz_treated_lnls_no_risk)
len(usz_set_counts)

39

In [150]:
data_export_usz = pd.DataFrame({'Percentage of patients': np.array(USZ_counts)/287,
                                'T-stage': t_stage,
                                'Midline Extension': midline_extension,
                                'Involvement Ipsi' : invovlvement_ipsi_USZ,
                                'Involvement Contra': invovlvement_contra_USZ,
                                'Treated Ipsi':  usz_treated_ipsi,
                                'Treated Contra': usz_treated_contra,
                                'risk': usz_total_risks,
                                'top 3 spared lnls risk': usz_top3_spared

})
data_export_usz.to_csv('analyzed_usz_data.csv', sep = ';', index = False)

In [14]:
#eine Spielerei
number_of_repetitions = []
for key, value in usz_set_counts.items():
    number_of_repetitions.append(value)
usz_set_counts[frozenset({'ipsi II'})]
asdf = (np.array(USZ_combinations) == ['late',False,False,True,False,False,False,False,False,False,False,False,False,False,])
# Define the condition (e.g., all 'True' values)
condition = (asdf[:, 1:] == 'True').all(axis=1)

# Find indices where the condition is met
indices = np.where(condition)

print(indices)

(array([], dtype=int64),)


## Here we repeat the analysis for all possible combinations

In [15]:
from itertools import product

# Define the possible values for the first element and the 13 boolean values
first_element_values = ['early', 'late']
boolean_values = [True, False]

# Generate all possible combinations using nested loops
all_combinations = []

for first_element in first_element_values:
    for bool_combination in product(boolean_values, repeat=13):
        vector = (first_element,) + bool_combination
        all_combinations.append(vector)



In [56]:
full_treated_lnls_no_risk, full_treated_lnls_all, full_treatment_array = analysis_treated_lnls(all_combinations)

In [61]:
all_combinations[-2]

('late',
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 True)

In [30]:
full_treated_lnls_no_risk[-1]

{'ipsi II'}

In [17]:
full_set_count = count_numnber_treatments(full_treated_lnls_no_risk)

In [20]:
number_of_repetitions = []
full_combinations_array = np.zeros(len(full_set_count))
loop = 0
for key, value in full_set_count.items():
    number_of_repetitions.append(value)
    full_combinations_array[loop] = value
    loop+=1
full_combinations_array

array([2.837e+03, 1.400e+02, 1.250e+02, 1.480e+02, 5.900e+01, 5.200e+01,
       5.400e+01, 4.900e+01, 2.400e+01, 2.400e+01, 1.540e+02, 1.620e+02,
       2.130e+02, 2.270e+02, 4.700e+01, 5.100e+01, 3.500e+01, 3.200e+01,
       2.400e+01, 2.400e+01, 1.440e+02, 2.990e+02, 7.400e+01, 2.900e+01,
       3.300e+01, 4.800e+01, 2.900e+01, 2.500e+01, 3.050e+02, 2.200e+02,
       3.170e+02, 7.700e+01, 2.600e+01, 2.500e+01, 7.200e+01, 4.900e+01,
       1.400e+01, 1.400e+01, 1.450e+02, 1.730e+02, 1.650e+02, 5.600e+01,
       5.400e+01, 3.200e+01, 3.200e+01, 1.650e+02, 1.630e+02, 1.220e+02,
       2.500e+01, 2.500e+01, 1.140e+02, 4.500e+01, 4.200e+01, 1.200e+01,
       1.200e+01, 1.940e+02, 3.600e+01, 3.500e+01, 1.130e+02, 5.400e+01,
       4.800e+01, 2.600e+01, 2.500e+01, 8.500e+01, 1.280e+02, 7.900e+01,
       7.900e+01, 3.700e+01, 3.600e+01, 1.200e+01, 1.200e+01, 1.800e+01,
       1.800e+01, 1.500e+01, 1.500e+01, 4.800e+01, 4.000e+01, 4.000e+01,
       1.600e+01, 1.600e+01, 1.000e+01, 1.000e+01, 

In [33]:
full_treated_lnls_no_risk

[{'contra I',
  'contra II',
  'contra III',
  'contra IV',
  'contra V',
  'contra VII',
  'ipsi I',
  'ipsi II',
  'ipsi III',
  'ipsi IV',
  'ipsi V',
  'ipsi VII'},
 {'contra I',
  'contra II',
  'contra III',
  'contra IV',
  'contra V',
  'ipsi I',
  'ipsi II',
  'ipsi III',
  'ipsi IV',
  'ipsi V',
  'ipsi VII'},
 {'contra I',
  'contra II',
  'contra III',
  'contra IV',
  'contra VII',
  'ipsi I',
  'ipsi II',
  'ipsi III',
  'ipsi IV',
  'ipsi V',
  'ipsi VII'},
 {'contra I',
  'contra II',
  'contra III',
  'contra IV',
  'ipsi I',
  'ipsi II',
  'ipsi III',
  'ipsi IV',
  'ipsi V',
  'ipsi VII'},
 {'contra I',
  'contra II',
  'contra III',
  'contra IV',
  'contra V',
  'contra VII',
  'ipsi I',
  'ipsi II',
  'ipsi III',
  'ipsi IV',
  'ipsi V',
  'ipsi VII'},
 {'contra I',
  'contra II',
  'contra III',
  'contra IV',
  'contra V',
  'ipsi I',
  'ipsi II',
  'ipsi III',
  'ipsi IV',
  'ipsi V',
  'ipsi VII'},
 {'contra I',
  'contra II',
  'contra III',
  'contra VII',
 