In [1]:
from deap import base
from deap import creator
from deap import tools
from deap import algorithms

import time

import array
import pandas as pd

import random
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from lifelines import KaplanMeierFitter

from lifelines import CoxPHFitter

from Data.dataset import Flchain, SimPHData

from scipy.stats import ttest_ind, ttest_rel

import seaborn as sns
from IPython.display import display

from model import ExpOneBitChange



In [12]:
def run_exps_changeing_censoring(correctly_labeled_p, n_samples=10, n_iterations=3):
    """Run the one-bit-change experiment over a grid of censoring proportions.

    For every censoring proportion ``pi * 0.1`` (``pi`` = 1..9) and every
    iteration ``k`` in ``range(n_iterations)``, an ``ExpOneBitChange`` is
    constructed with ``random_state_seed=k``, executed via ``exp.run(k)``,
    and its outputs are collected.

    Parameters
    ----------
    correctly_labeled_p
        Forwarded unchanged to ``ExpOneBitChange``.
    n_samples : int, default 10
        Forwarded unchanged to ``ExpOneBitChange``.
    n_iterations : int, default 3
        Number of experiments (seeds ``0..n_iterations-1``) per censoring
        proportion.

    Returns
    -------
    exps_df : pandas.DataFrame
        Row-wise concatenation of every ``exp.results_df`` with a fresh
        0..N-1 index. Empty when no experiment was run.
    e_df : pandas.DataFrame
        Two columns per experiment: ``e_{pi}_orig_{k}`` holds ``exp.e`` and
        ``e_{pi}_{k}`` holds ``exp.new_unknown_true_e``.
    """
    # Collect pieces and build each frame once at the end: concatenating /
    # inserting columns inside the loop re-copies everything accumulated so
    # far and leaves a heavily fragmented frame for large n_iterations.
    result_frames = []
    event_columns = {}

    for pi in range(1, 10):
        # NOTE: pi * 0.1 is kept as-is so the values handed to the experiment
        # do not change; some of them carry float noise (3 * 0.1 != 0.3).
        pc = pi * 0.1
        print(f'censoring_p_orig:{pc:.2f}')
        for k in range(n_iterations):
            exp = ExpOneBitChange(n_samples=n_samples, censoring_p_orig=pc, random_state_seed=k, correctly_labeled_p=correctly_labeled_p)
            # '\r' keeps the progress report on a single, continuously overwritten line.
            print(f'             Iteration: {k}, Total events: {exp.e.sum()}, Correct events: {exp.new_unknown_true_e.sum()}', end='\r', flush=True)
            exp.run(k)
            event_columns[f'e_{pi}_orig_{k}'] = exp.e
            event_columns[f'e_{pi}_{k}'] = exp.new_unknown_true_e
            result_frames.append(exp.results_df)
        # Move past the '\r'-terminated progress line before the next header.
        print('')

    # pd.concat raises on an empty list, so fall back to an empty frame.
    exps_df = pd.concat(result_frames, ignore_index=True) if result_frames else pd.DataFrame()
    e_df = pd.DataFrame(event_columns)
    return exps_df, e_df

# Correctly Labeled 0.25

In [13]:
# Censoring sweep with correctly_labeled_p=0.25:
# 1000 iterations per censoring level, 100 samples per experiment.
exps_df25, e_df25 = run_exps_changeing_censoring(
    n_iterations=1000,
    n_samples=100,
    correctly_labeled_p=0.25,
)

censoring_p_orig:0.10
Bits% 1.0000 Iteration: 999, Total events: 90.0, Correct events: 23.0
censoring_p_orig:0.20
Bits% 1.0000 Iteration: 999, Total events: 80.0, Correct events: 20.0
censoring_p_orig:0.30
Bits% 1.0000 Iteration: 999, Total events: 70.0, Correct events: 18.0
censoring_p_orig:0.40
Bits% 1.0000 Iteration: 999, Total events: 60.0, Correct events: 15.0
censoring_p_orig:0.50
Bits% 1.0000 Iteration: 999, Total events: 50.0, Correct events: 13.0
censoring_p_orig:0.60
Bits% 1.0000 Iteration: 999, Total events: 40.0, Correct events: 10.0
censoring_p_orig:0.70
Bits% 1.0000 Iteration: 999, Total events: 30.0, Correct events: 8.0
censoring_p_orig:0.80
Bits% 1.0000 Iteration: 999, Total events: 20.0, Correct events: 5.0
censoring_p_orig:0.90
Bits% 1.0000 Iteration: 999, Total events: 10.0, Correct events: 3.0


In [14]:
# Write both frames of the 0.25 run to CSV, leaving the index out of the files.
for _frame, _csv_name in (
    (exps_df25, 'flip_one_bit_sim_exps_df_25_correct.csv'),
    (e_df25, 'flip_one_bit_sim_e_df_25_correct.csv'),
):
    _frame.to_csv(_csv_name, index=False)

In [15]:
# Per (exp_id, censoring_p_orig) group, keep the row(s) whose diff_from_base_ci
# equals the group's maximum; tied rows are all kept.
g25 = exps_df25.groupby(['exp_id', 'censoring_p_orig'])
max_df25 = g25.apply(
    lambda grp: grp.loc[grp['diff_from_base_ci'].eq(grp['diff_from_base_ci'].max())]
)

# Mean of is_wrong_event_label over those rows, shown next to the 1 - 0.25 baseline.
print(
    'P that max change in CI is caused by wrongly labeled instance:',
    max_df25['is_wrong_event_label'].mean(),
    f', the random guess is {1-0.25}',
)

P that max change in CI is caused by wrongly labeled instance: 0.8791111111111111 , the random guess is 0.75


# Correctly Labeled 0.50

In [16]:
# Censoring sweep with correctly_labeled_p=0.50:
# 1000 iterations per censoring level, 100 samples per experiment.
exps_df50, e_df50 = run_exps_changeing_censoring(
    n_iterations=1000,
    n_samples=100,
    correctly_labeled_p=0.50,
)

censoring_p_orig:0.10
Bits% 1.0000 Iteration: 999, Total events: 90.0, Correct events: 45.0
censoring_p_orig:0.20
Bits% 1.0000 Iteration: 999, Total events: 80.0, Correct events: 40.0
censoring_p_orig:0.30
Bits% 1.0000 Iteration: 999, Total events: 70.0, Correct events: 35.0
censoring_p_orig:0.40
Bits% 1.0000 Iteration: 999, Total events: 60.0, Correct events: 30.0
censoring_p_orig:0.50
Bits% 1.0000 Iteration: 999, Total events: 50.0, Correct events: 25.0
censoring_p_orig:0.60
Bits% 1.0000 Iteration: 999, Total events: 40.0, Correct events: 20.0
censoring_p_orig:0.70
Bits% 1.0000 Iteration: 999, Total events: 30.0, Correct events: 15.0
censoring_p_orig:0.80
Bits% 1.0000 Iteration: 999, Total events: 20.0, Correct events: 10.0
censoring_p_orig:0.90
Bits% 1.0000 Iteration: 999, Total events: 10.0, Correct events: 5.0


In [17]:
# Write both frames of the 0.50 run to CSV, leaving the index out of the files.
for _frame, _csv_name in (
    (exps_df50, 'flip_one_bit_sim_exps_df_50_correct.csv'),
    (e_df50, 'flip_one_bit_sim_e_df_50_correct.csv'),
):
    _frame.to_csv(_csv_name, index=False)

In [18]:
# Per (exp_id, censoring_p_orig) group, keep the row(s) whose diff_from_base_ci
# equals the group's maximum; tied rows are all kept.
g50 = exps_df50.groupby(['exp_id', 'censoring_p_orig'])
max_df50 = g50.apply(
    lambda grp: grp.loc[grp['diff_from_base_ci'].eq(grp['diff_from_base_ci'].max())]
)

# Mean of is_wrong_event_label over those rows, shown next to the 1 - 0.50 baseline.
print(
    'P that max change in CI is caused by wrongly labeled instance:',
    max_df50['is_wrong_event_label'].mean(),
    f', the random guess is {1-0.50}',
)

P that max change in CI is caused by wrongly labeled instance: 0.7196666666666667 , the random guess is 0.5


# Correctly Labeled 0.75

In [19]:
# Censoring sweep with correctly_labeled_p=0.75:
# 1000 iterations per censoring level, 100 samples per experiment.
exps_df75, e_df75 = run_exps_changeing_censoring(
    n_iterations=1000,
    n_samples=100,
    correctly_labeled_p=0.75,
)

censoring_p_orig:0.10
Bits% 1.0000 Iteration: 999, Total events: 90.0, Correct events: 68.0
censoring_p_orig:0.20
Bits% 1.0000 Iteration: 999, Total events: 80.0, Correct events: 60.0
censoring_p_orig:0.30
Bits% 1.0000 Iteration: 999, Total events: 70.0, Correct events: 53.0
censoring_p_orig:0.40
Bits% 1.0000 Iteration: 999, Total events: 60.0, Correct events: 45.0
censoring_p_orig:0.50
Bits% 1.0000 Iteration: 999, Total events: 50.0, Correct events: 38.0
censoring_p_orig:0.60
Bits% 1.0000 Iteration: 999, Total events: 40.0, Correct events: 30.0
censoring_p_orig:0.70
Bits% 1.0000 Iteration: 999, Total events: 30.0, Correct events: 23.0
censoring_p_orig:0.80
Bits% 1.0000 Iteration: 999, Total events: 20.0, Correct events: 15.0
censoring_p_orig:0.90
Bits% 1.0000 Iteration: 999, Total events: 10.0, Correct events: 8.0


In [20]:
# Write both frames of the 0.75 run to CSV, leaving the index out of the files.
for _frame, _csv_name in (
    (exps_df75, 'flip_one_bit_sim_exps_df_75_correct.csv'),
    (e_df75, 'flip_one_bit_sim_e_df_75_correct.csv'),
):
    _frame.to_csv(_csv_name, index=False)

In [21]:
# Per (exp_id, censoring_p_orig) group, keep the row(s) whose diff_from_base_ci
# equals the group's maximum; tied rows are all kept.
g75 = exps_df75.groupby(['exp_id', 'censoring_p_orig'])
max_df75 = g75.apply(
    lambda grp: grp.loc[grp['diff_from_base_ci'].eq(grp['diff_from_base_ci'].max())]
)

# Mean of is_wrong_event_label over those rows, shown next to the 1 - 0.75 baseline.
print(
    'P that max change in CI is caused by wrongly labeled instance:',
    max_df75['is_wrong_event_label'].mean(),
    f', the random guess is {1-0.75}',
)

P that max change in CI is caused by wrongly labeled instance: 0.44433333333333336 , the random guess is 0.25
