In [27]:
import pandas as pd
import numpy as np
from make_df import create_df, make_inputs, create_nscar_df
from naive import naive_fit, get_true_labels
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import log_loss
import matplotlib



In [19]:
#To avoid a dataset with only one label we append the following extra columns.

extra0 = pd.DataFrame([[0.1,0.1,0.1,0,1,0]],columns = ['x1', 'x2', 'x3', 'cure_label', 'int', 'nscar_censoring_indicator'])

extra1 = pd.DataFrame([[0.1,0.1,0.1,1,1,1]],columns = ['x1', 'x2', 'x3', 'cure_label', 'int', 'nscar_censoring_indicator'])

In [20]:
extra = pd.concat([extra0, extra1]) 

In [40]:
#In these tests we compart our algo with the clustering approach

r_weights = np.random.uniform(-0.9,0.9,(10,4))
r_nscar_weights = np.random.uniform(-0.9,0.9,(10,4)) #covariate weights determining prob of being censored 
scores = []
covariates = ['x1', 'x2', 'x3']
dist = [[0, 1], [0, 1], [0, 1]]
cols = ['nscar_censoring_indicator', 'cure_label']

for test_model_weights in r_weights: 
    
    for nscar_weights in r_nscar_weights:
    
        foo = create_nscar_df(covariates, dist, 150, test_model_weights, nscar_weights)
    
        foo = pd.concat([foo, extra])
    
        censored_inputs = make_inputs(foo, 0, cols)

        noncensored_inputs = make_inputs(foo, 1, cols)
    
        fit = naive_fit(censored_inputs, noncensored_inputs, 'use_HardEM')    
    
        y_pred = fit['pred']
    
        y_true = get_true_labels(foo, ['nscar_censoring_indicator', 'cure_label'])
    
        y_scores = fit['prob']
    
        hard_acc = accuracy_score(y_true, y_pred)
    
        hard_auc = roc_auc_score(y_true, y_scores)
        
        hard_logloss = log_loss(y_true, y_scores)
    
        fit_naive = naive_fit(censored_inputs, noncensored_inputs, 'use_clustering')
    
        y_pred_naive = fit_naive['pred']
    
        y_true_naive = get_true_labels(foo, ['nscar_censoring_indicator', 'cure_label'])
    
        y_scores_naive = fit_naive['prob']
    
        naive_acc = accuracy_score(y_true_naive, y_pred_naive)
    
        naive_auc = roc_auc_score(y_true_naive, y_scores_naive)
        
        naive_logloss = log_loss(y_true_naive, y_scores_naive)
    

        scores.append([hard_auc, naive_auc, hard_acc, naive_acc, hard_logloss, naive_logloss])
    
new_df = pd.DataFrame(columns=['Hard_auc', 'Naive_auc', 'Hard_acc', 'Naive_acc', 'Hard_ll', 'Naive_ll'], data=scores)


In [41]:
len(new_df)

100

In [42]:
len(new_df[new_df['Hard_ll']-new_df['Naive_ll']<0].index)
#How many times our logloss score was lower (better).

71

In [43]:
len(new_df[new_df['Hard_auc']-new_df['Naive_auc']>0].index)
#How many times our AUC score was higher.

61

In [46]:
len(new_df[new_df['Hard_acc']-new_df['Naive_acc']>0].index) 
#How many times our accuracy was higher.

51

In [34]:
#In these tests we compart our algo with 'all censored assumed cured'

r_weights = np.random.uniform(-0.9,0.9,(10,4))
r_nscar_weights = np.random.uniform(-0.9,0.9,(10,4)) #covariate weights determining prob of being censored 
scores = []
covariates = ['x1', 'x2', 'x3']
dist = [[0, 1], [0, 1], [0, 1]]
cols = ['nscar_censoring_indicator', 'cure_label']

for test_model_weights in r_weights: 
    
    for nscar_weights in r_nscar_weights:
    
        foo = create_nscar_df(covariates, dist, 150, test_model_weights, nscar_weights)
    
        foo = pd.concat([foo, extra])
    
        censored_inputs = make_inputs(foo, 0, cols)

        noncensored_inputs = make_inputs(foo, 1, cols)
    
        fit = naive_fit(censored_inputs, noncensored_inputs, 'use_HardEM')    
    
        y_pred = fit['pred']
    
        y_true = get_true_labels(foo, ['nscar_censoring_indicator', 'cure_label'])
    
        y_scores = fit['prob']
    
        hard_acc = accuracy_score(y_true, y_pred)
    
        hard_auc = roc_auc_score(y_true, y_scores)
        
        hard_logloss = log_loss(y_true, y_scores)
    
        fit_naive = naive_fit(censored_inputs, noncensored_inputs, 'all_cens_cured')
    
        y_pred_naive = fit_naive['pred']
    
        y_true_naive = get_true_labels(foo, ['nscar_censoring_indicator', 'cure_label'])
    
        y_scores_naive = fit_naive['prob']
    
        naive_acc = accuracy_score(y_true_naive, y_pred_naive)
    
        naive_auc = roc_auc_score(y_true_naive, y_scores_naive)
        
        naive_logloss = log_loss(y_true_naive, y_scores_naive)
    

        scores.append([hard_auc, naive_auc, hard_acc, naive_acc, hard_logloss, naive_logloss])
    
new_df = pd.DataFrame(columns=['Hard_auc', 'Naive_auc', 'Hard_acc', 'Naive_acc', 'Hard_ll', 'Naive_ll'], data=scores)

In [39]:
len(new_df)

100

In [36]:
len(new_df[new_df['Hard_ll']-new_df['Naive_ll']<0].index)
#How many times our logloss score was lower (better).

83

In [37]:
len(new_df[new_df['Hard_auc']-new_df['Naive_auc']>0].index)
#How many times our AUC score was higher.

41

In [38]:
len(new_df[new_df['Hard_acc']-new_df['Naive_acc']>0].index) 
#How many times our accuracy was higher.

61

In [151]:
#In these tests we compart our algo with the 'fifty-fifty' approach.

r_weights = np.random.uniform(-0.9,0.9,(32,4))
r_nscar_weights = np.random.uniform(-0.9,0.9,(31,4)) #covariate weights determining prob of being censored 
scores = []
covariates = ['x1', 'x2', 'x3']
dist = [[0, 1], [0, 1], [0, 1]]
cols = ['nscar_censoring_indicator', 'cure_label']

for test_model_weights in r_weights: 
    
    for nscar_weights in r_nscar_weights:
    
        foo = create_nscar_df(covariates, dist, 150, test_model_weights, nscar_weights)
    
        foo = pd.concat([foo, extra])
    
        censored_inputs = make_inputs(foo, 0, cols)

        noncensored_inputs = make_inputs(foo, 1, cols)
    
        fit = naive_fit(censored_inputs, noncensored_inputs, 'use_HardEM')    
    
        y_pred = fit['pred']
    
        y_true = get_true_labels(foo, ['nscar_censoring_indicator', 'cure_label'])
    
        y_scores = fit['prob']
    
        hard_acc = accuracy_score(y_true, y_pred)
    
        hard_auc = roc_auc_score(y_true, y_scores)
        
        hard_logloss = log_loss(y_true, y_scores)
    
        fit_naive = naive_fit(censored_inputs, noncensored_inputs, 'fifty_fifty')
    
        y_pred_naive = fit_naive['pred']
    
        y_true_naive = get_true_labels(foo, ['nscar_censoring_indicator', 'cure_label'])
    
        y_scores_naive = fit_naive['prob']
    
        naive_acc = accuracy_score(y_true_naive, y_pred_naive)
    
        naive_auc = roc_auc_score(y_true_naive, y_scores_naive)
        
        naive_logloss = log_loss(y_true_naive, y_scores_naive)
    

        scores.append([hard_auc, naive_auc, hard_acc, naive_acc, hard_logloss, naive_logloss])
    
new_df = pd.DataFrame(columns=['Hard_auc', 'Naive_auc', 'Hard_acc', 'Naive_acc', 'Hard_ll', 'Naive_ll'], data=scores)


In [152]:
len(new_df)

992

In [153]:
len(new_df[new_df['Hard_ll']-new_df['Naive_ll']<0].index)
#How many times our logloss score was lower (better).

556

In [154]:
len(new_df[new_df['Hard_auc']-new_df['Naive_auc']>0].index)
#How many times our AUC score was higher.

534

In [155]:
len(new_df[new_df['Hard_acc']-new_df['Naive_acc']>0].index) 
#How many times our accuracy was higher.

605