In [1]:
import xgboost
import pandas            as pd
import numpy             as np
import matplotlib.pyplot as plt
import random

In [6]:
def chart_creation_xgboost(res,chart_title,chart_name,distribution,parameter):
    
    distributionCol = distribution + ","+str(parameter)
    min_valid = round(np.min(res['test'][distributionCol]),4)
    min_train = round(np.min(res['train'][distributionCol]),4)
    min_iter  = round(np.nanargmin(res['test'][distributionCol]),0)

    textstr = '\n'.join((
                    'Min Train = %.2f' % (min_train, ),
                    'Min Valid = %.2f' % (min_valid, ),
                    'Min Iter  = %.2f' % (min_iter, )))

    props = dict(boxstyle='round', facecolor='wheat', alpha=0.5,edgecolor="black")
    
    fig,ax1       = plt.subplots()
    ax2           = ax1.twinx()

    ln1 = ax1.plot(res['train'][distributionCol],color='blue',label='Training')
    ln2 = ax2.plot(res['test'][distributionCol],color='orange',label='Validation')
    
    lns = ln1 + ln2
    
    labs = [l.get_label() for l in lns]
    ax1.legend(lns, labs, loc='lower left',fancybox='round', facecolor='wheat',fontsize=8)
    
    ax1.set_xlabel("Number of Iterations(Trees)")
    ax1.set_ylabel("Training Negative Likelihood(Loss)")
    ax2.set_ylabel("Validation Negative Likelihood(Loss)")
    ax2.text(0.7, 0.90, textstr, transform=ax1.transAxes, fontsize=8,
        verticalalignment='top', bbox=props)
    plt.title(chart_title)
    plt.show()
    fig.savefig(chart_name)
    
def generate_result(X,y_lower,y_higher,params,X_val=None,y_lower_val=None,y_higher_val=None):
    
    res    = {}
    dtrain = xgboost.DMatrix(X)
    dtrain.set_float_info("label_lower_bound",y_lower)
    dtrain.set_float_info("label_upper_bound",y_higher)
    
    dtest  = xgboost.DMatrix(X_val)
    dtest.set_float_info("label_lower_bound",y_lower_val)
    dtest.set_float_info("label_upper_bound",y_higher_val)
    
    bst    = xgboost.train(params,dtrain,num_boost_round=100,evals=[(dtrain,"train"),(dtest,"test")],evals_result=res)
    return res

In [7]:
def calculateSummaryStats(data):
    data     = list(filter(lambda x : x not in [float('inf'),float('inf')],data))
    std      = np.std(data)
    avg      = np.mean(data)
    return std,avg

In [36]:
data      = pd.read_csv('simulate_survival.csv')
rows,cols = data.shape[0],data.shape[1]
list1     = list(np.arange(rows))
sample    = random.sample(list1, 800)

In [37]:
train = data[data.index.isin(sample)]
test  = data[~data.index.isin(sample)]

In [38]:
X            = train[['x1','x2','x3']]
X_val        = test[['x1','x2','x3']]
y_lower      = train['left']
y_lower_val  = test['left']
y_higher     = train['right']
y_higher_val = test['right']

In [39]:
dataName    = 'simulate'
data_name   = dataName
title       = 'Loss=LogLoss,Data='+data_name+' Test Fold='+str(test_fold)
file_name   = title+'.png'
    
params      = {'learning_rate':0.1, 'aft_noise_distribution' : 'normal', 'aft_sigma': 1.0,'eval_metric':'aft-nloglik@normal,1.0','objective':"aft:survival"}
bst         = generate_result(X,y_lower,y_higher,params,X_val,y_lower_val,y_higher_val)
chart_creation_xgboost(bst,title,file_name)

[0]	train-normal,1.0:nan	test-normal,1.0:nan
[1]	train-normal,1.0:nan	test-normal,1.0:nan
[2]	train-normal,1.0:nan	test-normal,1.0:nan
[3]	train-normal,1.0:nan	test-normal,1.0:nan
[4]	train-normal,1.0:nan	test-normal,1.0:nan
[5]	train-normal,1.0:nan	test-normal,1.0:nan
[6]	train-normal,1.0:nan	test-normal,1.0:nan
[7]	train-normal,1.0:nan	test-normal,1.0:nan
[8]	train-normal,1.0:nan	test-normal,1.0:nan
[9]	train-normal,1.0:nan	test-normal,1.0:nan
[10]	train-normal,1.0:nan	test-normal,1.0:nan
[11]	train-normal,1.0:nan	test-normal,1.0:nan
[12]	train-normal,1.0:nan	test-normal,1.0:nan
[13]	train-normal,1.0:nan	test-normal,1.0:nan
[14]	train-normal,1.0:nan	test-normal,1.0:nan
[15]	train-normal,1.0:nan	test-normal,1.0:nan
[16]	train-normal,1.0:nan	test-normal,1.0:nan
[17]	train-normal,1.0:nan	test-normal,1.0:nan
[18]	train-normal,1.0:nan	test-normal,1.0:nan
[19]	train-normal,1.0:nan	test-normal,1.0:nan
[20]	train-normal,1.0:nan	test-normal,1.0:nan
[21]	train-normal,1.0:nan	test-normal,1.0:na

TypeError: chart_creation_xgboost() missing 1 required positional argument: 'distribution'