# Load the dataset and prepares default attributes (id, timestamp, state)

In [1]:
# Load the dataset

import pandas as pd

incidents = pd.read_csv("VINST cases incidents.csv", sep=";", encoding="iso-8859-1")
incidents.fillna('?', inplace = True)
incidents.head(3)

Unnamed: 0,SR Number,Change Date+Time,Status,Sub Status,Involved ST Function Div,Involved Org line 3,Involved ST,SR Latest Impact,Product,Country,Owner Country,Owner First Name
0,1-364285768,2010-03-31T15:59:42+01:00,Accepted,In Progress,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic
1,1-364285768,2010-03-31T16:00:56+01:00,Accepted,In Progress,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic
2,1-364285768,2010-03-31T16:45:48+01:00,Queued,Awaiting Assignment,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Frederic


In [2]:
# SR Number is the case id

incidents["SR Number"].describe()

count           65533
unique           7554
top       1-687082195
freq              123
Name: SR Number, dtype: object

In [3]:
# Transform date time into timestamp

import datetime
import time

incidents['timestamp'] = incidents['Change Date+Time'].transform(lambda x: time.mktime(datetime.datetime.strptime(x[0:19], "%Y-%m-%dT%H:%M:%S").timetuple()))

In [4]:
incidents.head(5)

Unnamed: 0,SR Number,Change Date+Time,Status,Sub Status,Involved ST Function Div,Involved Org line 3,Involved ST,SR Latest Impact,Product,Country,Owner Country,Owner First Name,timestamp
0,1-364285768,2010-03-31T15:59:42+01:00,Accepted,In Progress,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic,1270044000.0
1,1-364285768,2010-03-31T16:00:56+01:00,Accepted,In Progress,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic,1270044000.0
2,1-364285768,2010-03-31T16:45:48+01:00,Queued,Awaiting Assignment,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Frederic,1270047000.0
3,1-364285768,2010-04-06T15:44:07+01:00,Accepted,In Progress,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Anne Claire,1270561000.0
4,1-364285768,2010-04-06T15:44:38+01:00,Queued,Awaiting Assignment,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Anne Claire,1270561000.0


In [5]:
# Renames case id and creates state using status and sub status (id and state are named by convention)
incidents.rename(index=str, columns={'SR Number': "id"}, inplace=True)
incidents['state'] = incidents['Status']+"-"+incidents['Sub Status']
incidents.drop(['Status', 'Sub Status'], axis = 1, inplace=True)
incidents.reset_index(drop = True, inplace=True)
incidents.head(5)

Unnamed: 0,id,Change Date+Time,Involved ST Function Div,Involved Org line 3,Involved ST,SR Latest Impact,Product,Country,Owner Country,Owner First Name,timestamp,state
0,1-364285768,2010-03-31T15:59:42+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic,1270044000.0,Accepted-In Progress
1,1-364285768,2010-03-31T16:00:56+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic,1270044000.0,Accepted-In Progress
2,1-364285768,2010-03-31T16:45:48+01:00,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Frederic,1270047000.0,Queued-Awaiting Assignment
3,1-364285768,2010-04-06T15:44:07+01:00,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Anne Claire,1270561000.0,Accepted-In Progress
4,1-364285768,2010-04-06T15:44:38+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Anne Claire,1270561000.0,Queued-Awaiting Assignment


# Creates ancillary attributes

In [6]:
def add_next_state(df):
    df['next_state'] = ''
    df['next_time'] = 0
    df['next_dur'] = 0
    df['prev_time'] = 0
    df['elapsed_time_from_event'] = 0
    df['event_order'] = 0
    
    num_rows = len(df)
    event_order = 0

    for i in range(0, num_rows - 1):
        #print(str(i) + ' out of ' + str(num_rows))

        if df.at[i, 'id'] == df.at[i + 1, 'id']:
            df.at[i, 'next_state'] = df.at[i + 1, 'state']
            df.at[i, 'next_time'] = df.at[i + 1, 'timestamp']
            df.at[i, 'next_dur'] = df.at[i + 1, 'timestamp'] - df.at[i, 'timestamp']
            df.at[i, 'event_order'] = event_order
            df.at[i+1, 'prev_time'] = df.at[i, 'timestamp']
            df.at[i+1, 'elapsed_time_from_event'] = df.at[i+1, 'timestamp'] - df.at[i, 'timestamp']
            event_order = event_order + 1
        else:
            df.at[i, 'next_state'] = 99
            df.at[i, 'next_time'] = df.at[i, 'timestamp']
            df.at[i, 'next_dur'] = 0
            df.at[i, 'event_order'] = event_order            
            df.at[i+1, 'prev_time'] = 0
            df.at[i+1, 'elapsed_time_from_event'] = 0
            event_order = 0


    df.at[0, 'prev_time'] = df.at[0, 'timestamp']
    df.at[0, 'elapsed_time_from_event'] = 0
    df.at[num_rows-1, 'next_state'] = 99
    df.at[num_rows-1, 'next_time'] = df.at[num_rows-1, 'timestamp']
    df.at[num_rows-1, 'next_dur'] = 0
    df.at[num_rows-1, 'event_order'] = event_order

    return df


In [7]:
#  Adds several attributes
add_next_state(incidents)
incidents.head(30)

Unnamed: 0,id,Change Date+Time,Involved ST Function Div,Involved Org line 3,Involved ST,SR Latest Impact,Product,Country,Owner Country,Owner First Name,timestamp,state,next_state,next_time,next_dur,prev_time,elapsed_time_from_event,event_order
0,1-364285768,2010-03-31T15:59:42+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic,1270044000.0,Accepted-In Progress,Accepted-In Progress,1270044056,74,1270043982,0,0
1,1-364285768,2010-03-31T16:00:56+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic,1270044000.0,Accepted-In Progress,Queued-Awaiting Assignment,1270046748,2692,1270043982,74,1
2,1-364285768,2010-03-31T16:45:48+01:00,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Frederic,1270047000.0,Queued-Awaiting Assignment,Accepted-In Progress,1270561447,514699,1270044056,2692,2
3,1-364285768,2010-04-06T15:44:07+01:00,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Anne Claire,1270561000.0,Accepted-In Progress,Queued-Awaiting Assignment,1270561478,31,1270046748,514699,3
4,1-364285768,2010-04-06T15:44:38+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Anne Claire,1270561000.0,Queued-Awaiting Assignment,Accepted-In Progress,1270561487,9,1270561447,31,4
5,1-364285768,2010-04-06T15:44:47+01:00,A2_5,Org line A2,V13 2nd 3rd,Medium,PROD582,fr,France,Anne Claire,1270561000.0,Accepted-In Progress,Completed-Resolved,1270561491,4,1270561478,9,5
6,1-364285768,2010-04-06T15:44:51+01:00,A2_5,Org line A2,V13 2nd 3rd,Medium,PROD582,fr,France,Anne Claire,1270561000.0,Completed-Resolved,Queued-Awaiting Assignment,1270561507,16,1270561487,4,6
7,1-364285768,2010-04-06T15:45:07+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Anne Claire,1270562000.0,Queued-Awaiting Assignment,Accepted-In Progress,1270720343,158836,1270561491,16,7
8,1-364285768,2010-04-08T11:52:23+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Eric,1270720000.0,Accepted-In Progress,Queued-Awaiting Assignment,1270720415,72,1270561507,158836,8
9,1-364285768,2010-04-08T11:53:35+01:00,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Eric,1270720000.0,Queued-Awaiting Assignment,Accepted-In Progress,1271750831,1030416,1270720343,72,9


In [8]:
# Checks durations are consistent
(incidents['next_dur'] >= 0).all()

True

In [9]:
import numpy as np
def add_start_end(df):
    id_group = df.groupby('id')
    g = id_group['timestamp'].agg([np.min, np.max])
    ev = id_group['event_order'].agg(np.max)
    for i in range(0,len(df)):
        id_case = df.at[i, 'id']
        df.at[i, 'start_case'] = g.at[id_case, 'amin']
        df.at[i, 'end_case'] = g.at[id_case, 'amax']
        df.at[i, 'total_events'] = ev.at[id_case] + 1
        
        
    return df


In [10]:
# Adds more attributes related to cases

incidents = add_start_end(incidents)
incidents.head(10)

Unnamed: 0,id,Change Date+Time,Involved ST Function Div,Involved Org line 3,Involved ST,SR Latest Impact,Product,Country,Owner Country,Owner First Name,...,state,next_state,next_time,next_dur,prev_time,elapsed_time_from_event,event_order,start_case,end_case,total_events
0,1-364285768,2010-03-31T15:59:42+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic,...,Accepted-In Progress,Accepted-In Progress,1270044056,74,1270043982,0,0,1270044000.0,1336689000.0,17.0
1,1-364285768,2010-03-31T16:00:56+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Frederic,...,Accepted-In Progress,Queued-Awaiting Assignment,1270046748,2692,1270043982,74,1,1270044000.0,1336689000.0,17.0
2,1-364285768,2010-03-31T16:45:48+01:00,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Frederic,...,Queued-Awaiting Assignment,Accepted-In Progress,1270561447,514699,1270044056,2692,2,1270044000.0,1336689000.0,17.0
3,1-364285768,2010-04-06T15:44:07+01:00,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Anne Claire,...,Accepted-In Progress,Queued-Awaiting Assignment,1270561478,31,1270046748,514699,3,1270044000.0,1336689000.0,17.0
4,1-364285768,2010-04-06T15:44:38+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Anne Claire,...,Queued-Awaiting Assignment,Accepted-In Progress,1270561487,9,1270561447,31,4,1270044000.0,1336689000.0,17.0
5,1-364285768,2010-04-06T15:44:47+01:00,A2_5,Org line A2,V13 2nd 3rd,Medium,PROD582,fr,France,Anne Claire,...,Accepted-In Progress,Completed-Resolved,1270561491,4,1270561478,9,5,1270044000.0,1336689000.0,17.0
6,1-364285768,2010-04-06T15:44:51+01:00,A2_5,Org line A2,V13 2nd 3rd,Medium,PROD582,fr,France,Anne Claire,...,Completed-Resolved,Queued-Awaiting Assignment,1270561507,16,1270561487,4,6,1270044000.0,1336689000.0,17.0
7,1-364285768,2010-04-06T15:45:07+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Anne Claire,...,Queued-Awaiting Assignment,Accepted-In Progress,1270720343,158836,1270561491,16,7,1270044000.0,1336689000.0,17.0
8,1-364285768,2010-04-08T11:52:23+01:00,A2_4,Org line A2,V30,Medium,PROD582,fr,France,Eric,...,Accepted-In Progress,Queued-Awaiting Assignment,1270720415,72,1270561507,158836,8,1270044000.0,1336689000.0,17.0
9,1-364285768,2010-04-08T11:53:35+01:00,A2_5,Org line A2,V5 3rd,Medium,PROD582,fr,France,Eric,...,Queued-Awaiting Assignment,Accepted-In Progress,1271750831,1030416,1270720343,72,9,1270044000.0,1336689000.0,17.0


In [11]:
# Finally, computes time-related attributes
incidents['remaining_time'] = incidents['end_case'] - incidents['timestamp']
incidents['time_from_start'] = incidents['timestamp'] - incidents['start_case']
incidents['total_time'] = incidents['end_case'] - incidents['start_case']

# Starts encoding

In [12]:
def index_encoding(df, col, default, window = None):
    if window is None:
        hist_len = np.max(df['event_order'])
    else:
        hist_len = window

    for k in range(0, hist_len):
        df[col + '_' + str(k)] = default

    history = df.groupby('id').apply(lambda x: x[col].values)
    
    num_rows = len(df)
    for i in range(0, num_rows):        
        id_number = df.at[i, 'id']
        prefix = history[id_number]
        event_order = df.at[i, 'event_order']
        start_point = max(0, event_order + 1 - hist_len)
        for k in range(start_point, event_order + 1):
            df.at[i, col + '_' + str(k - start_point)] = prefix[k]
    
    return df, hist_len


def frequency_encoding(df, col):
    possible_values = df[col].unique()

    for k in possible_values:
        df[col + '_' + str(k)] = 0

    history = df.groupby('id').apply(lambda x: x[col].values)    
    num_rows = len(df)
    
    for i in range(0, num_rows):
        id_number = df.at[i, 'id']
        trace = history[id_number]
        event_order = df.at[i, 'event_order']
        prefix = trace[:event_order+1]
        unique, counts = np.unique(prefix, return_counts=True)
        for j in range(0, len(unique)):
            df.at[i, col+'_'+str(unique[j])] = counts[j]

    return df

In [13]:
# These are the columns of the dataset

incidents.columns

Index(['id', 'Change Date+Time', 'Involved ST Function Div',
       'Involved Org line 3', 'Involved ST', 'SR Latest Impact', 'Product',
       'Country', 'Owner Country', 'Owner First Name', 'timestamp', 'state',
       'next_state', 'next_time', 'next_dur', 'prev_time',
       'elapsed_time_from_event', 'event_order', 'start_case', 'end_case',
       'total_events', 'remaining_time', 'time_from_start', 'total_time'],
      dtype='object')

In [14]:
# We chose those that we encode using a window of 2

index_columns = ['Involved ST Function Div','Involved Org line 3', 'Involved ST', 'SR Latest Impact', 'Product',
       'Country', 'Owner Country', 'Owner First Name', 'state']

for col in index_columns:
    incidents, hist_len = index_encoding(incidents, col, default = '?', window=2)
    

In [15]:
# And the same for integers
index_int_columns = ['elapsed_time_from_event', 'time_from_start', 'event_order']
for col in index_int_columns:
    incidents, hist_len = index_encoding(incidents, col, default = 0, window=2)

In [16]:
# These are the columns that we have now

incidents.columns

Index(['id', 'Change Date+Time', 'Involved ST Function Div',
       'Involved Org line 3', 'Involved ST', 'SR Latest Impact', 'Product',
       'Country', 'Owner Country', 'Owner First Name', 'timestamp', 'state',
       'next_state', 'next_time', 'next_dur', 'prev_time',
       'elapsed_time_from_event', 'event_order', 'start_case', 'end_case',
       'total_events', 'remaining_time', 'time_from_start', 'total_time',
       'Involved ST Function Div_0', 'Involved ST Function Div_1',
       'Involved Org line 3_0', 'Involved Org line 3_1', 'Involved ST_0',
       'Involved ST_1', 'SR Latest Impact_0', 'SR Latest Impact_1',
       'Product_0', 'Product_1', 'Country_0', 'Country_1', 'Owner Country_0',
       'Owner Country_1', 'Owner First Name_0', 'Owner First Name_1',
       'state_0', 'state_1', 'elapsed_time_from_event_0',
       'elapsed_time_from_event_1', 'time_from_start_0', 'time_from_start_1',
       'event_order_0', 'event_order_1'],
      dtype='object')

# Prepares the dataset that will be used for learning

In [17]:
# Remove the attributes that are not going to be used for learning

incidents_X = incidents.drop(['Change Date+Time', 'Involved ST Function Div',
       'Involved Org line 3', 'Involved ST', 'SR Latest Impact', 'Product',
       'Country', 'Owner Country', 'Owner First Name', 'timestamp', 'state',
       'next_state', 'next_time', 'next_dur', 'prev_time',
       'elapsed_time_from_event', 'event_order', 'start_case', 'end_case',
       'remaining_time', 'time_from_start'], axis=1)

In [18]:
# Choose the target attribute (kind of)

incidents_Y = incidents['remaining_time']

In [21]:
# Choose a threshold for the classification task
Y_threshold = 1036800
(incidents_Y < Y_threshold).describe()

count     65533
unique        2
top        True
freq      46320
Name: remaining_time, dtype: object

In [22]:
# And the attribute (id) that is used to split the dataset in train, validation and test
incidents_group = incidents['id']

# Computes and evaluate the reliability

In [23]:
# Creates a dataset with the values of the prediction, adv, and time that are necessary to compute the reliability
def build_output(val_onehot, clf, time_regressor, adv_regressor, actual, adv, time):
    val_predict = clf.predict_proba(val_onehot)    
    time_pred_val = time_regressor.predict(val_onehot)
    adv_pred_val = adv_regressor.predict(val_onehot)  
    
    output_val = pd.DataFrame(data = {'est_time': time_pred_val, 'est_adv':adv_pred_val, 'proba1': val_predict.transpose()[1], 'prediction': clf.predict(val_onehot), 'actual': actual, 'adv':adv, 'time':time})
    output_val['proba_full'] = output_val['proba1']
    output_val.loc[output_val['prediction'] == 0, 'proba_full'] = (1 - output_val['proba1'])
    output_val['adv_perc'] = np.minimum(1, output_val['adv']/round(output_val['est_adv'],0))
    output_val['time_perc'] = np.minimum(1, output_val['time']/output_val['est_time'])
    
    return output_val

In [24]:
from scipy.stats.stats import pearsonr

# Computes error for reliability. The first parameter is the result of build_output and the second
# parameter is the step of each interval for the evaluation
def errormetric(output_val, step=0.1):
    results = []
    count = []
    mid_point = []
    lower = np.round(np.arange(0.0, 1.0, step), 2)

    for i in range(len(lower)):
        if i < len(lower)-1:
            output_interval = output_val[(output_val['reliability'] >= lower[i]) & (output_val['reliability'] < lower[i+1])]
        else:
            output_interval = output_val[(output_val['reliability'] >= lower[i])]

        mid_point.append((lower[i]+step/2))
        correct = np.sum(output_interval['prediction'] == output_interval['actual'])
        incorrect = len(output_interval) - correct
        if correct + incorrect > 0:
            results.append(correct/(correct + incorrect))
        else:
            results.append(float('NaN'))
        count.append(len(output_interval))
        
    results = np.asarray(results)
    mid_point = np.asarray(mid_point)
    
    errors = abs(results - mid_point)
    nonweighted_error = np.nanmean(errors)
    total = sum(count)
    weighted_error = np.nansum((np.asarray(count)/total)*errors)

    corr,pvalue = pearsonr(results[~np.isnan(results)], mid_point[~np.isnan(results)])

    return {'results': results, 'count': count, 'mid_point': mid_point, 'nwe': nonweighted_error, 'we': weighted_error, 'corr': corr, 'pvalue': pvalue}


In [25]:
from scipy.stats import iqr

# Searches all possible weights to check which one is better
def search_weights(output_val, weight_step=10, error_step=0.1, transform_adv=(lambda x: x), transform_time=(lambda x: x)):
    values = range(0, 110, weight_step)
    results = []
    
    for i in values:
        for j in values:
            for k in values:
                if i + j + k == 100:
                    output_val['reliability'] = i/100 * transform_adv(output_val['adv_perc']) + j/100 * transform_time(output_val['time_perc']) + k/100 * output_val['proba_full']
                    error = errormetric(output_val, 0.1)
                    results.append(np.concatenate(([i/100, j/100, k/100, error['nwe'], error['we'], error['corr'], error['pvalue'], np.mean(output_val['reliability']), np.std(output_val['reliability'])], error['count'], error['results'])))
                
    return results

# Shows the best results of the output of search_weights
def show_best_results(results):
    results = np.asarray(results)
    return results[np.argmin(results[:, 3])], results[np.argmin(results[:, 4])], results[np.argmin(results[:, 4])]

# Shows only the statistically significant results of the output of search_weights
def show_statistically_significant(results):
    results = np.asarray(results)
    return results[(results[:,6] <= 0.05) & (results[:,5] > 0)]

# Formats the results of search_weigths in an easier to understand manner
def display_results(results):
    results_ds = pd.DataFrame(results, columns=['w_adv', 'w_time', 'w_prob', 'non-weighted error', 'weighted error', 'correlation', 'pvalue', 'mean', 'std']+['c'+str(i) for i in range(0, 100, 10)]+['r'+str(i) for i in range(0, 100, 10)])
    return results_ds

# Stores the results in a csv
def store_results(results, path):
    results_ds = display_results(results)
    results_ds.to_csv(path)
    return results_ds

In [28]:
from sklearn.model_selection import GroupKFold
from sklearn.feature_extraction import DictVectorizer
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.utils import safe_indexing

total_events_time = ['total_events', 'total_time', 'id']

# Trains DictVectorizer with the whole dataset to use one-hot encoding
all_dict = incidents_X.drop(total_events_time, axis=1).to_dict('record')
vec = DictVectorizer()
vec.fit(all_dict)

group_kfold = GroupKFold(n_splits=10)
i = 0

allresults = []

for train_index, test_index in group_kfold.split(incidents_X, incidents_Y, incidents_group):
    X_train, X_test = safe_indexing(incidents_X, train_index), safe_indexing(incidents_X, test_index)
    Y_train, Y_test = safe_indexing(incidents_Y, train_index), safe_indexing(incidents_Y, test_index)

    # Converts them into dictionaries in order to use one-hot-encoding
    train_dict = X_train.drop(total_events_time, axis=1).to_dict('record')
    test_dict = X_test.drop(total_events_time, axis=1).to_dict('record')
    
    train_onehot = vec.transform(train_dict)
    test_onehot = vec.transform(test_dict)
    print('Iteration ' + str(i)+' starting building models')
    clf = RandomForestClassifier(random_state=0)
    clf.fit(train_onehot, Y_train < Y_threshold)
    print('Iteration ' + str(i)+' classifier ready')

    time_regressor = RandomForestRegressor(random_state=0,n_jobs=-1)
    time_regressor.fit(train_onehot, X_train['total_time'])
    print('Iteration ' + str(i)+' time regressor ready')

    adv_regressor = RandomForestRegressor(random_state=0,n_jobs=-1)
    adv_regressor.fit(train_onehot, X_train['total_events'])
    print('Iteration ' + str(i)+' adv regressor ready')

    output_test = build_output(test_onehot, clf, time_regressor, adv_regressor, Y_test < Y_threshold, X_test['event_order_1']+1, X_test['time_from_start_1'])
    print('Iteration ' + str(i)+' finished building models')

    results = search_weights(output_test)
    df = store_results(results, 'bpi2013-normal-'+str(i)+'.csv')
    allresults.append(df)
    
    i = i + 1

current = pd.concat(allresults)
current = current.groupby(current.index).mean()        

current.to_csv("bpi2013-normal.csv")

display_results(np.stack(show_best_results(np.asarray(current))))


Iteration 0 starting building models
Iteration 0 classifier ready
Iteration 0 time regressor ready
Iteration 0 adv regressor ready
Iteration 0 finished building models
Iteration 1 starting building models
Iteration 1 classifier ready
Iteration 1 time regressor ready
Iteration 1 adv regressor ready
Iteration 1 finished building models
Iteration 2 starting building models
Iteration 2 classifier ready
Iteration 2 time regressor ready
Iteration 2 adv regressor ready
Iteration 2 finished building models
Iteration 3 starting building models
Iteration 3 classifier ready
Iteration 3 time regressor ready
Iteration 3 adv regressor ready
Iteration 3 finished building models
Iteration 4 starting building models
Iteration 4 classifier ready
Iteration 4 time regressor ready
Iteration 4 adv regressor ready
Iteration 4 finished building models
Iteration 5 starting building models
Iteration 5 classifier ready
Iteration 5 time regressor ready
Iteration 5 adv regressor ready
Iteration 5 finished building

In [30]:
# Shows the best results (average) obtained in the 10 iterations
display_results(np.stack(show_best_results(np.asarray(current))))

Unnamed: 0,w_adv,w_time,w_prob,non-weighted error,weighted error,correlation,pvalue,mean,std,c0,...,r0,r10,r20,r30,r40,r50,r60,r70,r80,r90
0,0.2,0.0,0.8,0.024304,0.020065,0.985735,0.000532,0.761694,0.159049,0.0,...,,,,,0.489174,0.548882,0.62246,0.717316,0.857892,0.956911
1,0.2,0.0,0.8,0.024304,0.020065,0.985735,0.000532,0.761694,0.159049,0.0,...,,,,,0.489174,0.548882,0.62246,0.717316,0.857892,0.956911
2,0.2,0.0,0.8,0.024304,0.020065,0.985735,0.000532,0.761694,0.159049,0.0,...,,,,,0.489174,0.548882,0.62246,0.717316,0.857892,0.956911
