In [76]:
import pandas as pd
import pm4py
from datetime import datetime
import numpy as np
from tqdm import tqdm 
import math
from random import sample

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [2]:
application_log = pm4py.read_xes('BPI Challenge 2017.xes')
offer_log = pm4py.read_xes('BPI Challenge 2017 - Offer log.xes')

df_application = pm4py.convert_to_dataframe(application_log)
df_offer = pm4py.convert_to_dataframe(offer_log)

df_application.to_csv('app_logs.csv')
df_offer.to_csv('offer_logs.csv')

parsing log, completed traces :: 100%|██████████| 31509/31509 [00:47<00:00, 657.00it/s]
parsing log, completed traces :: 100%|██████████| 42995/42995 [00:10<00:00, 4021.40it/s]


In [2]:
df_application = pd.read_csv('./../../Assignment_2/Data/app_logs.csv')
#df_offer = pd.read_csv('./../../Assignment_2/Data/offer_logs.csv')

In [3]:
declined_ids = list(df_application.loc[df_application['concept:name'] == 'A_Denied']['case:concept:name'].unique())
accepted_ids = list(df_application.loc[df_application['concept:name'] == 'A_Pending']['case:concept:name'].unique())
cancelled_ids = list(df_application.loc[df_application['concept:name'] == 'A_Cancelled']['case:concept:name'].unique())

In [47]:
# 31,509 loan applications in total
# 98 noise 
len(declined_ids)+ len(accepted_ids)+len(cancelled_ids)

31411

In [4]:
len(declined_ids), len(accepted_ids), len(cancelled_ids)

(3752, 17228, 10431)

In [49]:
def get_min_max_time(ids, i):
    timestamps = df_application.loc[df_application['case:concept:name'] == ids[i]]['time:timestamp']
    return (min(timestamps), max(timestamps))

In [50]:
# declined_ids: 0.19175868569889576
# cancelled_ids: 0.19131614654002713
# approved_ids: 0.19133955289561697

In [None]:
timestamp_list = []
for i in tqdm(range(len(accepted_ids))):
    timestamp_list.append(get_min_max_time(accepted_ids, i))
#all(timestamp_list[i] <= timestamp_list[i+1] for i in range(len(timestamp_list) - 1))

accepted_ids_w_st_time = sorted([(id, time) for id, time in zip(accepted_ids, timestamp_list)], key=lambda x: x[1][0])
accepted_ids_w_en_time = sorted([(id, time) for id, time in zip(accepted_ids, timestamp_list)], key=lambda x: x[1][1])

ids_length = len(timestamp_list)
train_num = int(ids_length * 0.8)

train_ids = [pair[0] for pair in accepted_ids_w_st_time[:train_num]]
test_ids = [pair[0] for pair in accepted_ids_w_en_time[train_num:]]

overlap = list(set(train_ids) & set(test_ids))
len(overlap)

print((len(test_ids)-len(overlap))/(len(timestamp_list)-len(overlap)))

test_ids = list(set(test_ids) - set(overlap))

train_df = df_application.loc[df_application['case:concept:name'].isin(train_ids)]
test_df = df_application.loc[df_application['case:concept:name'].isin(test_ids)]

train_df.to_csv('approved_train.csv')
test_df.to_csv('approved_test.csv')

#
#train_event_log = pm4py.convert_to_event_log(train_df)
#pm4py.write_xes(train_event_log, 'approved_train.xes')
#
#test_event_log = pm4py.convert_to_event_log(test_df)
#pm4py.write_xes(test_event_log, 'approved_test.xes')

# Part 1

In [58]:
def create_train_test(df_application, ids, outcome):

    timestamp_list = []
    for i in tqdm(range(len(ids))):
        timestamp_list.append(get_min_max_time(ids, i))
    #all(timestamp_list[i] <= timestamp_list[i+1] for i in range(len(timestamp_list) - 1))
    
    ids_w_st_time = sorted([(id, time) for id, time in zip(ids, timestamp_list)], key=lambda x: x[1][0])
    ids_w_et_time = sorted([(id, time) for id, time in zip(ids, timestamp_list)], key=lambda x: x[1][1])
    
    ids_length = len(timestamp_list)
    train_num = int(ids_length * 0.8)
    
    train_ids = [pair[0] for pair in ids_w_st_time[:train_num]]
    test_ids = [pair[0] for pair in ids_w_et_time[train_num:]]
    
    overlap = list(set(train_ids) & set(test_ids))
    
    print((len(test_ids)-len(overlap))/(len(timestamp_list)-len(overlap)))
    
    test_ids = list(set(test_ids) - set(overlap))
    
    train_df = df_application.loc[df_application['case:concept:name'].isin(train_ids)]
    test_df = df_application.loc[df_application['case:concept:name'].isin(test_ids)]
    
    train_df.to_csv(f'{outcome}_train.csv')
    test_df.to_csv(f'{outcome}_test.csv')

    return train_df, test_df
    
    

In [59]:
d_train_df, d_test_df = create_train_test(df_application, declined_ids, 'declined')

100%|██████████| 3752/3752 [03:54<00:00, 16.01it/s]


0.19175868569889576


In [60]:
c_train_df, c_test_df = create_train_test(df_application, cancelled_ids, 'cancelled')

100%|██████████| 10431/10431 [10:48<00:00, 16.09it/s]


0.19131614654002713


In [61]:
a_train_df, a_test_df = create_train_test(df_application, accepted_ids, 'approved')

100%|██████████| 17228/17228 [17:48<00:00, 16.12it/s]


0.19133955289561697


In [49]:
def save_xes(train_df, test_df, pre):

    train_event_log = pm4py.convert_to_event_log(train_df)
    pm4py.write_xes(train_event_log, f'{pre}_train.xes')

    test_event_log = pm4py.convert_to_event_log(test_df)
    pm4py.write_xes(test_event_log, f'{pre}_test.xes')

# Part 2 building dataset

In [None]:
def aggregate_df(df):
    """ 
    Aggregate the df of current events in the case

    Output: 
        result -> could be a pandas series
    """

    # record the timestamp of the last activity
    result = df.iloc[0]

    return result


In [None]:
def add_to_aggregate(result, df_row):
    """ 
    When a new event happens, add the event info to the current aggregated result.

    Input: 
         result: the current aggregated result
         df_row: pandas df row representing the new event
    Output:
        result: the new aggregated result
    """

    # record the timestamp of the last activity
    result = df_row.iloc[0]

    return result

In [34]:
def create_prefix_part2(df_application, app_ids, end_event, start_event='A_Accepted'):
    
    app_id_list = list(df_application['case:concept:name'].unique())

    # TODO:
    # create a return df
    return_df = pd.DataFrame()

    # extracting prefix for each application
    for app_id in app_id_list:
        
        events_app = df_application.loc[df_application['case:concept:name'] == app_id]
        events_app.reset_index(drop=True, inplace=True)

        # A_Accepted happens at most 1 time in each case
        # Otherwise will give error - only consider the first A_Accepted
        cur_id = starting_row_id = events_app.loc[events_app['concept:name'] == 'A_Accepted'].index[0]
        pre_events = events_app.iloc[:starting_row_id]
        # TODO: 
        # aggregate events_app from row 0 to starting_row_id
        result = aggregate_df(pre_events)
        
        ending_row_id = events_app.loc[events_app['concept:name'] == end_event].index[0]
        cur_id += 1
        
        while cur_id < ending_row_id:
            new_row = events_app.iloc[cur_id]
            # TODO: 
            # add new event row info to the aggregated result
            result = add_to_aggregate(pre_events)

            # Update the return_df -> add new row
            # target y: end_event

            cur_id += 1

        return return_df


# Part 3 building dataset


In [35]:
#build function to retrieve minimum time 
def get_min_time(ids, i):
    """define function to retrieve the minimum time"""
    return min(df_application.loc[df_application["case:concept:name"] == ids[i]]["time:timestamp"])

#get the minimum time for the accepted ids
min_timestamp_list = []
for i in tqdm(range(len(accepted_ids))):
    min_timestamp_list.append(get_min_time(accepted_ids, i))

#create a list with every id and the start time, and another list with every id and the end time
accepted_ids_begin = sorted([(id, time) for id, time in zip(accepted_ids, min_timestamp_list)], key=lambda x: x[1])
accepted_ids_end = sorted([(id, time) for id, time in zip(accepted_ids, timestamp_list)], key=lambda x: x[1])

#generate dataframe for begin and end times 
df_accepted_ids_time_begin = pd.DataFrame(accepted_ids_begin, columns = ["case:concept:name", "begin"])
df_accepted_ids_time_end = pd.DataFrame(accepted_ids_end, columns = ["case:concept:name", "end"])

#merge dataframes on case:concept:name
df_accepted_timestamps = df_accepted_ids_time_begin.merge(df_accepted_ids_time_end, on = "case:concept:name")

#keep relevant time formatting
df_accepted_timestamps["begin"] = df_accepted_timestamps["begin"].map(lambda x: str(x)[:19])
df_accepted_timestamps["end"] = df_accepted_timestamps["end"].map(lambda x: str(x)[:19])

#create function to calculate the difference in time from a dataframe with two columns containing dates and time
def calc_duration(end, begin):
    """calculate the difference in time using datetime.strptime"""
    return (datetime.strptime(end, "%Y-%m-%d %H:%M:%S") - datetime.strptime(begin, "%Y-%m-%d %H:%M:%S")).total_seconds()

#empty list to gather differences
duration = []

#retrieve the difference between begin and end of the trace and add to a list 
for i in range(0, len(df_accepted_timestamps)):
    duration.append(calc_duration(df_accepted_timestamps.iloc[i]["end"], df_accepted_timestamps.iloc[i]["begin"]))

#add the time difference to the df 
df_accepted_timestamps["duration"] = duration

#remove all cases with case time duration 0 
df_accepted_timestamps = df_accepted_timestamps.loc[df_accepted_timestamps["duration"] > 0]

#get indexes to filter outliers top and bottom 5%
outliers_index = list(range(0, round(0.05 * len(df_accepted_timestamps)))) + list(range(round(0.95 * len(df_accepted_timestamps)), len(df_accepted_timestamps)))

#sort values from small to big time difference and drop respective rows
df_accepted_timestamps = df_accepted_timestamps.sort_values(by= "duration", ignore_index = True).drop(labels = outliers_index, axis = "index")

In [None]:
df_accepted_timestamps_begin = df_accepted_timestamps.sort_values(by = "begin").reset_index(drop = True)
df_accepted_timestamps_end = df_accepted_timestamps.sort_values(by = "end").reset_index(drop = True)

#test set range
begin_index_test = round(0.8 * len(df_accepted_timestamps_begin))
begin_time_test = df_accepted_timestamps_begin.iloc[begin_index_test]["begin"]
x = calc_duration(max(df_accepted_timestamps["end"]), begin_time_test)

#train set range
end_index_train = round(0.8 * len(df_accepted_timestamps_end))
end_time_train = df_accepted_timestamps_end.iloc[end_index_train]["end"]
y = calc_duration(end_time_train, min(df_accepted_timestamps["begin"]))
 
total_with_overlap = x + y
total_time = calc_duration(max(df_accepted_timestamps["end"]), min(df_accepted_timestamps["begin"]))
overlap_span = total_with_overlap - total_time

overlap_train = 0.8 * overlap_span
overlap_test = 0.2 * overlap_span

end_time_train_with_overlap = datetime.strptime(end_time_train, "%Y-%m-%d %H:%M:%S") 
date_index_train = datetime.strftime((end_time_train_datetime - timedelta(seconds = overlap_train)), "%Y-%m-%d %H:%M:%S")

begin_time_test_with_overlap = datetime.strptime(begin_time_test, "%Y-%m-%d %H:%M:%S") 
date_index_test = datetime.strftime((begin_time_test_datetime + timedelta(seconds = overlap_test)), "%Y-%m-%d %H:%M:%S")

df_train = df_accepted_timestamps_end.loc[df_accepted_timestamps_end["end"] < date_index_train]
df_test = df_accepted_timestamps_begin.loc[df_accepted_timestamps_begin["begin"] > date_index_test]

# Part 4 building dataset
---

#### First XOR - test: 

        W_Validate application -> A_Validating XOR W_Call incomplete files 

In [76]:
df_cancel_tr = pm4py.convert_to_dataframe(pm4py.read_xes('./../../Assignment_2/Data/cancelled_train.xes'))
df_cancel_te = pm4py.convert_to_dataframe(pm4py.read_xes('./../../Assignment_2/Data/cancelled_test.xes'))

parsing log, completed traces :: 100%|██████████| 8344/8344 [00:22<00:00, 370.96it/s]
parsing log, completed traces :: 100%|██████████| 2087/2087 [00:05<00:00, 371.85it/s]


In [77]:
df_cancel_tr.to_csv('./../../Assignment_2/Data/cancelled_train.csv')
df_cancel_te.to_csv('./../../Assignment_2/Data/cancelled_test.csv')

In [5]:
df_cancel_tr = pd.read_csv('./../../Assignment_2/Data/cancelled_train.csv')
df_cancel_te = pd.read_csv('./../../Assignment_2/Data/cancelled_test.csv')

In [57]:
df_approve_tr = pd.read_csv('./../../Assignment_2/Data/approved_train.csv')
df_approve_te = pd.read_csv('./../../Assignment_2/Data/approved_test.csv')

In [7]:
all_columns = list(df_cancel_tr.columns)

In [139]:
case_attr = all_columns[all_columns.index('FirstWithdrawalAmount'):]

event_attr_cat = ['org:resource', 'concept:name', 'lifecycle:transition']
event_attr_num = ['time:timestamp']

result_df_columns = case_attr + ['time_to_current'] + ['last_time:timestamp']

resources = list(df_cancel_tr['org:resource'].unique())

df_cancel_tr['event_w_lifecycly'] = df_cancel_tr.apply(lambda row: row['concept:name'].replace(' ', '_') + '_' + row['lifecycle:transition'], axis=1)
events = list(df_cancel_tr['concept:name'].unique())
lifecycles = list(df_cancel_tr['lifecycle:transition'].unique())
events_w_lifecycle = list(df_cancel_tr['event_w_lifecycly'].unique())

result_df_columns.extend(events)
result_df_columns.extend(lifecycles)
result_df_columns.extend(events_w_lifecycle)

return_df = pd.DataFrame(columns=result_df_columns)

In [93]:
def aggregate_df(df, res_cols):
    """ 
    Aggregate the df of current events in the case

    Output: 
        result -> could be a pandas series
    """
    res_dict = dict.fromkeys(res_cols)
    
    for row in df.to_dict('records'):

        # user variable assign
        user = row['org:resource']
        # event variable assign
        event = row['concept:name'] + ' ' + row['lifecycle:transition']

        try:
            # first mention, turn it numeric
            if not res_dict[user]: res_dict[user] = 1
            else: res_dict[user] += 1

            if not res_dict[event]: res_dict[event] = 1
            else: res_dict[event] += 1
        except:
            # when test set has a unique resource (user) or event not in train set
            pass

        # case level cols
        rest_cols_1 = ['FirstWithdrawalAmount',
       'NumberOfTerms', 'Accepted', 'MonthlyCost', 'Selected', 'CreditScore',
       'OfferedAmount'] 

        for col in rest_cols_1: # always get the newest record
            if not math.isnan(row[col]) and row[col] != res_dict[col]:
                res_dict[col] = row[col]

        
        # time stuff
        #keeping the first timestamp of the case for calculation purposes
        if not res_dict['first_timestamp']: res_dict['first_timestamp'] = row['time:timestamp']


    rest_cols_2 = ['case:LoanGoal', 'case:ApplicationType', 'case:RequestedAmount']
    for col in rest_cols_2:
          res_dict[col] = row[col]  
    
    # trace duration in seconds
    res_dict['trace_duration'] = (np.datetime64(row['time:timestamp']) - np.datetime64(res_dict['first_timestamp'])).item().total_seconds()

    return res_dict

In [59]:
cancel_events = list(df_cancel_tr['concept:name'].unique())
approve_events = list(df_approve_tr['concept:name'].unique())

In [61]:
common_events = set(approve_events) & set(cancel_events)
only_cancel = list(set(cancel_events) - common_events)
only_approve = list(set(approve_events) - common_events)

In [None]:
list(common_events)

In [62]:
only_cancel, only_approve

(['A_Cancelled'], ['A_Pending', 'W_Personal Loan collection', 'O_Accepted'])

In [81]:
len(df_cancel_tr['case:concept:name'].unique()), len(df_cancel_te['case:concept:name'].unique())

(8344, 1974)

In [82]:
df_train = pd.concat([df_cancel_tr, df_approve_tr[df_approve_tr['case:concept:name'].isin(sample(list(df_approve_tr['case:concept:name'].unique()), 8000))]])
df_test = pd.concat([df_cancel_te, df_approve_te[df_approve_te['case:concept:name'].isin(sample(list(df_approve_te['case:concept:name'].unique()), 2000))]])


In [94]:
def create_feature_columns(df_train):
       df_train['event_w_lifecycle'] = df_train['concept:name'] + ' ' + df_train['lifecycle:transition']

       org_resource_cols = list(df_train['org:resource'].unique())
       event_cols = list(df_train['event_w_lifecycle'].unique())
       rest_cols = ['FirstWithdrawalAmount',
              'NumberOfTerms', 'Accepted', 'MonthlyCost', 'Selected', 'CreditScore',
              'OfferedAmount', 'case:LoanGoal', 'case:ApplicationType', 'case:RequestedAmount']
       target_cols = ['first_timestamp', 'trace_duration']

       res_cols = org_resource_cols + event_cols + rest_cols + target_cols

       return res_cols

In [95]:
def create_dataset_part4_first_xor(df_application, current_event='A_Complete', next_event=[], result_columns=None, test=True):
    """
    Encode original applications 
    """
    
    result_dict = []
    
    app_id_list = list(df_application['case:concept:name'].unique())
    if test: 
        cnt = int(len(app_id_list) * 0.1)
        app_id_list = app_id_list[:cnt] + app_id_list[-cnt:]
    for app_id in tqdm(app_id_list):
    
        events_app = df_application.loc[df_application['case:concept:name'] == app_id]
        events_app.reset_index(drop=True, inplace=True)
    
        # same event could happen multiple times in a case
        starting_rows = events_app.loc[(events_app['concept:name'] == current_event) & (events_app['lifecycle:transition'] == 'complete')]
        starting_row_ids = list(starting_rows.index)
        
        for starting_row_id in starting_row_ids:
            #print(app_id)
            pre_events = events_app.iloc[:starting_row_id+1]
            pos_events = events_app.iloc[starting_row_id+1:]
            
            # aggregate events_app from row 0 to starting_row_id
            result = aggregate_df(pre_events, result_columns)
    
            # XOR with 2 results
            target = list(pos_events['concept:name'].unique())
            result['next_event'] = next_event[0] in target
    
            result_dict.append(result)
    
    return pd.DataFrame.from_dict(result_dict).fillna(0)
            
        
        

In [96]:
result_columns = create_feature_columns(df_train)

df_train_case = create_dataset_part4_first_xor(df_train, next_event=['A_Pending', 'A_Cancelled'], result_columns=result_columns)
df_test_case = create_dataset_part4_first_xor(df_test, next_event=['A_Pending', 'A_Cancelled'], result_columns=result_columns)

  res_dict['trace_duration'] = (np.datetime64(row['time:timestamp']) - np.datetime64(res_dict['first_timestamp'])).item().total_seconds()
100%|██████████| 3268/3268 [01:43<00:00, 31.62it/s]
100%|██████████| 794/794 [00:07<00:00, 111.68it/s]


In [101]:
len(df_train_case),  len(df_train['case:concept:name'].unique()) * 0.2

(3255, 3268.8)

In [102]:
len(df_train['case:concept:name'].unique()), len(df_train.loc[df_train['concept:name'] == 'A_Complete'])

(16344, 16263)

In [103]:
df_train_case.to_csv('part_4_train_tt.csv')
df_test_case.to_csv('part_4_test_tt.csv')

In [105]:
df_train_case.tail(10)

Unnamed: 0,User_1,User_19,User_11,User_17,User_8,User_3,User_38,User_113,User_100,User_28,User_27,User_43,User_15,User_16,User_89,User_112,User_4,User_71,User_108,User_37,User_85,User_7,User_13,User_20,User_52,User_2,User_14,User_31,User_6,User_95,User_109,User_70,User_10,User_21,User_114,User_102,User_87,User_47,User_5,User_44,User_33,User_34,User_35,User_29,User_26,User_74,User_42,User_49,User_99,User_22,User_116,User_92,User_68,User_30,User_41,User_75,User_24,User_12,User_117,User_9,User_110,User_119,User_101,User_36,User_115,User_18,User_144,User_118,User_120,User_25,User_23,User_86,User_90,User_73,User_142,User_141,User_76,User_32,User_50,User_125,User_145,User_97,User_61,User_39,User_148,User_127,User_128,User_46,User_121,User_103,User_122,User_60,User_83,User_40,User_53,User_48,User_146,User_107,User_51,User_123,User_143,User_45,User_54,User_63,User_124,User_138,User_93,User_64,User_126,User_58,User_57,User_106,User_79,User_62,User_78,User_133,User_82,User_55,User_66,User_139,User_56,User_91,User_111,User_59,User_132,User_130,User_67,User_104,User_131,User_65,User_77,User_84,User_80,User_96,User_136,User_129,User_69,User_134,A_Create Application complete,A_Submitted complete,W_Handle leads schedule,W_Handle leads withdraw,W_Complete application schedule,A_Concept complete,W_Complete application start,A_Accepted complete,O_Create Offer complete,O_Created complete,O_Sent (mail and online) complete,W_Complete application complete,W_Call after offers schedule,W_Call after offers start,A_Complete complete,W_Call after offers suspend,W_Call after offers resume,A_Cancelled complete,O_Cancelled complete,W_Call after offers ate_abort,W_Complete application suspend,W_Complete application resume,W_Complete application ate_abort,W_Validate application schedule,W_Validate application start,A_Validating complete,O_Returned complete,W_Validate application suspend,W_Validate application ate_abort,W_Call incomplete files schedule,W_Call incomplete files start,A_Incomplete complete,W_Call incomplete files suspend,W_Call incomplete files resume,W_Call incomplete files ate_abort,O_Sent (online only) complete,W_Handle leads start,W_Handle leads complete,W_Validate application resume,W_Validate application complete,W_Call incomplete files complete,W_Complete application withdraw,W_Call after offers complete,W_Handle leads suspend,W_Handle leads resume,W_Assess potential fraud schedule,W_Assess potential fraud start,W_Assess potential fraud suspend,W_Assess potential fraud resume,W_Assess potential fraud complete,W_Handle leads ate_abort,W_Shortened completion schedule,W_Shortened completion start,W_Shortened completion suspend,W_Shortened completion resume,O_Accepted complete,A_Pending complete,W_Personal Loan collection schedule,W_Personal Loan collection start,W_Personal Loan collection suspend,W_Personal Loan collection resume,W_Assess potential fraud ate_abort,W_Assess potential fraud withdraw,FirstWithdrawalAmount,NumberOfTerms,Accepted,MonthlyCost,Selected,CreditScore,OfferedAmount,case:LoanGoal,case:ApplicationType,case:RequestedAmount,first_timestamp,trace_duration,next_event
3245,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,9.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,1.0,1.0,1.0,1,1,1.0,1,1,1,1.0,1.0,1,1,1,0,0,0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,20000.0,60.0,True,371.17,True,878.0,20000.0,Car,New credit,20000.0,2016-10-18 08:51:42.172000+00:00,170163.713,True
3246,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,12.0,0.0,0,0,0,0,1,0.0,0.0,0.0,1,1,1.0,1,1,1,1.0,1.0,1,1,1,0,0,0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,134.0,True,100.0,True,839.0,10000.0,Home improvement,Limit raise,10000.0,2016-10-18 09:04:18.477000+00:00,601.295,True
3247,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,12.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0.0,0.0,0.0,1,1,1.0,1,1,1,1.0,1.0,1,1,1,0,0,0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,126.0,True,400.0,True,0.0,40000.0,Unknown,Limit raise,0.0,2016-10-18 09:06:20.637000+00:00,150.016,True
3248,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,9.0,0,0.0,0.0,2.0,2.0,0.0,0,0,0,0,1,0.0,0.0,0.0,1,1,1.0,1,1,1,1.0,1.0,1,1,1,0,0,0,0.0,0,3.0,3.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,126.0,False,320.0,True,0.0,32000.0,Unknown,Limit raise,0.0,2016-10-18 09:30:06.339000+00:00,183303.148,True
3249,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,7.0,0.0,0,0,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,2.0,0.0,0,0,0,0,1,0.0,0.0,0.0,1,1,1.0,1,1,1,1.0,0.0,1,1,1,0,0,0,0.0,0,2.0,1.0,1.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,2000.0,134.0,True,130.0,True,775.0,13000.0,Car,Limit raise,13000.0,2016-10-18 09:34:06.469000+00:00,266088.593,True
3250,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,8.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,1.0,1.0,1.0,1,1,0.0,1,1,1,1.0,0.0,1,1,1,0,0,0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,82.0,True,500.0,True,733.0,35000.0,Car,New credit,35000.0,2016-10-18 10:31:35.256000+00:00,91691.652,True
3251,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,4.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,12.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,1.0,1.0,0.0,1,1,1.0,1,2,2,2.0,1.0,1,1,1,0,0,0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,1.0,0.0,0,0,0,0.0,0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,30000.0,125.0,False,650.0,False,0.0,64500.0,"Other, see explanation",New credit,65000.0,2016-10-18 11:12:05.182000+00:00,167404.402,True
3252,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,8.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,1.0,1.0,1.0,1,1,0.0,1,1,1,1.0,0.0,1,1,1,0,0,0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,10000.0,44.0,True,249.58,True,1083.0,10000.0,Car,New credit,10000.0,2016-10-18 11:24:50.857000+00:00,82687.083,True
3253,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,8.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,1.0,1.0,1.0,1,1,0.0,1,1,1,1.0,0.0,1,1,1,0,0,0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,1.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,5000.0,36.0,True,152.82,True,908.0,5000.0,Car,New credit,5000.0,2016-10-18 12:00:33.792000+00:00,89843.38,True
3254,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,12.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,1,0.0,0.0,0.0,1,1,1.0,1,1,1,1.0,1.0,1,1,1,0,0,0,0.0,0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,25000.0,60.0,False,461.78,False,0.0,25000.0,Car,New credit,25000.0,2016-10-18 12:02:09.032000+00:00,515.051,True
