In [1]:
import pandas as pd
from numpy import mean

from sklearn.model_selection import train_test_split

# Feature engineering
from sklearn.impute  import SimpleImputer
from feature_engine.encoding import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler

# sampling
from imblearn.under_sampling import TomekLinks 
from imblearn.combine import SMOTETomek

#Modelling
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import cross_val_score

# Reporting
from sklearn.metrics import classification_report,confusion_matrix


#from imblearn.ensemble import BalancedRandomForestClassifier

  from pandas import MultiIndex, Int64Index


## Common Functions

In [2]:
def calc_sum_capital_paid_account_0_24m(data):
    # add new variable (sm_capital_paid_account_0_24m)the data
    # sm_capital_paid_account_0_24m = capital_paid_account_0_12m + m_capital_paid_account_12_24m
    data['sum_capital_paid_account_0_24m'] = data.apply(lambda row : row['sum_capital_paid_account_0_12m'] + row['sum_capital_paid_account_12_24m'], axis=1)
    return data

In [3]:
def calc_num_of_paid_inv_0_12m(data):
    # add new variable (num_of_paid_inv_0_12m)the data
    # num_of_paid_inv_0_12m = num_active_inv / num_active_div_by_paid_inv_0_12m
    data['num_of_paid_inv_0_12m'] = data.apply(lambda row: 0 if row['num_active_inv'] == 0 else row['num_active_inv'] / row['num_active_div_by_paid_inv_0_12m'], axis=1)
    return data

In [4]:
def calc_status_max_active_0_24(data):
    # add new variable (status_max_active_0_24)the data
    # status_max_active_0_24 is the max(account_worst_status_0_3m, account_worst_status_12_24m, account_worst_status_3_6m, account_worst_status_6_12m)
    data['status_max_active_0_24'] = data.apply(lambda row: max(row['account_worst_status_0_3m'], row['account_worst_status_12_24m'], row['account_worst_status_3_6m'], row['account_worst_status_6_12m']), axis=1)
    return data

In [5]:
def XGBClassifier_gridSearch(X_train, y_train, X_test, y_test, weights = [0, 1, 10, 25, 50, 75, 99, 100, 1000]):
    """
        Train a XGBoost Classifier over a grid of given parametrs  (Class weights)
        Output:
            Best Model
    
    """
    # define model
    model = XGBClassifier()
    
    # define grid
    param_grid = dict(scale_pos_weight=weights)
    
    # define evaluation procedure
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    
    # define grid search
    #scoring=['accuracy','precision_macro','recall_macro', 'f1_micro']
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=cv, scoring='precision_macro')
    
    # execute the grid search
    grid_result = grid.fit(X_train, y_train)
    
    # report the best configuration
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    
    # report all classification_report
    xgb_grid_predict = grid.predict(X_test)
    print(classification_report(y_test,xgb_grid_predict))
    
    return grid


In [6]:
def RandomForestClassifier_gridSearch(X_train, y_train, X_test, y_test, class_weight=['balanced_subsample', 'balanced']):
    """
        Train a Random Forest Classifier over a grid of given parametrs (Class weights)
        Output:
            Best Model
    
    """
    # define model
    model = RandomForestClassifier()
    
    # define grid
    param_grid = dict(class_weight=class_weight)
    
    # define evaluation procedure
    cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
    
    # define grid search
    #scoring=['accuracy','precision_macro','recall_macro', 'f1_micro']
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=cv, scoring='precision_macro')
    
    # execute the grid search
    grid_result = grid.fit(X_train, y_train)
    
    # report the best configuration
    print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
    
    # report all classification_report
    RF_grid_predict = grid.predict(X_test)
    print(classification_report(y_test,RF_grid_predict))
    
    return grid


## Load the Data

In [7]:
data = pd.read_csv("../data/training_dataset.csv")
data.head()

Unnamed: 0,uuid,default,account_amount_added_12_24m,account_days_in_dc_12_24m,account_days_in_rem_12_24m,account_days_in_term_12_24m,account_incoming_debt_vs_paid_0_24m,account_status,account_worst_status_0_3m,account_worst_status_12_24m,...,status_3rd_last_archived_0_24m,status_max_archived_0_6_months,status_max_archived_0_12_months,status_max_archived_0_24_months,recovery_debt,sum_capital_paid_account_0_12m,sum_capital_paid_account_12_24m,sum_paid_inv_0_12m,time_hours,worst_status_active_inv
0,63f69b2c-8b1c-4740-b78d-52ed9a4515ac,0.0,0,0.0,0.0,0.0,0.0,1.0,1.0,,...,1,1,1,1,0,0,0,178839,9.653333,1.0
1,0e961183-8c15-4470-9a5e-07a1bd207661,0.0,0,0.0,0.0,0.0,,1.0,1.0,1.0,...,1,1,2,2,0,0,0,49014,13.181389,
2,d8edaae6-4368-44e0-941e-8328f203e64e,0.0,0,0.0,0.0,0.0,,,,,...,1,1,2,2,0,0,0,124839,11.561944,1.0
3,0095dfb6-a886-4e2a-b056-15ef45fdb0ef,0.0,0,,,,,,,,...,1,1,1,1,0,0,0,324676,15.751111,1.0
4,c8f8b835-5647-4506-bf15-49105d8af30b,0.0,0,0.0,0.0,0.0,,,,,...,0,1,1,1,0,0,0,7100,12.698611,


In [8]:
test_data = pd.read_csv("../data/testing_dataset.csv")
test_data.head()

Unnamed: 0,uuid,default,account_amount_added_12_24m,account_days_in_dc_12_24m,account_days_in_rem_12_24m,account_days_in_term_12_24m,account_incoming_debt_vs_paid_0_24m,account_status,account_worst_status_0_3m,account_worst_status_12_24m,...,status_3rd_last_archived_0_24m,status_max_archived_0_6_months,status_max_archived_0_12_months,status_max_archived_0_24_months,recovery_debt,sum_capital_paid_account_0_12m,sum_capital_paid_account_12_24m,sum_paid_inv_0_12m,time_hours,worst_status_active_inv
0,6f6e6c6a-2081-4e6b-8eb3-4fd89b54b2d7,,0,0.0,0.0,0.0,0.009135,1.0,1.0,,...,1,1,1,1,0,8815,0,27157,19.895556,
1,f6f6d9f3-ef2b-4329-a388-c6a687f27e70,,0,0.0,0.0,0.0,,,,,...,0,0,0,0,0,0,0,0,0.236667,
2,e9c39869-1bc5-4375-b627-a2df70b445ea,,50956,0.0,77.0,0.0,0.0,1.0,1.0,2.0,...,2,1,1,3,0,36163,39846,93760,20.332778,
3,6beb88a3-9641-4381-beb6-c9a208664dd0,,35054,0.0,0.0,0.0,0.0,1.0,1.0,1.0,...,0,2,2,2,0,62585,0,1790,6.201111,
4,bb89b735-72fe-42a4-ba06-d63be0f4ca36,,0,0.0,0.0,0.0,0.0,1.0,2.0,,...,0,0,0,0,0,14295,0,0,8.451111,


In [9]:
data['sum_capital_paid_account_0_12m']

0        0
1        0
2        0
3        0
4        0
        ..
89971    0
89972    0
89973    0
89974    0
89975    0
Name: sum_capital_paid_account_0_12m, Length: 89976, dtype: int64

## Adding new features

In [10]:
data = calc_sum_capital_paid_account_0_24m(data)

In [11]:
data = calc_num_of_paid_inv_0_12m(data)

In [12]:
data = calc_status_max_active_0_24(data)

In [13]:
data.head()

Unnamed: 0,uuid,default,account_amount_added_12_24m,account_days_in_dc_12_24m,account_days_in_rem_12_24m,account_days_in_term_12_24m,account_incoming_debt_vs_paid_0_24m,account_status,account_worst_status_0_3m,account_worst_status_12_24m,...,status_max_archived_0_24_months,recovery_debt,sum_capital_paid_account_0_12m,sum_capital_paid_account_12_24m,sum_paid_inv_0_12m,time_hours,worst_status_active_inv,sum_capital_paid_account_0_24m,num_of_paid_inv_0_12m,status_max_active_0_24
0,63f69b2c-8b1c-4740-b78d-52ed9a4515ac,0.0,0,0.0,0.0,0.0,0.0,1.0,1.0,,...,1,0,0,0,178839,9.653333,1.0,0,13.0,1.0
1,0e961183-8c15-4470-9a5e-07a1bd207661,0.0,0,0.0,0.0,0.0,,1.0,1.0,1.0,...,2,0,0,0,49014,13.181389,,0,0.0,1.0
2,d8edaae6-4368-44e0-941e-8328f203e64e,0.0,0,0.0,0.0,0.0,,,,,...,2,0,0,0,124839,11.561944,1.0,0,14.0,
3,0095dfb6-a886-4e2a-b056-15ef45fdb0ef,0.0,0,,,,,,,,...,1,0,0,0,324676,15.751111,1.0,0,32.0,
4,c8f8b835-5647-4506-bf15-49105d8af30b,0.0,0,0.0,0.0,0.0,,,,,...,1,0,0,0,7100,12.698611,,0,0.0,


## Removing unwanted features

In [14]:
unwanted_fets = ['uuid', 'age', 'name_in_email', 'merchant_group', 'time_hours', 'avg_payment_span_0_12m', 'avg_payment_span_0_3m'
                 , 'account_incoming_debt_vs_paid_0_24m', 'num_arch_written_off_0_12m', "num_arch_written_off_12_24m", "recovery_debt", 'account_days_in_dc_12_24m'
                 , 'account_days_in_term_12_24m', 'num_arch_dc_0_12m','num_arch_dc_12_24m' , 'num_active_inv', 'max_paid_inv_0_12m', 'sum_capital_paid_account_0_12m', 'sum_capital_paid_account_12_24m'
                 , 'account_worst_status_0_3m', 'account_worst_status_12_24m', 'account_worst_status_3_6m', 'account_worst_status_6_12m'
                 , 'worst_status_active_inv', 'account_status', 'status_max_active_0_24' ]
data.drop(columns=unwanted_fets, inplace=True)

In [15]:
data.head()

Unnamed: 0,default,account_amount_added_12_24m,account_days_in_rem_12_24m,merchant_category,has_paid,max_paid_inv_0_24m,num_active_div_by_paid_inv_0_12m,num_arch_ok_0_12m,num_arch_ok_12_24m,num_arch_rem_0_12m,num_unpaid_bills,status_last_archived_0_24m,status_2nd_last_archived_0_24m,status_3rd_last_archived_0_24m,status_max_archived_0_6_months,status_max_archived_0_12_months,status_max_archived_0_24_months,sum_paid_inv_0_12m,sum_capital_paid_account_0_24m,num_of_paid_inv_0_12m
0,0.0,0,0.0,Dietary supplements,True,31638.0,0.153846,13,14,0,2,1,1,1,1,1,1,178839,0,13.0
1,0.0,0,0.0,Books & Magazines,True,13749.0,0.0,9,19,3,0,1,1,1,1,2,2,49014,0,0.0
2,0.0,0,0.0,Diversified entertainment,True,29890.0,0.071429,11,0,3,1,1,1,1,1,2,2,124839,0,14.0
3,0.0,0,,Diversified entertainment,True,40040.0,0.03125,31,21,0,1,1,1,1,1,1,1,324676,0,32.0
4,0.0,0,0.0,Electronic equipment & Related accessories,True,7100.0,0.0,1,0,0,0,1,0,0,1,1,1,7100,0,0.0


In [16]:
len(data.columns)

20

## Split dataset into train and test

In [17]:
X_train, X_test, y_train, y_test = train_test_split(
    data.drop('default', axis=1), # predictors
    data.default, # target
    test_size=0.2,
    random_state=0)  # for reproducibility

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((71980, 19), (17996, 19), (71980,), (17996,))

## Feature engineering

In [18]:
# define list for both categorical and continuous/numerical features
con_fets = ['num_arch_ok_0_12m', 'account_amount_added_12_24m', 'account_days_in_rem_12_24m', 'max_paid_inv_0_24m', 'num_active_div_by_paid_inv_0_12m', 'num_arch_ok_12_24m', 'num_arch_rem_0_12m', 'num_unpaid_bills', 'sum_paid_inv_0_12m', 'sum_capital_paid_account_0_24m', 'num_of_paid_inv_0_12m']
cat_fets = list(set(data.columns) - set(con_fets) - set(['default']))


In [19]:
# check for missing values in our continuous/numerical features
X_train[con_fets].isnull().mean().sort_values(ascending=False)

num_active_div_by_paid_inv_0_12m    0.231425
account_days_in_rem_12_24m          0.118533
num_of_paid_inv_0_12m               0.021464
num_arch_ok_0_12m                   0.000000
account_amount_added_12_24m         0.000000
max_paid_inv_0_24m                  0.000000
num_arch_ok_12_24m                  0.000000
num_arch_rem_0_12m                  0.000000
num_unpaid_bills                    0.000000
sum_paid_inv_0_12m                  0.000000
sum_capital_paid_account_0_24m      0.000000
dtype: float64

In [20]:
# check for missing values in our categorical features
X_train[cat_fets].isnull().mean().sort_values(ascending=False)

status_last_archived_0_24m         0.0
status_max_archived_0_6_months     0.0
status_max_archived_0_24_months    0.0
has_paid                           0.0
status_max_archived_0_12_months    0.0
status_2nd_last_archived_0_24m     0.0
merchant_category                  0.0
status_3rd_last_archived_0_24m     0.0
dtype: float64

In [21]:
# Fill missing numerical values 
imputer = SimpleImputer(strategy='most_frequent')
X_train[con_fets] = imputer.fit_transform(X_train[con_fets])
X_test[con_fets] = imputer.transform(X_test[con_fets])

In [22]:
# scaling numerical values 
scaler = MinMaxScaler()
X_train[con_fets] = scaler.fit_transform(X_train[con_fets])
X_test[con_fets] = scaler.transform(X_test[con_fets])

In [23]:
# encode merchant_category
ordinal_enc = OneHotEncoder()
X_train = ordinal_enc.fit_transform(X_train)
X_test = ordinal_enc.transform(X_test)

In [24]:
type(X_train)

pandas.core.frame.DataFrame

In [25]:
X_train.columns

Index(['account_amount_added_12_24m', 'account_days_in_rem_12_24m', 'has_paid',
       'max_paid_inv_0_24m', 'num_active_div_by_paid_inv_0_12m',
       'num_arch_ok_0_12m', 'num_arch_ok_12_24m', 'num_arch_rem_0_12m',
       'num_unpaid_bills', 'status_last_archived_0_24m',
       'status_2nd_last_archived_0_24m', 'status_3rd_last_archived_0_24m',
       'status_max_archived_0_6_months', 'status_max_archived_0_12_months',
       'status_max_archived_0_24_months', 'sum_paid_inv_0_12m',
       'sum_capital_paid_account_0_24m', 'num_of_paid_inv_0_12m',
       'merchant_category_Diversified entertainment',
       'merchant_category_Diversified electronics',
       'merchant_category_Automotive Parts & Accessories',
       'merchant_category_Personal care & Body improvement',
       'merchant_category_Youthful Shoes & Clothing',
       'merchant_category_Books & Magazines',
       'merchant_category_Concept stores & Miscellaneous',
       'merchant_category_Sex toys', 'merchant_category_Diet

## Modelling - Baseline model

In [26]:
xgb_model = XGBClassifier(learning_rate=0.001,
                            max_depth = 1, 
                            n_estimators = 100)
xgb_model.fit(X_train, y_train)
#scale_pos_weight=0                              

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.001, max_delta_step=0,
              max_depth=1, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=100, n_jobs=8,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [27]:
xgb_predict = xgb_model.predict(X_test)
confusion_mat = confusion_matrix(y_test,xgb_predict)
print("confusion_matrix ", confusion_mat)
print(classification_report(y_test,xgb_predict))

confusion_matrix  [[17733     0]
 [  263     0]]
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99     17733
         1.0       0.00      0.00      0.00       263

    accuracy                           0.99     17996
   macro avg       0.49      0.50      0.50     17996
weighted avg       0.97      0.99      0.98     17996



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Conclusion 
 - Data is imbalanced which affects the model performance
 - Moving forward to tune the class weight and try Random Forest Classifier

## Modelling - Tune class weight

In [28]:
XGB_grid = XGBClassifier_gridSearch(X_train, y_train, X_test, y_test)

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


Best: 0.756579 using {'scale_pos_weight': 1}
              precision    recall  f1-score   support

         0.0       0.99      1.00      0.99     17733
         1.0       0.44      0.07      0.12       263

    accuracy                           0.99     17996
   macro avg       0.71      0.53      0.56     17996
weighted avg       0.98      0.99      0.98     17996



In [29]:
RF_grid = RandomForestClassifier_gridSearch(X_train, y_train, X_test, y_test)

Best: 0.513992 using {'class_weight': 'balanced_subsample'}
              precision    recall  f1-score   support

         0.0       0.99      0.92      0.95     17733
         1.0       0.04      0.23      0.07       263

    accuracy                           0.91     17996
   macro avg       0.51      0.57      0.51     17996
weighted avg       0.97      0.91      0.94     17996



### Conclusion 
 - Random Forest is better ehen predicting defaulting (label 1)
 - Results are unsatisfactory for both model
 - Try OverSampling prior modeling

## Modelling -  Over sampling
 - Train SMOTETomek and TomekLinks on the training data 
 - Select the best performer

In [30]:
# Over sample the data to get a balanced data
tl = SMOTETomek()#TomekLinks(sampling_strategy='not majority')
X_res, y_res = tl.fit_resample(X_train.fillna(0), y_train)

In [31]:
X_res.shape, X_train.shape

((141732, 75), (71980, 75))

In [32]:
X_res.head()

Unnamed: 0,account_amount_added_12_24m,account_days_in_rem_12_24m,has_paid,max_paid_inv_0_24m,num_active_div_by_paid_inv_0_12m,num_arch_ok_0_12m,num_arch_ok_12_24m,num_arch_rem_0_12m,num_unpaid_bills,status_last_archived_0_24m,...,merchant_category_Bags & Wallets,merchant_category_Underwear,merchant_category_Tobacco,merchant_category_Office machines & Related accessories (excl. computers),merchant_category_Safety products,merchant_category_Cleaning & Sanitary,"merchant_category_Wine, Beer & Liquor",merchant_category_Car electronics,merchant_category_Non,merchant_category_Education
0,0.027311,0.0,True,0.01457,0.027778,0.015326,0.046053,0.0,0.021978,1,...,0,0,0,0,0,0,0,0,0,0
1,0.0,0.0,True,0.083513,0.0,0.034483,0.032895,0.0,0.0,1,...,0,0,0,0,0,0,0,0,0,0
2,0.087573,0.035616,True,0.09962,0.007407,0.076628,0.049342,0.190476,0.010989,1,...,0,0,0,0,0,0,0,0,0,0
3,0.0,0.0,True,0.052624,0.0,0.003831,0.003289,0.0,0.0,1,...,0,0,0,0,0,0,0,0,0,0
4,0.0,0.0,True,0.029326,0.0,0.011494,0.009868,0.0,0.010989,1,...,0,0,0,0,0,0,0,0,0,0


In [33]:
XGB_grid_oversample = XGBClassifier_gridSearch(X_res, y_res, X_test, y_test)

  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


Best: 0.929768 using {'scale_pos_weight': 1}
              precision    recall  f1-score   support

         0.0       0.99      0.91      0.95     17733
         1.0       0.06      0.38      0.10       263

    accuracy                           0.90     17996
   macro avg       0.52      0.65      0.52     17996
weighted avg       0.98      0.90      0.94     17996



In [34]:
RF_grid_oversample = RandomForestClassifier_gridSearch(X_res, y_res, X_test, y_test)

Best: 0.937988 using {'class_weight': 'balanced_subsample'}
              precision    recall  f1-score   support

         0.0       0.99      0.92      0.95     17733
         1.0       0.06      0.33      0.10       263

    accuracy                           0.91     17996
   macro avg       0.52      0.63      0.52     17996
weighted avg       0.98      0.91      0.94     17996



### Conclusion 
 - XGBoost performed better
 - Runing SMOTETomek got higher results than running TomekLinks


## Prediction on unlabeled data
 - Run the best model on the data 

In [35]:
uuid_df = test_data[['uuid']]

In [36]:
# add new features
test_data = calc_sum_capital_paid_account_0_24m(test_data)
test_data = calc_num_of_paid_inv_0_12m(test_data)
test_data = calc_status_max_active_0_24(test_data)

In [38]:
# drop unwanted columns
test_data.drop(columns=unwanted_fets, inplace=True)

In [46]:
test_data.drop(columns=['default'], inplace=True)

In [47]:
test_data.columns

Index(['account_amount_added_12_24m', 'account_days_in_rem_12_24m',
       'merchant_category', 'has_paid', 'max_paid_inv_0_24m',
       'num_active_div_by_paid_inv_0_12m', 'num_arch_ok_0_12m',
       'num_arch_ok_12_24m', 'num_arch_rem_0_12m', 'num_unpaid_bills',
       'status_last_archived_0_24m', 'status_2nd_last_archived_0_24m',
       'status_3rd_last_archived_0_24m', 'status_max_archived_0_6_months',
       'status_max_archived_0_12_months', 'status_max_archived_0_24_months',
       'sum_paid_inv_0_12m', 'sum_capital_paid_account_0_24m',
       'num_of_paid_inv_0_12m'],
      dtype='object')

In [48]:
# feature Engineering
test_data[con_fets] = imputer.transform(test_data[con_fets])
test_data[con_fets] = scaler.transform(test_data[con_fets])
test_data = ordinal_enc.transform(test_data)

In [49]:
predicted_prob = XGB_grid_oversample.predict_proba(test_data)

In [54]:
predicted_prob[:, 1]

array([0.95379037, 0.06602392, 0.9835128 , ..., 0.987877  , 0.8675852 ,
       0.9701601 ], dtype=float32)

In [55]:
XGB_grid_oversample.classes_

array([0., 1.])

In [56]:
defaulting_pb_df = pd.DataFrame(predicted_prob[:, 1])
defaulting_pb_df

Unnamed: 0,0
0,0.953790
1,0.066024
2,0.983513
3,0.360907
4,0.373533
...,...
9995,0.581467
9996,0.979478
9997,0.987877
9998,0.867585


In [60]:
predictions_on_unlabled_data = pd.concat([uuid_df, defaulting_pb_df], axis=1)

In [61]:
predictions_on_unlabled_data

Unnamed: 0,uuid,0
0,6f6e6c6a-2081-4e6b-8eb3-4fd89b54b2d7,0.953790
1,f6f6d9f3-ef2b-4329-a388-c6a687f27e70,0.066024
2,e9c39869-1bc5-4375-b627-a2df70b445ea,0.983513
3,6beb88a3-9641-4381-beb6-c9a208664dd0,0.360907
4,bb89b735-72fe-42a4-ba06-d63be0f4ca36,0.373533
...,...,...
9995,5c03bc63-ea65-4ffd-aa7b-95ea9a46db34,0.581467
9996,f8db22f4-9819-420c-abbc-9ddf1843176e,0.979478
9997,b22e21ea-b1b2-4df3-b236-0ff6d5fdc0d8,0.987877
9998,bafcab15-9898-479c-b729-c9dda7edb78f,0.867585
