Installing required packages

In [1]:
!pip install scikit-uplift

Collecting scikit-uplift
  Downloading scikit_uplift-0.5.1-py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.1/42.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-uplift
Successfully installed scikit-uplift-0.5.1


In [2]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.2


In [3]:
!pip install lightgbm



In [4]:
import numpy as np
import pandas as pd
import torch
import sklift

Helping Functions

In [5]:
from sklift.metrics import (
    uplift_at_k, uplift_auc_score, qini_auc_score, weighted_average_uplift
)

def calculate_uplift_metrics(model, features_test, labels_test, treat_test, k=0.3):
    uplift_predictions = model.predict(features_test.iloc[:, :-2])  # Assuming the last two columns are not used
    trmnt_val = treat_test

    uplift_at_k_value = uplift_at_k(y_true=labels_test, uplift=uplift_predictions,
                                    treatment=trmnt_val,
                                    strategy='overall', k=k)

    qini_auc = qini_auc_score(y_true=labels_test, uplift=uplift_predictions,
                              treatment=trmnt_val)

    uplift_auc = uplift_auc_score(y_true=labels_test, uplift=uplift_predictions,
                                  treatment=trmnt_val)

    weighted_avg_uplift = weighted_average_uplift(y_true=labels_test, uplift=uplift_predictions,
                                                  treatment=trmnt_val)

    metrics = {
        "Uplift@30%": uplift_at_k_value,
        "Area Under Qini Curve": qini_auc,
        "Area Under Uplift Curve": uplift_auc,
        "Weighted Average Uplift": weighted_avg_uplift
    }

    return metrics

In [7]:
def generate_submission_data(model, scaler, cols_to_scale, df_eval):
    submission_data = pd.DataFrame(columns=['customer', 'merchant', 'predicted_score'])

    for chunk in df_eval:
        chunk[cols_to_scale] = imputer.fit_transform(chunk[cols_to_scale])
        chunk[cols_to_scale] = scaler.transform(chunk[cols_to_scale])
        uplift_predictions = model.predict(chunk[cols_to_scale])
        chunk_predictions = pd.DataFrame({
            'customer': chunk['customer'],
            'merchant': chunk['merchant'],
            'predicted_score': uplift_predictions
        })

        submission_data = pd.concat([submission_data, chunk_predictions], ignore_index=True, axis=0)

    return submission_data

Preprocessing

In [None]:
df = pd.read_csv('/content/drive/MyDrive/Amex_Super_Bowl/Amex_Campus_Challenge_Train.csv', chunksize = 1000000)

In [None]:
import pandas as pd

# Read the CSV file in chunks
for chunk in df :

    # Or perform operations on the chunk
    # For instance, calculating the mean of a column
    table=chunk.groupby(['ind_recommended']).agg({'activation':['mean','sum','count']})
    print(table)
    # Continue with your desired operations on this chunk
    # ...

                activation            
                      mean sum   count
ind_recommended                       
0                      0.0   0  873555
1                      0.0   0  126445
                activation            
                      mean sum   count
ind_recommended                       
0                      0.0   0  873334
1                      0.0   0  126666
                activation            
                      mean sum   count
ind_recommended                       
0                      0.0   0  873734
1                      0.0   0  126266
                activation            
                      mean sum   count
ind_recommended                       
0                      0.0   0  873000
1                      0.0   0  127000
                activation            
                      mean sum   count
ind_recommended                       
0                      0.0   0  873947
1                      0.0   0  126053
                activatio

In [None]:
# Quiet imbalanced dataset in terms of

In [None]:
# Get the specific chunk using the get_chunk() method
last_chunk = chunk

In [None]:
chunk.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 229978 entries, 12000000 to 12229977
Data columns (total 71 columns):
 #   Column                        Non-Null Count   Dtype  
---  ------                        --------------   -----  
 0   ind_recommended               229978 non-null  int64  
 1   activation                    229978 non-null  int64  
 2   customer_digital_activity_04  47071 non-null   float64
 3   customer_spend_01             201773 non-null  float64
 4   customer_industry_spend_01    145113 non-null  float64
 5   customer_industry_spend_02    145113 non-null  float64
 6   customer_industry_spend_03    145113 non-null  float64
 7   customer_industry_spend_04    145113 non-null  float64
 8   customer_industry_spend_05    145113 non-null  float64
 9   customer_spend_02             201773 non-null  float64
 10  customer_spend_03             217269 non-null  float64
 11  customer_merchant_02          8277 non-null    float64
 12  customer_merchant_01          8277 

In [None]:
chunk.columns

Index(['ind_recommended', 'activation', 'customer_digital_activity_04',
       'customer_spend_01', 'customer_industry_spend_01',
       'customer_industry_spend_02', 'customer_industry_spend_03',
       'customer_industry_spend_04', 'customer_industry_spend_05',
       'customer_spend_02', 'customer_spend_03', 'customer_merchant_02',
       'customer_merchant_01', 'customer_spend_04', 'customer_spend_05',
       'customer_spend_06', 'customer_spend_07', 'merchant_spend_01',
       'merchant_spend_02', 'merchant_spend_03', 'merchant_spend_04',
       'merchant_spend_05', 'merchant_spend_06', 'merchant_spend_07',
       'merchant_spend_08', 'merchant_profile_01', 'customer_merchant_03',
       'customer_profile_01', 'customer_profile_02',
       'customer_digital_activity_05', 'customer_spend_13',
       'customer_digital_activity_06', 'customer_spend_14',
       'customer_digital_activity_07', 'customer_digital_activity_08',
       'customer_digital_activity_09', 'customer_digital_acti

In [None]:
chunk.isna().sum()

ind_recommended                      0
activation                           0
customer_digital_activity_04    182907
customer_spend_01                28205
customer_industry_spend_01       84865
                                 ...  
customer_digital_activity_02       480
customer_profile_04                 67
distance_05                          0
customer                             0
merchant                             0
Length: 71, dtype: int64

In [8]:
from google.colab import  drive
drive.mount('/drive')
chunk.to_csv('/drive/My Drive/Amex_train_set_compressed_chunk.csv', index=False)

Mounted at /drive


In [8]:
chunk = pd.read_csv('/content/drive/MyDrive/Amex_train_set_compressed_chunk.csv')

In [9]:
print(chunk.groupby(['ind_recommended']).agg({'activation':['mean','sum','count']}))

                activation               
                      mean    sum   count
ind_recommended                          
0                 0.300909  60173  199971
1                 0.328023   9843   30007


Sampling

In [10]:
chunk_1 = chunk[(chunk['ind_recommended']==0) & (chunk['activation']==1)]
chunk_2 = chunk[(chunk['ind_recommended']==0) & (chunk['activation']==0)]
chunk_3 = chunk[(chunk['ind_recommended']==1) & (chunk['activation']==1)]
chunk_4 = chunk[(chunk['ind_recommended']==1) & (chunk['activation']==0)]

In [11]:
#sampling
chunk_2 = chunk_2.sample(n=len(chunk_3)*2, replace = True)
chunk_4 = chunk_4.sample(n=len(chunk_3)*2, replace = True)
chunk_3=  chunk_3.sample(n=len(chunk_1), replace = True)
chunk_sampled = pd.concat([chunk_1,chunk_2 ,chunk_3, chunk_4], axis=0)

Imputing Null values

In [13]:
from sklearn.impute import SimpleImputer
import pandas as pd

imputer = SimpleImputer(strategy='mean')
cols_to_impute=chunk_sampled.columns[2:]

chunk_sampled[cols_to_impute] = imputer.fit_transform(chunk_sampled[cols_to_impute])

In [14]:
cols_to_impute

Index(['customer_digital_activity_04', 'customer_spend_01',
       'customer_industry_spend_01', 'customer_industry_spend_02',
       'customer_industry_spend_03', 'customer_industry_spend_04',
       'customer_industry_spend_05', 'customer_spend_02', 'customer_spend_03',
       'customer_merchant_02', 'customer_merchant_01', 'customer_spend_04',
       'customer_spend_05', 'customer_spend_06', 'customer_spend_07',
       'merchant_spend_01', 'merchant_spend_02', 'merchant_spend_03',
       'merchant_spend_04', 'merchant_spend_05', 'merchant_spend_06',
       'merchant_spend_07', 'merchant_spend_08', 'merchant_profile_01',
       'customer_merchant_03', 'customer_profile_01', 'customer_profile_02',
       'customer_digital_activity_05', 'customer_spend_13',
       'customer_digital_activity_06', 'customer_spend_14',
       'customer_digital_activity_07', 'customer_digital_activity_08',
       'customer_digital_activity_09', 'customer_digital_activity_10',
       'customer_digital_activ

In [15]:
import pandas as pd
import sklift
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
# Load your DataFrame with the columns you've provided
# df = pd.read_csv("your_dataframe.csv")

# Prepare features and labels
import pandas as pd

# Assuming chunk_sampled is your DataFrame
columns_to_drop = ['activation', 'ind_recommended']
features = chunk_sampled.drop(columns=columns_to_drop)  # Drop the specified columns
# Scale features except last two columns
scaler = StandardScaler()

# Exclude the last two columns
cols_to_scale = features.columns[:-2]

# Apply scaling to selected columns
features[cols_to_scale] = scaler.fit_transform(features[cols_to_scale])
labels = chunk_sampled['activation']
treat = chunk_sampled['ind_recommended']

# Split data into train and test sets
features_train, features_test, labels_train, labels_test, treat_train, treat_test= train_test_split(
    features,labels, treat, test_size=0.2, random_state=42
)

In [14]:
cols_to_scale.shape

(67,)

Custom functions to check metric on train/validation set

In [16]:
# required uplift
#creating custom function for MSB to just return Top 10 rank values of activation
def incr_act_top(input_df,pred_col,cm_key='customer',treated_col='ind_recommended',actual_col='activation'):

	#for correcting variable types
    input_df[[treated_col, actual_col, pred_col]] = input_df[[treated_col, actual_col, pred_col]].apply(pd.to_numeric, errors='coerce')

    input_df['rank_per_cm1'] = input_df.groupby(cm_key)[pred_col].rank(method='first', ascending=False)

    input_df = input_df.loc[input_df.rank_per_cm1 <= 10,:]

    agg_df = input_df.groupby(treated_col,as_index=False).agg({actual_col:'mean'})
    agg_df.columns = [treated_col,'avg_30d_act']

    print(agg_df)
    recommended_avg_30d_act = float(agg_df.loc[agg_df[treated_col]==1,'avg_30d_act'])
    not_recommended_avg_30d_act = float(agg_df.loc[agg_df[treated_col]==0,'avg_30d_act'])


    return (recommended_avg_30d_act-not_recommended_avg_30d_act)

def generate_submission_data_train(model, scaler, cols_to_scale, df):
    eval_data = pd.DataFrame(columns=['customer', 'merchant', 'predicted_score','ind_recommended','activation'])
    for chunk in df:
        chunk[cols_to_scale] = scaler.transform(chunk[cols_to_scale])
        uplift_predictions = model.predict(chunk[cols_to_scale])
        chunk_data = pd.DataFrame({
            'customer': chunk['customer'],
            'merchant': chunk['merchant'],
            'predicted_score': uplift_predictions,
            'ind_recommended': chunk['ind_recommended'],
            'activation' : chunk['activation']
        })
        eval_data = pd.concat([eval_data, chunk_data], ignore_index=True, axis=0)

    return eval_data

Submission_Round 1

In [17]:
from catboost import CatBoostClassifier
from sklift.models import ClassTransformation

estimator = CatBoostClassifier(verbose=100,
                               random_state=42,
                               thread_count=1)
ct_model = ClassTransformation(estimator=estimator)
ct_model = ct_model.fit(features[cols_to_scale], labels, treat)
df = pd.read_csv('/content/drive/MyDrive/Amex_Super_Bowl/Amex_Campus_Challenge_Train.csv', chunksize = 1000000)
train_score= generate_submission_data_train(ct_model, scaler, cols_to_scale, df)
final_score = round(incr_act_top(input_df=train_score,pred_col='predicted_score',cm_key='customer',treated_col='ind_recommended',actual_col='activation'), 7)
print(final_score)

Learning rate set to 0.089901
0:	learn: 0.6888388	total: 127ms	remaining: 2m 6s
100:	learn: 0.6409425	total: 8.31s	remaining: 1m 14s
200:	learn: 0.6197990	total: 15.7s	remaining: 1m 2s
300:	learn: 0.5998858	total: 23.4s	remaining: 54.3s
400:	learn: 0.5830935	total: 33.5s	remaining: 50.1s
500:	learn: 0.5668711	total: 40.4s	remaining: 40.3s
600:	learn: 0.5520430	total: 49.9s	remaining: 33.1s
700:	learn: 0.5382556	total: 57.9s	remaining: 24.7s
800:	learn: 0.5244469	total: 1m 14s	remaining: 18.6s
900:	learn: 0.5118726	total: 1m 22s	remaining: 9.05s
999:	learn: 0.5002780	total: 1m 30s	remaining: 0us
   ind_recommended  avg_30d_act
0                0     0.006865
1                1     0.011961
0.0050962


In [18]:
feat_imp = pd.DataFrame({
    'feature_name': ct_model.estimator.feature_names_,
    'feature_score': ct_model.estimator.feature_importances_
}).sort_values('feature_score', ascending=False).reset_index(drop=True)

feat_imp.head(20)

Unnamed: 0,feature_name,feature_score
0,customer_digital_activity_02,4.398848
1,customer_merchant_03,3.98305
2,distance_05,3.191116
3,customer_spend_04,3.189002
4,customer_spend_01,2.637322
5,merchant_profile_03,2.618413
6,customer_profile_04,2.526816
7,merchant_profile_02,2.449299
8,customer_profile_03,2.375996
9,customer_profile_01,2.292389


Submission on Round 2 evaluation set

In [18]:
def generate_submission_data_R2(model, scaler, cols_to_scale, df_eval):
    submission_data = pd.DataFrame(columns=['customer', 'merchant', 'predicted_score'])

    for chunk in df_eval:
        chunk[cols_to_impute] = imputer.transform(chunk[cols_to_impute])
        chunk[cols_to_scale] = scaler.transform(chunk[cols_to_scale])
        uplift_predictions = model.predict(chunk[cols_to_scale])
        chunk_predictions = pd.DataFrame({
            'customer': chunk['customer'],
            'merchant': chunk['merchant'],
            'predicted_score': uplift_predictions
        })

        submission_data = pd.concat([submission_data, chunk_predictions], ignore_index=True, axis=0)

    return submission_data

In [None]:
df_eval_2 = pd.read_csv('/content/drive/MyDrive/Amex_Super_Bowl/Amex_Campus_Challenge_Eval_Round2 (1).csv')

In [None]:
import pandas as pd

# Assuming 'df' is your DataFrame
# Replace 'df' with your actual DataFrame name
# Calculate the total number of rows in the DataFrame
total_rows = len(df_eval_2)

# Calculate the percentage of null values in each column
null_percentage = (df_eval_2.isnull().sum() / total_rows) * 100

# Display the results
null_percentage


customer_digital_activity_04    95.536416
customer_spend_01               12.041379
customer_industry_spend_01      56.162043
customer_industry_spend_02      56.162043
customer_industry_spend_03      56.162043
                                  ...    
customer_digital_activity_02     0.985387
customer_profile_04              0.654579
distance_05                      0.000000
customer                         0.000000
merchant                         0.000000
Length: 69, dtype: float64

In [None]:
df_eval_2.head()

Unnamed: 0,customer_digital_activity_04,customer_spend_01,customer_industry_spend_01,customer_industry_spend_02,customer_industry_spend_03,customer_industry_spend_04,customer_industry_spend_05,customer_spend_02,customer_spend_03,customer_merchant_02,...,merchant_spend_09,merchant_profile_03,customer_digital_activity_01,merchant_spend_10,customer_profile_03,customer_digital_activity_02,customer_profile_04,distance_05,customer,merchant
0,,,,,,,,,,,...,218.0,243.0,,509.755,,,,0.164692,574819,1087034
1,,,,,,,,,,,...,,,,,,,,0.539078,574819,1328040
2,,,,,,,,,,,...,8369.0,8960.0,,1096.775,,,,0.524643,574819,1502936
3,,,,,,,,,,,...,13503.0,8277.0,,669.96,,,,0.460987,574819,1529762
4,,,,,,,,,,,...,917.0,919.0,,114.495,,,,0.759766,574819,1685993


In [None]:
df_eval_2.shape

(4108134, 69)

In [None]:
df_eval_2 = pd.read_csv('/content/drive/MyDrive/Amex_Super_Bowl/Amex_Campus_Challenge_Eval_Round2 (1).csv', chunksize = 1000000)
submission_data= generate_submission_data_R2(ct_model, scaler, cols_to_scale, df_eval_2)

In [None]:
submission_data.shape

(4108134, 3)

In [None]:
submission_data.head()

Unnamed: 0,customer,merchant,predicted_score
0,574819.0,1087034.0,-0.230397
1,574819.0,1328040.0,-0.09499
2,574819.0,1502936.0,-0.201506
3,574819.0,1529762.0,-0.119029
4,574819.0,1685993.0,-0.21123


In [None]:
submission_data[['customer','merchant']] = submission_data[['customer','merchant']].astype(int)

In [None]:
from google.colab import drive
drive.mount('/drive')
submission_data.to_csv('/drive/My Drive/Amex_submission_R2_model_R1.csv', index=False)

Drive already mounted at /drive; to attempt to forcibly remount, call drive.mount("/drive", force_remount=True).


Experiments

SoloModel Approach

In [19]:
from sklift.models import SoloModel, TwoModels
import lightgbm as lgb
estimator = lgb.LGBMClassifier(random_state=32)

In [20]:
sm = SoloModel(estimator)
sm = sm.fit(features_train[cols_to_scale], labels_train, treat_train)
df = pd.read_csv('/content/drive/MyDrive/Amex_Super_Bowl/Amex_Campus_Challenge_Train.csv', chunksize = 1000000)
train_score= generate_submission_data_train(sm, scaler, cols_to_scale, df)
final_score = round(incr_act_top(input_df=train_score,pred_col='predicted_score',cm_key='customer',treated_col='ind_recommended',actual_col='activation'), 7)
print(final_score)

[LightGBM] [Info] Number of positive: 96359, number of negative: 31415
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 11895
[LightGBM] [Info] Number of data points in the train set: 127774, number of used features: 68
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.754136 -> initscore=1.120795
[LightGBM] [Info] Start training from score 1.120795
   ind_recommended  avg_30d_act
0                0     0.011796
1                1     0.014684
0.0028879


Two Model Approach

In [26]:
from sklift.models import TwoModels


tm = TwoModels(
    estimator_trmnt=CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True),
    estimator_ctrl=CatBoostClassifier(iterations=20, thread_count=2, random_state=42, silent=True),
    method='vanilla'
)

tm = tm.fit(features_train[cols_to_scale], labels_train, treat_train)
df = pd.read_csv('/content/drive/MyDrive/Amex_Super_Bowl/Amex_Campus_Challenge_Train.csv', chunksize = 1000000)
train_score= generate_submission_data_train(tm, scaler, cols_to_scale, df)
final_score = round(incr_act_top(input_df=train_score,pred_col='predicted_score',cm_key='customer',treated_col='ind_recommended',actual_col='activation'), 7)
print(final_score)

   ind_recommended  avg_30d_act
0                0     0.007023
1                1     0.008556
0.0015333


Feature Strength of best performing model

Performance on top 10 features

In [19]:
cols_to_scale= feat_imp.iloc[:10, 0].to_list()

In [20]:
def process_data_and_get_score(cols_to_scale):
    # Load your DataFrame
        df = pd.read_csv('/content/drive/MyDrive/Amex_Super_Bowl/Amex_Campus_Challenge_Train.csv', chunksize=1000000)

    # Define columns to drop
        columns_to_drop = ['activation', 'ind_recommended']
   # Impute missing values
        cols_to_impute = cols_to_scale
        chunk_sampled = pd.concat([chunk_1,chunk_2 ,chunk_3, chunk_4], axis=0)
        chunk_sampled[cols_to_impute] = imputer.fit_transform(chunk_sampled[cols_to_impute])

        features = chunk_sampled.drop(columns=columns_to_drop)
        labels = chunk_sampled['activation']
        treat = chunk_sampled['ind_recommended']

        # Scale features
        features[cols_to_scale] = scaler.fit_transform(features[cols_to_scale])

        # Define and train the model
        estimator = CatBoostClassifier(verbose=100, random_state=42, thread_count=1)
        ct_model = ClassTransformation(estimator=estimator)
        ct_model = ct_model.fit(features[cols_to_scale], labels, treat)

        # Generate submission data for training
        train_score = generate_submission_data_train(ct_model, scaler, cols_to_scale, df)

        # Calculate the final score
        final_score = round(incr_act_top(input_df=train_score, pred_col='predicted_score', cm_key='customer',
                                        treated_col='ind_recommended', actual_col='activation'), 7)



        return final_score


In [21]:
final_score = process_data_and_get_score(cols_to_scale)
print(final_score)

Learning rate set to 0.089901
0:	learn: 0.6899566	total: 65.6ms	remaining: 1m 5s
100:	learn: 0.6571797	total: 4.02s	remaining: 35.8s
200:	learn: 0.6418986	total: 9.24s	remaining: 36.7s
300:	learn: 0.6286491	total: 12.8s	remaining: 29.7s
400:	learn: 0.6166446	total: 16.3s	remaining: 24.3s
500:	learn: 0.6050806	total: 21s	remaining: 20.9s
600:	learn: 0.5943992	total: 25.8s	remaining: 17.1s
700:	learn: 0.5843052	total: 32.8s	remaining: 14s
800:	learn: 0.5750197	total: 37.1s	remaining: 9.21s
900:	learn: 0.5658422	total: 40.6s	remaining: 4.46s
999:	learn: 0.5574856	total: 46.4s	remaining: 0us
   ind_recommended  avg_30d_act
0                0     0.005371
1                1     0.013222
0.0078518


Performance on mid 10 features

In [22]:
cols_to_scale= feat_imp.iloc[30:40, 0].to_list()
final_score = process_data_and_get_score(cols_to_scale)
print(final_score)

Learning rate set to 0.089901
0:	learn: 0.6928418	total: 36.4ms	remaining: 36.4s
100:	learn: 0.6798501	total: 4.7s	remaining: 41.8s
200:	learn: 0.6687929	total: 8.09s	remaining: 32.2s
300:	learn: 0.6573034	total: 11.4s	remaining: 26.6s
400:	learn: 0.6467309	total: 19.7s	remaining: 29.5s
500:	learn: 0.6370928	total: 26.1s	remaining: 26s
600:	learn: 0.6280792	total: 33.3s	remaining: 22.1s
700:	learn: 0.6195285	total: 38s	remaining: 16.2s
800:	learn: 0.6116585	total: 42.3s	remaining: 10.5s
900:	learn: 0.6041004	total: 47s	remaining: 5.16s
999:	learn: 0.5971931	total: 50.5s	remaining: 0us
   ind_recommended  avg_30d_act
0                0     0.005277
1                1     0.010656
0.0053782


Performance on weakest 10 features

In [23]:
cols_to_scale= feat_imp.iloc[60:, 0].to_list()
final_score = process_data_and_get_score(cols_to_scale)
print(final_score)

Learning rate set to 0.089901
0:	learn: 0.6906710	total: 33.9ms	remaining: 33.9s
100:	learn: 0.6755678	total: 4.35s	remaining: 38.7s
200:	learn: 0.6733957	total: 7.53s	remaining: 29.9s
300:	learn: 0.6716171	total: 10.7s	remaining: 24.8s
400:	learn: 0.6700959	total: 13.9s	remaining: 20.8s
500:	learn: 0.6689740	total: 18.1s	remaining: 18.1s
600:	learn: 0.6680573	total: 21.4s	remaining: 14.2s
700:	learn: 0.6673080	total: 24.6s	remaining: 10.5s
800:	learn: 0.6667572	total: 27.8s	remaining: 6.91s
900:	learn: 0.6661141	total: 32s	remaining: 3.52s
999:	learn: 0.6656888	total: 35.1s	remaining: 0us
   ind_recommended  avg_30d_act
0                0     0.000115
1                1     0.000504
0.0003891
