In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score, precision_score, recall_score, roc_auc_score
import numpy as np

In [2]:
# Read the CSV file into a dataframe
data = pd.read_csv('credit_card_transactions.csv')

# Display the first few records
data.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,long,city_pop,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud,merch_zipcode
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,-81.1781,3495,"Psychologist, counselling",1988-03-09,0b242abb623afc578575680df30655b9,1325376018,36.011293,-82.048315,0,28705.0
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,-118.2105,149,Special educational needs teacher,1978-06-21,1f76529f8574734946361c461b024d99,1325376044,49.159047,-118.186462,0,
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,-112.262,4154,Nature conservation officer,1962-01-19,a1a22d70485983eac12b5b88dad1cf95,1325376051,43.150704,-112.154481,0,83236.0
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,-112.1138,1939,Patent attorney,1967-01-12,6b849c168bdad6f867558c3793159a81,1325376076,47.034331,-112.561071,0,
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,-79.4629,99,Dance movement psychotherapist,1986-03-28,a41d7549acf90789359a9aa5346dcb46,1325376186,38.674999,-78.632459,0,22844.0


In [3]:
# Explore data columns
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296675 entries, 0 to 1296674
Data columns (total 24 columns):
 #   Column                 Non-Null Count    Dtype  
---  ------                 --------------    -----  
 0   Unnamed: 0             1296675 non-null  int64  
 1   trans_date_trans_time  1296675 non-null  object 
 2   cc_num                 1296675 non-null  int64  
 3   merchant               1296675 non-null  object 
 4   category               1296675 non-null  object 
 5   amt                    1296675 non-null  float64
 6   first                  1296675 non-null  object 
 7   last                   1296675 non-null  object 
 8   gender                 1296675 non-null  object 
 9   street                 1296675 non-null  object 
 10  city                   1296675 non-null  object 
 11  state                  1296675 non-null  object 
 12  zip                    1296675 non-null  int64  
 13  lat                    1296675 non-null  float64
 14  long              

In [4]:
# function to get number of unique values for each column ..
# to determine what kind of encoding and imputation needs to be done!

def display_unique_values(data):
    """
    Display unique values and the number of unique values for each column in the dataframe.

    Parameters:
    data (pd.DataFrame): The dataframe to analyze.

    Returns:
    None
    """
    for column in data.columns:
        unique_values = data[column].unique()
        print(f"Column: {column}")
        print(f"Data Type: {data[column].dtype}")
        print(f"Number of Unique Values: {len(unique_values)}")
        print(f"Unique Values: {unique_values[:10]}...")  # Show the first 10 unique values for brevity
        print("-" * 50)

# Display details of each column:
display_unique_values(data)

Column: Unnamed: 0
Data Type: int64
Number of Unique Values: 1296675
Unique Values: [0 1 2 3 4 5 6 7 8 9]...
--------------------------------------------------
Column: trans_date_trans_time
Data Type: object
Number of Unique Values: 1274791
Unique Values: ['2019-01-01 00:00:18' '2019-01-01 00:00:44' '2019-01-01 00:00:51'
 '2019-01-01 00:01:16' '2019-01-01 00:03:06' '2019-01-01 00:04:08'
 '2019-01-01 00:04:42' '2019-01-01 00:05:08' '2019-01-01 00:05:18'
 '2019-01-01 00:06:01']...
--------------------------------------------------
Column: cc_num
Data Type: int64
Number of Unique Values: 983
Unique Values: [2703186189652095     630423337322   38859492057661 3534093764340240
  375534208663984 4767265376804500   30074693890476 6011360759745864
 4922710831011201 2720830304681674]...
--------------------------------------------------
Column: merchant
Data Type: object
Number of Unique Values: 693
Unique Values: ['fraud_Rippin, Kub and Mann' 'fraud_Heller, Gutmann and Zieme'
 'fraud_Lind-Buckr

In [5]:
# Make a copy for transformation
ak_df = data.copy()

# Convert date columns to datetime format
ak_df['trans_date_trans_time'] = pd.to_datetime(ak_df['trans_date_trans_time'])
ak_df['dob'] = pd.to_datetime(ak_df['dob'])

In [6]:
# AGE - Calculate age based on DOB and transaction year
ak_df['ft_age'] = ak_df['trans_date_trans_time'].dt.year - ak_df['dob'].dt.year

# AGE SEGMENT - Define age segments
def age_segment(age):
    if age < 18:
        return 'Under 18'
    elif 19 <= age <= 28:
        return '19-28'
    elif 29 <= age <= 38:
        return '29-38'
    elif 39 <= age <= 48:
        return '39-48'
    elif 49 <= age <= 58:
        return '49-58'
    elif 59 <= age <= 68:
        return '59-68'
    elif 69 <= age <= 78:
        return '69-78'
    else:
        return '79+'

# Apply age segmentation
ak_df['ft_age_group'] = ak_df['ft_age'].apply(age_segment)

ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,job,dob,trans_num,unix_time,merch_lat,merch_long,is_fraud,merch_zipcode,ft_age,ft_age_group
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,"Psychologist, counselling",1988-03-09,0b242abb623afc578575680df30655b9,1325376018,36.011293,-82.048315,0,28705.0,31,29-38
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,Special educational needs teacher,1978-06-21,1f76529f8574734946361c461b024d99,1325376044,49.159047,-118.186462,0,,41,39-48
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,Nature conservation officer,1962-01-19,a1a22d70485983eac12b5b88dad1cf95,1325376051,43.150704,-112.154481,0,83236.0,57,49-58
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,Patent attorney,1967-01-12,6b849c168bdad6f867558c3793159a81,1325376076,47.034331,-112.561071,0,,52,49-58
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,Dance movement psychotherapist,1986-03-28,a41d7549acf90789359a9aa5346dcb46,1325376186,38.674999,-78.632459,0,22844.0,33,29-38


In [7]:
# TRANSACTION HOUR
ak_df['ft_trans_hour'] = ak_df['trans_date_trans_time'].dt.hour

# TIME OF DAY - Categorize time of day based on transaction hour
ak_df['ft_time_of_day'] = ak_df['trans_date_trans_time'].dt.hour.apply(
    lambda hour: 'Morning' if 6 <= hour < 12 else
    ('Afternoon' if 12 <= hour < 18 else
    ('Evening' if 18 <= hour < 24 else 'Night'))
)

# TRANSACTION DAY
ak_df['ft_trans_day'] = ak_df['trans_date_trans_time'].dt.day

# TRANSACTION DAY OF YEAR
ak_df['ft_trans_day_of_year'] = ak_df['trans_date_trans_time'].dt.dayofyear

# TRANSACTION MONTH
ak_df['ft_trans_month'] = ak_df['trans_date_trans_time'].dt.month

# DAY OF WEEK - Add a new column 'day_of_week' to represent the day of the week (0=Monday, 6=Sunday)
ak_df['ft_day_of_week'] = ak_df['trans_date_trans_time'].dt.day_name()


ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,is_fraud,merch_zipcode,ft_age,ft_age_group,ft_trans_hour,ft_time_of_day,ft_trans_day,ft_trans_day_of_year,ft_trans_month,ft_day_of_week
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,0,28705.0,31,29-38,0,Night,1,1,1,Tuesday
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,0,,41,39-48,0,Night,1,1,1,Tuesday
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,0,83236.0,57,49-58,0,Night,1,1,1,Tuesday
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,0,,52,49-58,0,Night,1,1,1,Tuesday
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,0,22844.0,33,29-38,0,Night,1,1,1,Tuesday


In [8]:
# DISTANCE BETWEEN USER AND MERCHANT

import numpy as np

# Define the Haversine Formula function that calculates the distance given two latitude/longitude points
def haversine(lat1, lon1, lat2, lon2):
    # Convert latitude and longitude from degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # Haversine formula
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2)**2
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    radius = 6371  # Radius of Earth in kilometers
    return radius * c

# Create a new column for distance
ak_df['ft_distance_user_merchant'] = haversine(
    ak_df['lat'], ak_df['long'],
    ak_df['merch_lat'], ak_df['merch_long']
)

# Calculate the average distance of previous transactions for a user and compare the current transaction distance
user_avg_distance = ak_df.groupby('cc_num')['ft_distance_user_merchant'].transform('mean')
ak_df['ft_merchant_distance_from_user_mean'] = ak_df['ft_distance_user_merchant'] - user_avg_distance


ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,ft_age,ft_age_group,ft_trans_hour,ft_time_of_day,ft_trans_day,ft_trans_day_of_year,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,31,29-38,0,Night,1,1,1,Tuesday,78.597568,0.637376
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,41,39-48,0,Night,1,1,1,Tuesday,30.212176,-41.58037
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,57,49-58,0,Night,1,1,1,Tuesday,108.206083,34.130756
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,52,49-58,0,Night,1,1,1,Tuesday,95.673231,23.97106
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,33,29-38,0,Night,1,1,1,Tuesday,77.556744,2.322402


In [9]:
# Count the number of transactions for each merchant in the dataset
merchant_transaction_counts = ak_df['merchant'].value_counts()
ak_df['ft_merchant_popularity'] = ak_df['merchant'].map(merchant_transaction_counts)

ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,ft_age_group,ft_trans_hour,ft_time_of_day,ft_trans_day,ft_trans_day_of_year,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,29-38,0,Night,1,1,1,Tuesday,78.597568,0.637376,1267
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,39-48,0,Night,1,1,1,Tuesday,30.212176,-41.58037,2503
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,49-58,0,Night,1,1,1,Tuesday,108.206083,34.130756,1895
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,49-58,0,Night,1,1,1,Tuesday,95.673231,23.97106,2613
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,29-38,0,Night,1,1,1,Tuesday,77.556744,2.322402,1592


In [10]:
# Calculate the mean transaction amount for each user (cc_num)
user_mean_amt = ak_df.groupby('cc_num')['amt'].transform('mean')
ak_df['ft_mean_amt_per_user'] = user_mean_amt

ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,ft_trans_hour,ft_time_of_day,ft_trans_day,ft_trans_day_of_year,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,0,Night,1,1,1,Tuesday,78.597568,0.637376,1267,87.393215
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,0,Night,1,1,1,Tuesday,30.212176,-41.58037,2503,53.94932
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,0,Night,1,1,1,Tuesday,108.206083,34.130756,1895,65.87004
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,0,Night,1,1,1,Tuesday,95.673231,23.97106,2613,72.776673
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,0,Night,1,1,1,Tuesday,77.556744,2.322402,1592,95.178091


In [11]:
# Calculate the deviation of the transaction amount from the user’s mean transaction amount
ak_df['ft_amt_deviation'] = ak_df['amt'] - ak_df['ft_mean_amt_per_user']

ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,ft_time_of_day,ft_trans_day,ft_trans_day_of_year,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,Night,1,1,1,Tuesday,78.597568,0.637376,1267,87.393215,-82.423215
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,Night,1,1,1,Tuesday,30.212176,-41.58037,2503,53.94932,53.28068
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,Night,1,1,1,Tuesday,108.206083,34.130756,1895,65.87004,154.23996
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,Night,1,1,1,Tuesday,95.673231,23.97106,2613,72.776673,-27.776673
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,Night,1,1,1,Tuesday,77.556744,2.322402,1592,95.178091,-53.218091


In [12]:
# Count the number of transactions for each user (cc_num)
user_transaction_count = ak_df['cc_num'].value_counts()
ak_df['ft_transaction_count_per_user'] = ak_df['cc_num'].map(user_transaction_count)

ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,ft_trans_day,ft_trans_day_of_year,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation,ft_transaction_count_per_user
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,1,1,1,Tuesday,78.597568,0.637376,1267,87.393215,-82.423215,2028
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,1,1,1,Tuesday,30.212176,-41.58037,2503,53.94932,53.28068,3030
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,1,1,1,Tuesday,108.206083,34.130756,1895,65.87004,154.23996,503
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,1,1,1,Tuesday,95.673231,23.97106,2613,72.776673,-27.776673,493
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,1,1,1,Tuesday,77.556744,2.322402,1592,95.178091,-53.218091,2017


In [13]:
# Calculate the fraud rate for each state by dividing the number of fraudulent transactions by the total number of transactions
fraud_rate_by_state = ak_df.groupby('state')['is_fraud'].mean()
ak_df['ft_state_fraud_rate'] = ak_df['state'].map(fraud_rate_by_state)

ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,ft_trans_day_of_year,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation,ft_transaction_count_per_user,ft_state_fraud_rate
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,1,1,Tuesday,78.597568,0.637376,1267,87.393215,-82.423215,2028,0.004923
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,1,1,Tuesday,30.212176,-41.58037,2503,53.94932,53.28068,3030,0.005073
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,1,1,Tuesday,108.206083,34.130756,1895,65.87004,154.23996,503,0.001984
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,1,1,Tuesday,95.673231,23.97106,2613,72.776673,-27.776673,493,0.002722
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,1,1,Tuesday,77.556744,2.322402,1592,95.178091,-53.218091,2017,0.006769


In [14]:
# Flag transactions as recurring if the same merchant and cc_num combination appears multiple times within a short period
ak_df['ft_transaction_is_recurring'] = ak_df.duplicated(subset=['cc_num', 'merchant'], keep=False).astype(int)

ak_df.head()

Unnamed: 0.1,Unnamed: 0,trans_date_trans_time,cc_num,merchant,category,amt,first,last,gender,street,...,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation,ft_transaction_count_per_user,ft_state_fraud_rate,ft_transaction_is_recurring
0,0,2019-01-01 00:00:18,2703186189652095,"fraud_Rippin, Kub and Mann",misc_net,4.97,Jennifer,Banks,F,561 Perry Cove,...,1,Tuesday,78.597568,0.637376,1267,87.393215,-82.423215,2028,0.004923,1
1,1,2019-01-01 00:00:44,630423337322,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,Stephanie,Gill,F,43039 Riley Greens Suite 393,...,1,Tuesday,30.212176,-41.58037,2503,53.94932,53.28068,3030,0.005073,1
2,2,2019-01-01 00:00:51,38859492057661,fraud_Lind-Buckridge,entertainment,220.11,Edward,Sanchez,M,594 White Dale Suite 530,...,1,Tuesday,108.206083,34.130756,1895,65.87004,154.23996,503,0.001984,1
3,3,2019-01-01 00:01:16,3534093764340240,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,Jeremy,White,M,9443 Cynthia Court Apt. 038,...,1,Tuesday,95.673231,23.97106,2613,72.776673,-27.776673,493,0.002722,0
4,4,2019-01-01 00:03:06,375534208663984,fraud_Keeling-Crist,misc_pos,41.96,Tyler,Garcia,M,408 Bradley Rest,...,1,Tuesday,77.556744,2.322402,1592,95.178091,-53.218091,2017,0.006769,1


In [15]:
# lets review the current columns of the updated dataframe
ak_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296675 entries, 0 to 1296674
Data columns (total 40 columns):
 #   Column                               Non-Null Count    Dtype         
---  ------                               --------------    -----         
 0   Unnamed: 0                           1296675 non-null  int64         
 1   trans_date_trans_time                1296675 non-null  datetime64[ns]
 2   cc_num                               1296675 non-null  int64         
 3   merchant                             1296675 non-null  object        
 4   category                             1296675 non-null  object        
 5   amt                                  1296675 non-null  float64       
 6   first                                1296675 non-null  object        
 7   last                                 1296675 non-null  object        
 8   gender                               1296675 non-null  object        
 9   street                               1296675 non-null  ob

In [16]:
# Drop unnecessary columns
columns_to_drop = [
    'Unnamed: 0', 'trans_date_trans_time', 'cc_num', 'first', 'last', 'street', 'trans_num', 'lat', 
    'long', 'merch_zipcode', 'ft_age', 'merch_lat', 'merch_long', 'trans_num', 'unix_time', 'zip', 'dob'
]
ak_df_cleaned = ak_df.drop(columns=columns_to_drop)

ak_df_cleaned.head()

# Check for null values
# print(ak_df_cleaned.isnull().sum())

Unnamed: 0,merchant,category,amt,gender,city,state,city_pop,job,is_fraud,ft_age_group,...,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation,ft_transaction_count_per_user,ft_state_fraud_rate,ft_transaction_is_recurring
0,"fraud_Rippin, Kub and Mann",misc_net,4.97,F,Moravian Falls,NC,3495,"Psychologist, counselling",0,29-38,...,1,Tuesday,78.597568,0.637376,1267,87.393215,-82.423215,2028,0.004923,1
1,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,F,Orient,WA,149,Special educational needs teacher,0,39-48,...,1,Tuesday,30.212176,-41.58037,2503,53.94932,53.28068,3030,0.005073,1
2,fraud_Lind-Buckridge,entertainment,220.11,M,Malad City,ID,4154,Nature conservation officer,0,49-58,...,1,Tuesday,108.206083,34.130756,1895,65.87004,154.23996,503,0.001984,1
3,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.0,M,Boulder,MT,1939,Patent attorney,0,49-58,...,1,Tuesday,95.673231,23.97106,2613,72.776673,-27.776673,493,0.002722,0
4,fraud_Keeling-Crist,misc_pos,41.96,M,Doe Hill,VA,99,Dance movement psychotherapist,0,29-38,...,1,Tuesday,77.556744,2.322402,1592,95.178091,-53.218091,2017,0.006769,1


In [17]:
# Explore data columns
ak_df_cleaned.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1296675 entries, 0 to 1296674
Data columns (total 24 columns):
 #   Column                               Non-Null Count    Dtype  
---  ------                               --------------    -----  
 0   merchant                             1296675 non-null  object 
 1   category                             1296675 non-null  object 
 2   amt                                  1296675 non-null  float64
 3   gender                               1296675 non-null  object 
 4   city                                 1296675 non-null  object 
 5   state                                1296675 non-null  object 
 6   city_pop                             1296675 non-null  int64  
 7   job                                  1296675 non-null  object 
 8   is_fraud                             1296675 non-null  int64  
 9   ft_age_group                         1296675 non-null  object 
 10  ft_trans_hour                        1296675 non-null  int32  
 11

In [19]:
# Setup X and y variables
X = ak_df_cleaned.drop(columns='is_fraud')
y = ak_df_cleaned['is_fraud'].values.reshape(-1,1)

# Split the data into training and testing sets
X_train1, X_test, y_train1, y_test = train_test_split(X, y, test_size=0.1 ,random_state=42)
X_train, X_validation, y_train, y_validation = train_test_split(X_train1, y_train1, random_state=42)

X_train.describe()

Unnamed: 0,amt,city_pop,ft_trans_hour,ft_trans_day,ft_trans_day_of_year,ft_trans_month,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation,ft_transaction_count_per_user,ft_state_fraud_rate,ft_transaction_is_recurring
count,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0,875255.0
mean,70.289636,88859.43,12.805973,15.591315,171.276758,6.140808,76.10195,-0.013551,2048.188991,70.340711,-0.051075,1818.577768,0.005789,0.880249
std,161.038519,301752.6,6.815471,8.830291,104.349248,3.416813,29.113521,29.025146,529.972298,19.431438,159.856002,742.649295,0.002865,0.324671
min,1.0,23.0,0.0,1.0,1.0,1.0,0.093922,-80.831303,727.0,42.951671,-764.76375,7.0,0.001984,0.0
25%,9.67,743.0,7.0,8.0,87.0,3.0,55.324579,-20.69833,1783.0,59.800213,-52.441447,1466.0,0.005149,1.0
50%,47.55,2456.0,14.0,15.0,155.0,6.0,78.228994,2.182731,1985.0,65.09374,-24.035525,2000.0,0.005693,1.0
75%,83.18,20328.0,19.0,23.0,255.0,9.0,98.485766,22.251355,2444.0,82.89498,12.562201,2524.0,0.006585,1.0
max,28948.9,2906700.0,23.0,31.0,365.0,12.0,152.117173,70.815341,4403.0,948.818182,28873.996302,3123.0,1.0,1.0


In [20]:
!pip install category-encoders



In [21]:
import category_encoders as ce

# Define categorical columns to encode
categorical_columns = ['merchant', 'category', 'gender', 'city', 'state', 
                       'job', 'ft_time_of_day', 'ft_age_group', 'ft_day_of_week']

# Initialize the target encoder
encoder = ce.TargetEncoder(cols=categorical_columns)

# Fit the encoder on X_train using y_train
X_train_encoded = encoder.fit_transform(X_train, y_train)
X_train_encoded.head()

Unnamed: 0,merchant,category,amt,gender,city,state,city_pop,job,ft_age_group,ft_trans_hour,...,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation,ft_transaction_count_per_user,ft_state_fraud_rate,ft_transaction_is_recurring
993968,0.005161,0.004716,68.95,0.005266,0.002158,0.005537,139,0.007501,0.006654,8,...,2,0.004693,25.711837,-50.067674,2641,61.871168,7.078832,2037,0.005736,1
1194466,0.00188,0.001614,82.84,0.006372,0.004425,0.006557,1453,0.001954,0.00413,23,...,5,0.006577,60.318251,-13.413125,2341,97.602465,-14.762465,2049,0.006647,1
197622,0.00384,0.004716,45.97,0.006372,0.0,0.005193,116001,0.0,0.005988,4,...,4,0.006991,64.153784,-13.583969,2678,61.917571,-15.947571,2561,0.005362,1
329260,0.003728,0.00303,35.55,0.006372,0.005,0.005565,795,0.007244,0.004579,4,...,6,0.006909,86.365736,13.417066,1582,92.04871,-56.49871,2093,0.00555,1
714316,0.00241,0.002389,12.15,0.005266,0.008759,0.004392,2456,0.012922,0.005988,14,...,11,0.006991,130.688611,55.007456,1807,55.221352,-43.071352,2042,0.004796,1


In [22]:
# Transform X_test using the already fitted encoder
X_test_encoded = encoder.transform(X_test)

# Check the transformed X_test
X_test_encoded.head()

Unnamed: 0,merchant,category,amt,gender,city,state,city_pop,job,ft_age_group,ft_trans_hour,...,ft_trans_month,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation,ft_transaction_count_per_user,ft_state_fraud_rate,ft_transaction_is_recurring
1045211,0.002887,0.00303,194.51,0.006372,0.0,0.005537,972,0.005549,0.005988,15,...,3,0.004693,54.336119,-22.09335,1524,69.821821,124.688179,1494,0.005736,1
547406,0.000847,0.001414,52.32,0.005266,0.01238,0.008031,217,0.005698,0.00919,15,...,8,0.006909,66.060865,-6.639384,1751,64.054238,-11.734238,1043,0.008012,1
110142,0.008521,0.007036,6.53,0.005266,0.0,0.005537,184,0.001157,0.007238,1,...,3,0.004693,94.386045,17.820994,2362,63.39064,-56.86064,2062,0.005736,1
1285953,0.001226,0.001614,7.33,0.006372,0.0,0.006557,10717,0.001183,0.006654,20,...,6,0.00566,109.25129,35.316763,2456,69.869291,-62.539291,1496,0.006647,1
271705,0.00497,0.004716,64.29,0.005266,0.008996,0.006371,635,0.004358,0.00413,5,...,5,0.00566,67.501516,-6.773677,2676,50.852975,13.437025,995,0.005693,1


In [23]:
from sklearn.preprocessing import StandardScaler

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit the scaler on X_train_encoded and transform it
X_train_scaled = scaler.fit_transform(X_train_encoded)
print("X_train scaled sample:")
print(X_train_scaled[:5])  # Display the first 5 rows of the scaled training data

# Transform X_test_encoded using the fitted scaler
X_test_scaled = scaler.transform(X_test_encoded)
print("X_test scaled sample:")
print(X_test_scaled[:5])  # Display the first 5 rows of the scaled testing data

X_train scaled sample:
[[-0.10776812 -0.19882275 -0.00831873 -0.90893792 -0.46342998 -0.17097327
  -0.29401729  0.34444114  0.58797112 -0.70515682 -1.00369582 -0.63319742
  -1.2484693  -1.21189254 -1.21977268 -1.73081579 -1.7245099   1.11857044
  -0.4358683   0.04460208  0.29411239 -0.01845019  0.36883965]
 [-0.69144133 -0.78624224  0.07793397  1.10018515 -0.14235729  0.60738705
  -0.28966272 -0.68165575 -1.0837832   1.49571948  1.01427476 -0.29345759
  -0.35723094 -0.33388092  0.92130448 -0.54214357 -0.46165424  0.5525028
   1.40297232 -0.09202906  0.31027076  0.29937168  0.36883965]
 [-0.34272921 -0.19882275 -0.15101759  1.10018515 -0.76917054 -0.43298392
   0.08994647 -1.04302257  0.14706631 -1.29205717  0.96798669 -0.4067042
  -0.6638936  -0.62655146  1.39139645 -0.41039945 -0.46754031  1.18838545
  -0.43348027 -0.09944265  0.99969482 -0.14909638  0.36883965]
 [-0.36264699 -0.51818135 -0.21572265  1.10018515 -0.06087156 -0.14913338
  -0.29184332  0.29677884 -0.78644365 -1.29205717 

In [24]:
y1 = pd.DataFrame(y_train)

df_corr = pd.concat([X_train_encoded.reset_index(),y1.reset_index()], axis=1, ignore_index=True)
df_corr.drop(columns=[24,25], inplace=True)

columns = [col for col in X_train_encoded.columns]
columns.append('Result')

df_corr.columns = columns 
df_corr.head()

Unnamed: 0,merchant,category,amt,gender,city,state,city_pop,job,ft_age_group,ft_trans_hour,...,ft_day_of_week,ft_distance_user_merchant,ft_merchant_distance_from_user_mean,ft_merchant_popularity,ft_mean_amt_per_user,ft_amt_deviation,ft_transaction_count_per_user,ft_state_fraud_rate,ft_transaction_is_recurring,Result
0,993968,0.005161,0.004716,68.95,0.005266,0.002158,0.005537,139,0.007501,0.006654,...,2,0.004693,25.711837,-50.067674,2641,61.871168,7.078832,2037,0.005736,1
1,1194466,0.00188,0.001614,82.84,0.006372,0.004425,0.006557,1453,0.001954,0.00413,...,5,0.006577,60.318251,-13.413125,2341,97.602465,-14.762465,2049,0.006647,1
2,197622,0.00384,0.004716,45.97,0.006372,0.0,0.005193,116001,0.0,0.005988,...,4,0.006991,64.153784,-13.583969,2678,61.917571,-15.947571,2561,0.005362,1
3,329260,0.003728,0.00303,35.55,0.006372,0.005,0.005565,795,0.007244,0.004579,...,6,0.006909,86.365736,13.417066,1582,92.04871,-56.49871,2093,0.00555,1
4,714316,0.00241,0.002389,12.15,0.005266,0.008759,0.004392,2456,0.012922,0.005988,...,11,0.006991,130.688611,55.007456,1807,55.221352,-43.071352,2042,0.004796,1


In [25]:
columns_corr = df_corr.corr()['Result'].sort_values()

columns_corr

state                                 -0.188662
ft_age_group                          -0.140270
ft_trans_hour                         -0.091808
ft_amt_deviation                      -0.068689
city                                  -0.048692
city_pop                              -0.039823
ft_transaction_is_recurring           -0.023963
gender                                -0.008430
job                                   -0.002138
ft_trans_day                          -0.001341
ft_distance_user_merchant             -0.001005
ft_merchant_popularity                -0.000932
ft_trans_day_of_year                  -0.000873
ft_merchant_distance_from_user_mean   -0.000645
merchant                              -0.000171
ft_transaction_count_per_user         -0.000143
ft_trans_month                         0.000434
ft_day_of_week                         0.000509
amt                                    0.013204
category                               0.013293
ft_time_of_day                         0

In [26]:
sorted_columns = [item[0] for item in columns_corr.items()]

print(sorted_columns)

['state', 'ft_age_group', 'ft_trans_hour', 'ft_amt_deviation', 'city', 'city_pop', 'ft_transaction_is_recurring', 'gender', 'job', 'ft_trans_day', 'ft_distance_user_merchant', 'ft_merchant_popularity', 'ft_trans_day_of_year', 'ft_merchant_distance_from_user_mean', 'merchant', 'ft_transaction_count_per_user', 'ft_trans_month', 'ft_day_of_week', 'amt', 'category', 'ft_time_of_day', 'ft_mean_amt_per_user', 'ft_state_fraud_rate', 'Result']


In [29]:
def cost_of_error_by_chebyshevs(x, y_real, y_prediction, cost=0.25 ):
    
    false_negative_cost = []
    false_positive_cost = []
    correct = []
    
    size = len(y_real)
    
    for i in range(0, size-1):
        if ( y_real[i][0] != y_prediction[i])  :
            if (y_real[i][0] == np.int64(1)):
                false_negative_cost.append(x[i])
            else:
                false_positive_cost.append(x[i])
        else:
            if ( y_real[i][0] == np.int64(1)):
                correct.append(x[i])
    
    cost_correct_pred = (sum(correct)*len(correct))
    cost_fp_pred = (cost * sum(false_positive_cost) * (len(false_positive_cost)))
    cost_fn_pred = ( sum(false_negative_cost) * (len(false_negative_cost)) )
    
    return  { 
                'accuracy' : len(correct)/(len(correct) + len(false_positive_cost) + len(false_negative_cost) ),
                'error_cost' : cost_fn_pred/(cost_correct_pred + cost_fn_pred), 
                'error' : len(false_negative_cost)/(len(correct) + len(false_positive_cost) + len(false_negative_cost) ),
                'customer_experience_cost' : 1 - (cost_correct_pred/(cost_correct_pred + cost_fp_pred)),  
                'customer_experience_rating' : 1 - (len(false_positive_cost)/(len(correct) + len(false_positive_cost) + len(false_negative_cost) ))
        }

In [30]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from catboost import CatBoostClassifier
#from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier, ExtraTreesClassifier

estimators = [
    ('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
    ('svr', make_pipeline(StandardScaler(),
                          LinearSVC(random_state=42))),
    ('cbc', CatBoostClassifier(random_state=42)),
    ('abc', AdaBoostClassifier(random_state=42)),
    ('hgbc', HistGradientBoostingClassifier(random_state=42))
]


In [31]:
train_X = X_train_encoded.copy()
test_X = X_test_encoded.copy()

In [32]:
feature_sel_res = []
counter = 0
removed_cols = 'None'

for col in sorted_columns:
    
    print(f'{counter+1} Removed column: {removed_cols}')
    
    clfModel = StackingClassifier(
        estimators=estimators, final_estimator=LogisticRegression(random_state=42,)
    )
    
    clfModel.fit(train_X, y_train)
    
    y_train_pred2 = clfModel.predict(train_X)
    train_recall_score = recall_score(y_train, y_train_pred2)
    
    y_test_pred2 = clfModel.predict(test_X)
    test_recall_score = recall_score(y_test, y_test_pred2)
    
    train_model_rating2 = None
    test_model_rating2 = None
    
    if('amt' in train_X.columns):
        train_model_rating2 = cost_of_error_by_chebyshevs(train_X['amt'].to_list(), y_train, y_train_pred2)       
        test_model_rating2 = cost_of_error_by_chebyshevs(test_X['amt'].to_list(),y_test, y_test_pred2)
    
    feature_sel_res.append({ 
                            'removed_col' : removed_cols, 
                            'train' : {'recall':train_recall_score, 'rating':train_model_rating2},
                            'test' : {'recall':test_recall_score, 'rating':test_model_rating2} 
                            })
    
    train_X.drop(columns = [col] ,  inplace=True)
    test_X.drop(columns = [col] ,  inplace=True)
    removed_cols = col
    
    counter = counter+1

1 Removed column: None


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2773627	total: 365ms	remaining: 6m 4s
1:	learn: 0.1192477	total: 490ms	remaining: 4m 4s
2:	learn: 0.0549701	total: 603ms	remaining: 3m 20s
3:	learn: 0.0322359	total: 727ms	remaining: 3m 1s
4:	learn: 0.0227629	total: 849ms	remaining: 2m 48s
5:	learn: 0.0185692	total: 959ms	remaining: 2m 38s
6:	learn: 0.0164288	total: 1.06s	remaining: 2m 30s
7:	learn: 0.0143649	total: 1.19s	remaining: 2m 27s
8:	learn: 0.0127930	total: 1.31s	remaining: 2m 24s
9:	learn: 0.0119917	total: 1.44s	remaining: 2m 22s
10:	learn: 0.0116484	total: 1.55s	remaining: 2m 19s
11:	learn: 0.0111195	total: 1.98s	remaining: 2m 43s
12:	learn: 0.0107481	total: 2.22s	remaining: 2m 48s
13:	learn: 0.0104130	total: 2.4s	remaining: 2m 49s
14:	learn: 0.0100897	total: 2.59s	remaining: 2m 49s
15:	learn: 0.0099158	total: 2.76s	remaining: 2m 49s
16:	learn: 0.0096141	total: 2.92s	remaining: 2m 48s
17:	learn: 0.0091699	total: 3.07s	remaining: 2m 47s
18:	learn: 0.0090195	total: 3.26s	remaining: 2m 



Learning rate set to 0.168984
0:	learn: 0.3018140	total: 94.8ms	remaining: 1m 34s
1:	learn: 0.1372277	total: 204ms	remaining: 1m 41s
2:	learn: 0.0639049	total: 336ms	remaining: 1m 51s
3:	learn: 0.0384647	total: 431ms	remaining: 1m 47s
4:	learn: 0.0264229	total: 544ms	remaining: 1m 48s
5:	learn: 0.0199606	total: 647ms	remaining: 1m 47s
6:	learn: 0.0165741	total: 760ms	remaining: 1m 47s
7:	learn: 0.0145278	total: 867ms	remaining: 1m 47s
8:	learn: 0.0130005	total: 964ms	remaining: 1m 46s
9:	learn: 0.0120244	total: 1.06s	remaining: 1m 44s
10:	learn: 0.0113387	total: 1.18s	remaining: 1m 45s
11:	learn: 0.0109214	total: 1.28s	remaining: 1m 45s
12:	learn: 0.0105015	total: 1.39s	remaining: 1m 45s
13:	learn: 0.0101511	total: 1.51s	remaining: 1m 46s
14:	learn: 0.0099316	total: 1.62s	remaining: 1m 46s
15:	learn: 0.0097023	total: 1.74s	remaining: 1m 46s
16:	learn: 0.0094319	total: 1.84s	remaining: 1m 46s
17:	learn: 0.0092539	total: 1.95s	remaining: 1m 46s
18:	learn: 0.0090712	total: 2.06s	remaining



2 Removed column: state


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2690939	total: 143ms	remaining: 2m 22s
1:	learn: 0.1136577	total: 271ms	remaining: 2m 15s
2:	learn: 0.0533575	total: 390ms	remaining: 2m 9s
3:	learn: 0.0313890	total: 515ms	remaining: 2m 8s
4:	learn: 0.0229539	total: 626ms	remaining: 2m 4s
5:	learn: 0.0182542	total: 739ms	remaining: 2m 2s
6:	learn: 0.0163267	total: 855ms	remaining: 2m 1s
7:	learn: 0.0149659	total: 963ms	remaining: 1m 59s
8:	learn: 0.0135308	total: 1.09s	remaining: 2m
9:	learn: 0.0125146	total: 1.23s	remaining: 2m 1s
10:	learn: 0.0119861	total: 1.37s	remaining: 2m 3s
11:	learn: 0.0115914	total: 1.51s	remaining: 2m 4s
12:	learn: 0.0112845	total: 1.65s	remaining: 2m 5s
13:	learn: 0.0109624	total: 1.78s	remaining: 2m 5s
14:	learn: 0.0102422	total: 1.9s	remaining: 2m 4s
15:	learn: 0.0097356	total: 2.02s	remaining: 2m 4s
16:	learn: 0.0094441	total: 2.14s	remaining: 2m 3s
17:	learn: 0.0090314	total: 2.25s	remaining: 2m 3s
18:	learn: 0.0088444	total: 2.38s	remaining: 2m 2s
19:	learn: 0



Learning rate set to 0.168984
0:	learn: 0.2929449	total: 114ms	remaining: 1m 53s
1:	learn: 0.1253248	total: 219ms	remaining: 1m 49s
2:	learn: 0.0614057	total: 326ms	remaining: 1m 48s
3:	learn: 0.0363846	total: 443ms	remaining: 1m 50s
4:	learn: 0.0266964	total: 552ms	remaining: 1m 49s
5:	learn: 0.0209471	total: 669ms	remaining: 1m 50s
6:	learn: 0.0175943	total: 765ms	remaining: 1m 48s
7:	learn: 0.0157257	total: 865ms	remaining: 1m 47s
8:	learn: 0.0141050	total: 969ms	remaining: 1m 46s
9:	learn: 0.0128375	total: 1.08s	remaining: 1m 47s
10:	learn: 0.0122328	total: 1.19s	remaining: 1m 47s
11:	learn: 0.0114729	total: 1.3s	remaining: 1m 46s
12:	learn: 0.0110171	total: 1.42s	remaining: 1m 48s
13:	learn: 0.0107467	total: 1.54s	remaining: 1m 48s
14:	learn: 0.0103734	total: 1.65s	remaining: 1m 48s
15:	learn: 0.0100960	total: 1.76s	remaining: 1m 48s
16:	learn: 0.0098130	total: 1.89s	remaining: 1m 49s
17:	learn: 0.0092589	total: 2.01s	remaining: 1m 49s
18:	learn: 0.0091190	total: 2.11s	remaining: 



3 Removed column: ft_age_group


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2607161	total: 130ms	remaining: 2m 9s
1:	learn: 0.1106730	total: 264ms	remaining: 2m 11s
2:	learn: 0.0548510	total: 392ms	remaining: 2m 10s
3:	learn: 0.0332799	total: 526ms	remaining: 2m 10s
4:	learn: 0.0248342	total: 635ms	remaining: 2m 6s
5:	learn: 0.0204826	total: 737ms	remaining: 2m 2s
6:	learn: 0.0170343	total: 850ms	remaining: 2m
7:	learn: 0.0151166	total: 971ms	remaining: 2m
8:	learn: 0.0137356	total: 1.11s	remaining: 2m 2s
9:	learn: 0.0130026	total: 1.25s	remaining: 2m 3s
10:	learn: 0.0123417	total: 1.37s	remaining: 2m 2s
11:	learn: 0.0120970	total: 1.48s	remaining: 2m 1s
12:	learn: 0.0119170	total: 1.6s	remaining: 2m 1s
13:	learn: 0.0113576	total: 1.71s	remaining: 2m
14:	learn: 0.0111265	total: 1.83s	remaining: 1m 59s
15:	learn: 0.0108464	total: 1.97s	remaining: 2m
16:	learn: 0.0106290	total: 2.1s	remaining: 2m 1s
17:	learn: 0.0103585	total: 2.23s	remaining: 2m 1s
18:	learn: 0.0102229	total: 2.35s	remaining: 2m 1s
19:	learn: 0.0096830	



Learning rate set to 0.168984
0:	learn: 0.2857019	total: 117ms	remaining: 1m 56s
1:	learn: 0.1259898	total: 235ms	remaining: 1m 57s
2:	learn: 0.0634263	total: 352ms	remaining: 1m 57s
3:	learn: 0.0365323	total: 470ms	remaining: 1m 57s
4:	learn: 0.0268505	total: 568ms	remaining: 1m 53s
5:	learn: 0.0211404	total: 663ms	remaining: 1m 49s
6:	learn: 0.0169546	total: 762ms	remaining: 1m 48s
7:	learn: 0.0150536	total: 866ms	remaining: 1m 47s
8:	learn: 0.0137167	total: 971ms	remaining: 1m 46s
9:	learn: 0.0129973	total: 1.07s	remaining: 1m 46s
10:	learn: 0.0123220	total: 1.17s	remaining: 1m 45s
11:	learn: 0.0119544	total: 1.27s	remaining: 1m 44s
12:	learn: 0.0116223	total: 1.4s	remaining: 1m 46s
13:	learn: 0.0113491	total: 1.52s	remaining: 1m 47s
14:	learn: 0.0105302	total: 1.62s	remaining: 1m 46s
15:	learn: 0.0103144	total: 1.73s	remaining: 1m 46s
16:	learn: 0.0100169	total: 1.84s	remaining: 1m 46s
17:	learn: 0.0097800	total: 1.94s	remaining: 1m 45s
18:	learn: 0.0096373	total: 2.04s	remaining: 



4 Removed column: ft_trans_hour


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2839066	total: 115ms	remaining: 1m 54s
1:	learn: 0.1162220	total: 249ms	remaining: 2m 4s
2:	learn: 0.0635038	total: 412ms	remaining: 2m 16s
3:	learn: 0.0377352	total: 536ms	remaining: 2m 13s
4:	learn: 0.0260784	total: 646ms	remaining: 2m 8s
5:	learn: 0.0205420	total: 756ms	remaining: 2m 5s
6:	learn: 0.0178318	total: 867ms	remaining: 2m 2s
7:	learn: 0.0161631	total: 973ms	remaining: 2m
8:	learn: 0.0152326	total: 1.08s	remaining: 1m 58s
9:	learn: 0.0146555	total: 1.18s	remaining: 1m 56s
10:	learn: 0.0136493	total: 1.31s	remaining: 1m 57s
11:	learn: 0.0132131	total: 1.43s	remaining: 1m 57s
12:	learn: 0.0129006	total: 1.55s	remaining: 1m 57s
13:	learn: 0.0120304	total: 1.68s	remaining: 1m 58s
14:	learn: 0.0116664	total: 1.8s	remaining: 1m 57s
15:	learn: 0.0114286	total: 1.91s	remaining: 1m 57s
16:	learn: 0.0111510	total: 2.04s	remaining: 1m 58s
17:	learn: 0.0109274	total: 2.17s	remaining: 1m 58s
18:	learn: 0.0108067	total: 2.3s	remaining: 1m 58s
19



Learning rate set to 0.168984
0:	learn: 0.3079390	total: 101ms	remaining: 1m 40s
1:	learn: 0.1347137	total: 193ms	remaining: 1m 36s
2:	learn: 0.0716528	total: 302ms	remaining: 1m 40s
3:	learn: 0.0423094	total: 402ms	remaining: 1m 40s
4:	learn: 0.0316540	total: 497ms	remaining: 1m 38s
5:	learn: 0.0233431	total: 591ms	remaining: 1m 37s
6:	learn: 0.0192096	total: 686ms	remaining: 1m 37s
7:	learn: 0.0170555	total: 774ms	remaining: 1m 36s
8:	learn: 0.0156342	total: 865ms	remaining: 1m 35s
9:	learn: 0.0148386	total: 954ms	remaining: 1m 34s
10:	learn: 0.0141665	total: 1.06s	remaining: 1m 35s
11:	learn: 0.0134181	total: 1.14s	remaining: 1m 34s
12:	learn: 0.0130971	total: 1.24s	remaining: 1m 34s
13:	learn: 0.0127948	total: 1.33s	remaining: 1m 34s
14:	learn: 0.0124319	total: 1.43s	remaining: 1m 33s
15:	learn: 0.0121986	total: 1.53s	remaining: 1m 33s
16:	learn: 0.0115589	total: 1.62s	remaining: 1m 33s
17:	learn: 0.0112841	total: 1.71s	remaining: 1m 33s
18:	learn: 0.0111187	total: 1.8s	remaining: 



5 Removed column: ft_amt_deviation


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2897961	total: 139ms	remaining: 2m 19s
1:	learn: 0.1178426	total: 265ms	remaining: 2m 12s
2:	learn: 0.0630788	total: 384ms	remaining: 2m 7s
3:	learn: 0.0381905	total: 492ms	remaining: 2m 2s
4:	learn: 0.0259414	total: 605ms	remaining: 2m
5:	learn: 0.0200346	total: 722ms	remaining: 1m 59s
6:	learn: 0.0176987	total: 835ms	remaining: 1m 58s
7:	learn: 0.0161763	total: 976ms	remaining: 2m 1s
8:	learn: 0.0151909	total: 1.1s	remaining: 2m 1s
9:	learn: 0.0143622	total: 1.23s	remaining: 2m 1s
10:	learn: 0.0140185	total: 1.35s	remaining: 2m 1s
11:	learn: 0.0136783	total: 1.48s	remaining: 2m 1s
12:	learn: 0.0126606	total: 1.61s	remaining: 2m 2s
13:	learn: 0.0124808	total: 1.74s	remaining: 2m 2s
14:	learn: 0.0122367	total: 1.85s	remaining: 2m 1s
15:	learn: 0.0120131	total: 1.96s	remaining: 2m
16:	learn: 0.0119015	total: 2.07s	remaining: 1m 59s
17:	learn: 0.0112651	total: 2.19s	remaining: 1m 59s
18:	learn: 0.0111877	total: 2.31s	remaining: 1m 59s
19:	learn: 



Learning rate set to 0.168984
0:	learn: 0.3144949	total: 108ms	remaining: 1m 47s
1:	learn: 0.1381818	total: 210ms	remaining: 1m 44s
2:	learn: 0.0850936	total: 328ms	remaining: 1m 49s
3:	learn: 0.0493959	total: 445ms	remaining: 1m 50s
4:	learn: 0.0318192	total: 552ms	remaining: 1m 49s
5:	learn: 0.0236426	total: 650ms	remaining: 1m 47s
6:	learn: 0.0195255	total: 752ms	remaining: 1m 46s
7:	learn: 0.0172742	total: 850ms	remaining: 1m 45s
8:	learn: 0.0162338	total: 945ms	remaining: 1m 44s
9:	learn: 0.0149591	total: 1.04s	remaining: 1m 42s
10:	learn: 0.0144869	total: 1.13s	remaining: 1m 42s
11:	learn: 0.0139330	total: 1.23s	remaining: 1m 40s
12:	learn: 0.0135291	total: 1.33s	remaining: 1m 40s
13:	learn: 0.0131756	total: 1.42s	remaining: 1m 40s
14:	learn: 0.0130207	total: 1.51s	remaining: 1m 39s
15:	learn: 0.0128254	total: 1.61s	remaining: 1m 38s
16:	learn: 0.0125537	total: 1.71s	remaining: 1m 38s
17:	learn: 0.0123777	total: 1.8s	remaining: 1m 38s
18:	learn: 0.0123032	total: 1.9s	remaining: 1



6 Removed column: city


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2879726	total: 126ms	remaining: 2m 5s
1:	learn: 0.1168823	total: 261ms	remaining: 2m 10s
2:	learn: 0.0595940	total: 402ms	remaining: 2m 13s
3:	learn: 0.0358649	total: 525ms	remaining: 2m 10s
4:	learn: 0.0257045	total: 644ms	remaining: 2m 8s
5:	learn: 0.0201401	total: 762ms	remaining: 2m 6s
6:	learn: 0.0175858	total: 872ms	remaining: 2m 3s
7:	learn: 0.0163302	total: 1.02s	remaining: 2m 6s
8:	learn: 0.0153966	total: 1.16s	remaining: 2m 7s
9:	learn: 0.0146771	total: 1.29s	remaining: 2m 7s
10:	learn: 0.0130821	total: 1.43s	remaining: 2m 8s
11:	learn: 0.0124888	total: 1.56s	remaining: 2m 8s
12:	learn: 0.0121363	total: 1.7s	remaining: 2m 9s
13:	learn: 0.0119677	total: 1.82s	remaining: 2m 8s
14:	learn: 0.0117367	total: 1.94s	remaining: 2m 7s
15:	learn: 0.0112307	total: 2.06s	remaining: 2m 6s
16:	learn: 0.0111166	total: 2.17s	remaining: 2m 5s
17:	learn: 0.0109400	total: 2.3s	remaining: 2m 5s
18:	learn: 0.0107798	total: 2.47s	remaining: 2m 7s
19:	learn:



Learning rate set to 0.168984
0:	learn: 0.3141052	total: 86.2ms	remaining: 1m 26s
1:	learn: 0.1348708	total: 178ms	remaining: 1m 28s
2:	learn: 0.0685745	total: 268ms	remaining: 1m 29s
3:	learn: 0.0414943	total: 357ms	remaining: 1m 28s
4:	learn: 0.0292118	total: 454ms	remaining: 1m 30s
5:	learn: 0.0228609	total: 549ms	remaining: 1m 30s
6:	learn: 0.0192726	total: 635ms	remaining: 1m 30s
7:	learn: 0.0172010	total: 721ms	remaining: 1m 29s
8:	learn: 0.0161833	total: 804ms	remaining: 1m 28s
9:	learn: 0.0151710	total: 886ms	remaining: 1m 27s
10:	learn: 0.0145197	total: 971ms	remaining: 1m 27s
11:	learn: 0.0128576	total: 1.05s	remaining: 1m 26s
12:	learn: 0.0123661	total: 1.14s	remaining: 1m 26s
13:	learn: 0.0119786	total: 1.23s	remaining: 1m 26s
14:	learn: 0.0116994	total: 1.34s	remaining: 1m 28s
15:	learn: 0.0115286	total: 1.43s	remaining: 1m 28s
16:	learn: 0.0113334	total: 1.52s	remaining: 1m 28s
17:	learn: 0.0112368	total: 1.61s	remaining: 1m 27s
18:	learn: 0.0107393	total: 1.7s	remaining:



7 Removed column: city_pop


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2944715	total: 117ms	remaining: 1m 57s
1:	learn: 0.1248799	total: 222ms	remaining: 1m 50s
2:	learn: 0.0684190	total: 327ms	remaining: 1m 48s
3:	learn: 0.0377284	total: 438ms	remaining: 1m 49s
4:	learn: 0.0276038	total: 538ms	remaining: 1m 47s
5:	learn: 0.0210104	total: 634ms	remaining: 1m 45s
6:	learn: 0.0183023	total: 730ms	remaining: 1m 43s
7:	learn: 0.0164403	total: 827ms	remaining: 1m 42s
8:	learn: 0.0154683	total: 925ms	remaining: 1m 41s
9:	learn: 0.0145225	total: 1.02s	remaining: 1m 41s
10:	learn: 0.0138521	total: 1.13s	remaining: 1m 41s
11:	learn: 0.0133191	total: 1.23s	remaining: 1m 41s
12:	learn: 0.0129890	total: 1.34s	remaining: 1m 41s
13:	learn: 0.0123620	total: 1.44s	remaining: 1m 41s
14:	learn: 0.0121780	total: 1.53s	remaining: 1m 40s
15:	learn: 0.0119079	total: 1.64s	remaining: 1m 40s
16:	learn: 0.0116941	total: 1.74s	remaining: 1m 40s
17:	learn: 0.0115477	total: 1.85s	remaining: 1m 41s
18:	learn: 0.0114440	total: 1.96s	remaining:



Learning rate set to 0.168984
0:	learn: 0.3188068	total: 90.3ms	remaining: 1m 30s
1:	learn: 0.1434766	total: 172ms	remaining: 1m 25s
2:	learn: 0.0801628	total: 257ms	remaining: 1m 25s
3:	learn: 0.0465050	total: 338ms	remaining: 1m 24s
4:	learn: 0.0333137	total: 417ms	remaining: 1m 23s
5:	learn: 0.0243651	total: 499ms	remaining: 1m 22s
6:	learn: 0.0203050	total: 575ms	remaining: 1m 21s
7:	learn: 0.0177439	total: 658ms	remaining: 1m 21s
8:	learn: 0.0162441	total: 736ms	remaining: 1m 20s
9:	learn: 0.0153564	total: 809ms	remaining: 1m 20s
10:	learn: 0.0135299	total: 884ms	remaining: 1m 19s
11:	learn: 0.0130022	total: 969ms	remaining: 1m 19s
12:	learn: 0.0126032	total: 1.05s	remaining: 1m 19s
13:	learn: 0.0124011	total: 1.13s	remaining: 1m 19s
14:	learn: 0.0119393	total: 1.21s	remaining: 1m 19s
15:	learn: 0.0116439	total: 1.29s	remaining: 1m 19s
16:	learn: 0.0114533	total: 1.37s	remaining: 1m 19s
17:	learn: 0.0112403	total: 1.45s	remaining: 1m 19s
18:	learn: 0.0110730	total: 1.53s	remaining



8 Removed column: ft_transaction_is_recurring


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2722975	total: 104ms	remaining: 1m 44s
1:	learn: 0.1110494	total: 202ms	remaining: 1m 40s
2:	learn: 0.0557566	total: 296ms	remaining: 1m 38s
3:	learn: 0.0336571	total: 387ms	remaining: 1m 36s
4:	learn: 0.0251478	total: 487ms	remaining: 1m 36s
5:	learn: 0.0203036	total: 583ms	remaining: 1m 36s
6:	learn: 0.0180623	total: 678ms	remaining: 1m 36s
7:	learn: 0.0166423	total: 769ms	remaining: 1m 35s
8:	learn: 0.0154524	total: 866ms	remaining: 1m 35s
9:	learn: 0.0142029	total: 970ms	remaining: 1m 36s
10:	learn: 0.0135536	total: 1.07s	remaining: 1m 36s
11:	learn: 0.0129969	total: 1.17s	remaining: 1m 36s
12:	learn: 0.0126842	total: 1.27s	remaining: 1m 36s
13:	learn: 0.0120019	total: 1.37s	remaining: 1m 36s
14:	learn: 0.0117694	total: 1.48s	remaining: 1m 36s
15:	learn: 0.0116165	total: 1.57s	remaining: 1m 36s
16:	learn: 0.0114204	total: 1.67s	remaining: 1m 36s
17:	learn: 0.0113058	total: 1.77s	remaining: 1m 36s
18:	learn: 0.0109562	total: 1.87s	remaining:



Learning rate set to 0.168984
0:	learn: 0.2964424	total: 83.8ms	remaining: 1m 23s
1:	learn: 0.1362490	total: 166ms	remaining: 1m 23s
2:	learn: 0.0683583	total: 248ms	remaining: 1m 22s
3:	learn: 0.0405491	total: 331ms	remaining: 1m 22s
4:	learn: 0.0284748	total: 414ms	remaining: 1m 22s
5:	learn: 0.0223808	total: 490ms	remaining: 1m 21s
6:	learn: 0.0190961	total: 572ms	remaining: 1m 21s
7:	learn: 0.0169389	total: 653ms	remaining: 1m 20s
8:	learn: 0.0156677	total: 730ms	remaining: 1m 20s
9:	learn: 0.0145409	total: 815ms	remaining: 1m 20s
10:	learn: 0.0140395	total: 896ms	remaining: 1m 20s
11:	learn: 0.0136432	total: 978ms	remaining: 1m 20s
12:	learn: 0.0131471	total: 1.06s	remaining: 1m 20s
13:	learn: 0.0123602	total: 1.14s	remaining: 1m 20s
14:	learn: 0.0121276	total: 1.22s	remaining: 1m 19s
15:	learn: 0.0118395	total: 1.3s	remaining: 1m 20s
16:	learn: 0.0116708	total: 1.39s	remaining: 1m 20s
17:	learn: 0.0114279	total: 1.48s	remaining: 1m 20s
18:	learn: 0.0112533	total: 1.56s	remaining:



9 Removed column: gender


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2955799	total: 97.7ms	remaining: 1m 37s
1:	learn: 0.1269811	total: 199ms	remaining: 1m 39s
2:	learn: 0.0625390	total: 288ms	remaining: 1m 35s
3:	learn: 0.0362190	total: 382ms	remaining: 1m 35s
4:	learn: 0.0258012	total: 479ms	remaining: 1m 35s
5:	learn: 0.0206591	total: 574ms	remaining: 1m 35s
6:	learn: 0.0181222	total: 667ms	remaining: 1m 34s
7:	learn: 0.0165167	total: 756ms	remaining: 1m 33s
8:	learn: 0.0157602	total: 849ms	remaining: 1m 33s
9:	learn: 0.0150744	total: 939ms	remaining: 1m 32s
10:	learn: 0.0137624	total: 1.05s	remaining: 1m 34s
11:	learn: 0.0132990	total: 1.15s	remaining: 1m 34s
12:	learn: 0.0124948	total: 1.25s	remaining: 1m 35s
13:	learn: 0.0123321	total: 1.34s	remaining: 1m 34s
14:	learn: 0.0120580	total: 1.44s	remaining: 1m 34s
15:	learn: 0.0118042	total: 1.54s	remaining: 1m 34s
16:	learn: 0.0116004	total: 1.64s	remaining: 1m 34s
17:	learn: 0.0111371	total: 1.74s	remaining: 1m 34s
18:	learn: 0.0110115	total: 1.84s	remaining



Learning rate set to 0.168984
0:	learn: 0.3201877	total: 77.6ms	remaining: 1m 17s
1:	learn: 0.1320566	total: 157ms	remaining: 1m 18s
2:	learn: 0.0647630	total: 239ms	remaining: 1m 19s
3:	learn: 0.0390624	total: 321ms	remaining: 1m 19s
4:	learn: 0.0274462	total: 398ms	remaining: 1m 19s
5:	learn: 0.0220409	total: 477ms	remaining: 1m 18s
6:	learn: 0.0187580	total: 556ms	remaining: 1m 18s
7:	learn: 0.0170836	total: 633ms	remaining: 1m 18s
8:	learn: 0.0160081	total: 708ms	remaining: 1m 17s
9:	learn: 0.0151041	total: 790ms	remaining: 1m 18s
10:	learn: 0.0138372	total: 870ms	remaining: 1m 18s
11:	learn: 0.0132932	total: 949ms	remaining: 1m 18s
12:	learn: 0.0128330	total: 1.03s	remaining: 1m 18s
13:	learn: 0.0124038	total: 1.1s	remaining: 1m 17s
14:	learn: 0.0118218	total: 1.18s	remaining: 1m 17s
15:	learn: 0.0116083	total: 1.27s	remaining: 1m 17s
16:	learn: 0.0114168	total: 1.35s	remaining: 1m 18s
17:	learn: 0.0112642	total: 1.43s	remaining: 1m 18s
18:	learn: 0.0111318	total: 1.51s	remaining:



10 Removed column: job


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2767310	total: 118ms	remaining: 1m 57s
1:	learn: 0.1100101	total: 213ms	remaining: 1m 46s
2:	learn: 0.0553171	total: 306ms	remaining: 1m 41s
3:	learn: 0.0344800	total: 399ms	remaining: 1m 39s
4:	learn: 0.0252017	total: 496ms	remaining: 1m 38s
5:	learn: 0.0210489	total: 592ms	remaining: 1m 38s
6:	learn: 0.0180719	total: 683ms	remaining: 1m 36s
7:	learn: 0.0168412	total: 774ms	remaining: 1m 35s
8:	learn: 0.0146980	total: 882ms	remaining: 1m 37s
9:	learn: 0.0141759	total: 986ms	remaining: 1m 37s
10:	learn: 0.0135643	total: 1.1s	remaining: 1m 38s
11:	learn: 0.0129275	total: 1.2s	remaining: 1m 39s
12:	learn: 0.0126574	total: 1.3s	remaining: 1m 38s
13:	learn: 0.0124239	total: 1.39s	remaining: 1m 38s
14:	learn: 0.0118231	total: 1.49s	remaining: 1m 37s
15:	learn: 0.0115661	total: 1.58s	remaining: 1m 37s
16:	learn: 0.0114774	total: 1.68s	remaining: 1m 37s
17:	learn: 0.0111871	total: 1.78s	remaining: 1m 37s
18:	learn: 0.0110954	total: 1.88s	remaining: 1m



Learning rate set to 0.168984
0:	learn: 0.3020744	total: 86.3ms	remaining: 1m 26s
1:	learn: 0.1299323	total: 165ms	remaining: 1m 22s
2:	learn: 0.0660715	total: 246ms	remaining: 1m 21s
3:	learn: 0.0405711	total: 324ms	remaining: 1m 20s
4:	learn: 0.0284199	total: 406ms	remaining: 1m 20s
5:	learn: 0.0224883	total: 486ms	remaining: 1m 20s
6:	learn: 0.0197281	total: 558ms	remaining: 1m 19s
7:	learn: 0.0178746	total: 638ms	remaining: 1m 19s
8:	learn: 0.0167112	total: 716ms	remaining: 1m 18s
9:	learn: 0.0156005	total: 798ms	remaining: 1m 19s
10:	learn: 0.0149287	total: 870ms	remaining: 1m 18s
11:	learn: 0.0140675	total: 951ms	remaining: 1m 18s
12:	learn: 0.0135566	total: 1.03s	remaining: 1m 17s
13:	learn: 0.0133428	total: 1.1s	remaining: 1m 17s
14:	learn: 0.0131120	total: 1.17s	remaining: 1m 17s
15:	learn: 0.0127722	total: 1.25s	remaining: 1m 17s
16:	learn: 0.0125542	total: 1.33s	remaining: 1m 17s
17:	learn: 0.0124021	total: 1.41s	remaining: 1m 16s
18:	learn: 0.0122008	total: 1.49s	remaining:



11 Removed column: ft_trans_day


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2733912	total: 99.6ms	remaining: 1m 39s
1:	learn: 0.1098885	total: 191ms	remaining: 1m 35s
2:	learn: 0.0541495	total: 287ms	remaining: 1m 35s
3:	learn: 0.0333511	total: 381ms	remaining: 1m 34s
4:	learn: 0.0246625	total: 480ms	remaining: 1m 35s
5:	learn: 0.0202186	total: 577ms	remaining: 1m 35s
6:	learn: 0.0179418	total: 676ms	remaining: 1m 35s
7:	learn: 0.0164683	total: 773ms	remaining: 1m 35s
8:	learn: 0.0155077	total: 864ms	remaining: 1m 35s
9:	learn: 0.0149594	total: 954ms	remaining: 1m 34s
10:	learn: 0.0135392	total: 1.05s	remaining: 1m 34s
11:	learn: 0.0132271	total: 1.14s	remaining: 1m 34s
12:	learn: 0.0129338	total: 1.24s	remaining: 1m 34s
13:	learn: 0.0124234	total: 1.34s	remaining: 1m 34s
14:	learn: 0.0121429	total: 1.43s	remaining: 1m 34s
15:	learn: 0.0119892	total: 1.52s	remaining: 1m 33s
16:	learn: 0.0119112	total: 1.62s	remaining: 1m 33s
17:	learn: 0.0117869	total: 1.71s	remaining: 1m 33s
18:	learn: 0.0112613	total: 1.81s	remaining



Learning rate set to 0.168984
0:	learn: 0.3230862	total: 90ms	remaining: 1m 29s
1:	learn: 0.1404751	total: 167ms	remaining: 1m 23s
2:	learn: 0.0713713	total: 250ms	remaining: 1m 23s
3:	learn: 0.0407056	total: 324ms	remaining: 1m 20s
4:	learn: 0.0285805	total: 402ms	remaining: 1m 20s
5:	learn: 0.0223114	total: 479ms	remaining: 1m 19s
6:	learn: 0.0195468	total: 560ms	remaining: 1m 19s
7:	learn: 0.0179342	total: 652ms	remaining: 1m 20s
8:	learn: 0.0164154	total: 724ms	remaining: 1m 19s
9:	learn: 0.0156130	total: 799ms	remaining: 1m 19s
10:	learn: 0.0148696	total: 879ms	remaining: 1m 19s
11:	learn: 0.0144256	total: 961ms	remaining: 1m 19s
12:	learn: 0.0139750	total: 1.03s	remaining: 1m 18s
13:	learn: 0.0137170	total: 1.11s	remaining: 1m 17s
14:	learn: 0.0135073	total: 1.18s	remaining: 1m 17s
15:	learn: 0.0132158	total: 1.26s	remaining: 1m 17s
16:	learn: 0.0122009	total: 1.34s	remaining: 1m 17s
17:	learn: 0.0120231	total: 1.42s	remaining: 1m 17s
18:	learn: 0.0113249	total: 1.5s	remaining: 1



12 Removed column: ft_distance_user_merchant


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2629527	total: 98.9ms	remaining: 1m 38s
1:	learn: 0.1138940	total: 186ms	remaining: 1m 32s
2:	learn: 0.0606991	total: 276ms	remaining: 1m 31s
3:	learn: 0.0353103	total: 367ms	remaining: 1m 31s
4:	learn: 0.0255561	total: 458ms	remaining: 1m 31s
5:	learn: 0.0209777	total: 546ms	remaining: 1m 30s
6:	learn: 0.0181462	total: 637ms	remaining: 1m 30s
7:	learn: 0.0167188	total: 727ms	remaining: 1m 30s
8:	learn: 0.0158769	total: 816ms	remaining: 1m 29s
9:	learn: 0.0152929	total: 908ms	remaining: 1m 29s
10:	learn: 0.0146302	total: 997ms	remaining: 1m 29s
11:	learn: 0.0142447	total: 1.09s	remaining: 1m 30s
12:	learn: 0.0138239	total: 1.19s	remaining: 1m 30s
13:	learn: 0.0134237	total: 1.27s	remaining: 1m 29s
14:	learn: 0.0125629	total: 1.37s	remaining: 1m 29s
15:	learn: 0.0124351	total: 1.46s	remaining: 1m 30s
16:	learn: 0.0118561	total: 1.55s	remaining: 1m 29s
17:	learn: 0.0116735	total: 1.65s	remaining: 1m 30s
18:	learn: 0.0114262	total: 1.75s	remaining



Learning rate set to 0.168984
0:	learn: 0.2883810	total: 84.8ms	remaining: 1m 24s
1:	learn: 0.1328707	total: 158ms	remaining: 1m 18s
2:	learn: 0.0725936	total: 235ms	remaining: 1m 17s
3:	learn: 0.0428263	total: 309ms	remaining: 1m 16s
4:	learn: 0.0298521	total: 385ms	remaining: 1m 16s
5:	learn: 0.0231585	total: 459ms	remaining: 1m 16s
6:	learn: 0.0196249	total: 529ms	remaining: 1m 15s
7:	learn: 0.0177600	total: 605ms	remaining: 1m 14s
8:	learn: 0.0162899	total: 684ms	remaining: 1m 15s
9:	learn: 0.0154617	total: 761ms	remaining: 1m 15s
10:	learn: 0.0148361	total: 834ms	remaining: 1m 14s
11:	learn: 0.0142655	total: 909ms	remaining: 1m 14s
12:	learn: 0.0138851	total: 989ms	remaining: 1m 15s
13:	learn: 0.0130949	total: 1.06s	remaining: 1m 14s
14:	learn: 0.0128770	total: 1.14s	remaining: 1m 14s
15:	learn: 0.0126986	total: 1.22s	remaining: 1m 15s
16:	learn: 0.0120310	total: 1.29s	remaining: 1m 14s
17:	learn: 0.0118803	total: 1.37s	remaining: 1m 14s
18:	learn: 0.0116774	total: 1.45s	remaining



13 Removed column: ft_merchant_popularity


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2658692	total: 101ms	remaining: 1m 40s
1:	learn: 0.1127050	total: 196ms	remaining: 1m 37s
2:	learn: 0.0532430	total: 296ms	remaining: 1m 38s
3:	learn: 0.0335798	total: 386ms	remaining: 1m 36s
4:	learn: 0.0244808	total: 483ms	remaining: 1m 36s
5:	learn: 0.0201651	total: 570ms	remaining: 1m 34s
6:	learn: 0.0176115	total: 658ms	remaining: 1m 33s
7:	learn: 0.0162506	total: 753ms	remaining: 1m 33s
8:	learn: 0.0154351	total: 843ms	remaining: 1m 32s
9:	learn: 0.0147448	total: 930ms	remaining: 1m 32s
10:	learn: 0.0142200	total: 1.03s	remaining: 1m 32s
11:	learn: 0.0138280	total: 1.13s	remaining: 1m 32s
12:	learn: 0.0135082	total: 1.22s	remaining: 1m 32s
13:	learn: 0.0133220	total: 1.32s	remaining: 1m 33s
14:	learn: 0.0131775	total: 1.42s	remaining: 1m 33s
15:	learn: 0.0129532	total: 1.51s	remaining: 1m 32s
16:	learn: 0.0125815	total: 1.6s	remaining: 1m 32s
17:	learn: 0.0124809	total: 1.69s	remaining: 1m 32s
18:	learn: 0.0123645	total: 1.79s	remaining: 



Learning rate set to 0.168984
0:	learn: 0.3012722	total: 88.8ms	remaining: 1m 28s
1:	learn: 0.1421010	total: 162ms	remaining: 1m 20s
2:	learn: 0.0693709	total: 243ms	remaining: 1m 20s
3:	learn: 0.0405950	total: 318ms	remaining: 1m 19s
4:	learn: 0.0285084	total: 394ms	remaining: 1m 18s
5:	learn: 0.0225133	total: 469ms	remaining: 1m 17s
6:	learn: 0.0195245	total: 539ms	remaining: 1m 16s
7:	learn: 0.0175670	total: 613ms	remaining: 1m 16s
8:	learn: 0.0162544	total: 687ms	remaining: 1m 15s
9:	learn: 0.0154794	total: 757ms	remaining: 1m 14s
10:	learn: 0.0149102	total: 837ms	remaining: 1m 15s
11:	learn: 0.0141123	total: 913ms	remaining: 1m 15s
12:	learn: 0.0137979	total: 988ms	remaining: 1m 15s
13:	learn: 0.0135273	total: 1.06s	remaining: 1m 14s
14:	learn: 0.0132594	total: 1.13s	remaining: 1m 14s
15:	learn: 0.0131121	total: 1.2s	remaining: 1m 13s
16:	learn: 0.0128908	total: 1.28s	remaining: 1m 14s
17:	learn: 0.0126310	total: 1.36s	remaining: 1m 14s
18:	learn: 0.0124782	total: 1.43s	remaining:



14 Removed column: ft_trans_day_of_year


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2712035	total: 102ms	remaining: 1m 41s
1:	learn: 0.1214948	total: 193ms	remaining: 1m 36s
2:	learn: 0.0571810	total: 287ms	remaining: 1m 35s
3:	learn: 0.0342589	total: 378ms	remaining: 1m 34s
4:	learn: 0.0245509	total: 470ms	remaining: 1m 33s
5:	learn: 0.0199532	total: 561ms	remaining: 1m 32s
6:	learn: 0.0178254	total: 651ms	remaining: 1m 32s
7:	learn: 0.0165711	total: 740ms	remaining: 1m 31s
8:	learn: 0.0155415	total: 829ms	remaining: 1m 31s
9:	learn: 0.0149562	total: 910ms	remaining: 1m 30s
10:	learn: 0.0145232	total: 998ms	remaining: 1m 29s
11:	learn: 0.0140523	total: 1.09s	remaining: 1m 29s
12:	learn: 0.0136689	total: 1.18s	remaining: 1m 29s
13:	learn: 0.0134906	total: 1.28s	remaining: 1m 30s
14:	learn: 0.0132973	total: 1.37s	remaining: 1m 29s
15:	learn: 0.0130433	total: 1.46s	remaining: 1m 29s
16:	learn: 0.0128947	total: 1.55s	remaining: 1m 29s
17:	learn: 0.0123607	total: 1.65s	remaining: 1m 29s
18:	learn: 0.0122512	total: 1.74s	remaining:



Learning rate set to 0.168984
0:	learn: 0.2962103	total: 86.6ms	remaining: 1m 26s
1:	learn: 0.1448597	total: 153ms	remaining: 1m 16s
2:	learn: 0.0710820	total: 243ms	remaining: 1m 20s
3:	learn: 0.0418433	total: 320ms	remaining: 1m 19s
4:	learn: 0.0289076	total: 393ms	remaining: 1m 18s
5:	learn: 0.0236387	total: 467ms	remaining: 1m 17s
6:	learn: 0.0200423	total: 542ms	remaining: 1m 16s
7:	learn: 0.0180629	total: 624ms	remaining: 1m 17s
8:	learn: 0.0165865	total: 697ms	remaining: 1m 16s
9:	learn: 0.0158208	total: 770ms	remaining: 1m 16s
10:	learn: 0.0150865	total: 848ms	remaining: 1m 16s
11:	learn: 0.0145243	total: 919ms	remaining: 1m 15s
12:	learn: 0.0141400	total: 996ms	remaining: 1m 15s
13:	learn: 0.0138801	total: 1.07s	remaining: 1m 15s
14:	learn: 0.0134835	total: 1.15s	remaining: 1m 15s
15:	learn: 0.0133008	total: 1.23s	remaining: 1m 15s
16:	learn: 0.0131743	total: 1.31s	remaining: 1m 15s
17:	learn: 0.0130277	total: 1.39s	remaining: 1m 15s
18:	learn: 0.0129005	total: 1.46s	remaining



15 Removed column: ft_merchant_distance_from_user_mean


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2918923	total: 93.1ms	remaining: 1m 32s
1:	learn: 0.1152568	total: 184ms	remaining: 1m 32s
2:	learn: 0.0551758	total: 273ms	remaining: 1m 30s
3:	learn: 0.0333152	total: 365ms	remaining: 1m 30s
4:	learn: 0.0242038	total: 459ms	remaining: 1m 31s
5:	learn: 0.0202305	total: 559ms	remaining: 1m 32s
6:	learn: 0.0180071	total: 655ms	remaining: 1m 32s
7:	learn: 0.0164759	total: 772ms	remaining: 1m 35s
8:	learn: 0.0157086	total: 874ms	remaining: 1m 36s
9:	learn: 0.0151434	total: 962ms	remaining: 1m 35s
10:	learn: 0.0144100	total: 1.05s	remaining: 1m 34s
11:	learn: 0.0140058	total: 1.14s	remaining: 1m 33s
12:	learn: 0.0135755	total: 1.24s	remaining: 1m 33s
13:	learn: 0.0133646	total: 1.33s	remaining: 1m 33s
14:	learn: 0.0131803	total: 1.44s	remaining: 1m 34s
15:	learn: 0.0129235	total: 1.56s	remaining: 1m 35s
16:	learn: 0.0127557	total: 1.65s	remaining: 1m 35s
17:	learn: 0.0126588	total: 1.74s	remaining: 1m 35s
18:	learn: 0.0125457	total: 1.83s	remaining



Learning rate set to 0.168984
0:	learn: 0.3162902	total: 97.5ms	remaining: 1m 37s
1:	learn: 0.1398231	total: 167ms	remaining: 1m 23s
2:	learn: 0.0783213	total: 242ms	remaining: 1m 20s
3:	learn: 0.0443312	total: 315ms	remaining: 1m 18s
4:	learn: 0.0301289	total: 391ms	remaining: 1m 17s
5:	learn: 0.0226575	total: 464ms	remaining: 1m 16s
6:	learn: 0.0191664	total: 537ms	remaining: 1m 16s
7:	learn: 0.0175467	total: 611ms	remaining: 1m 15s
8:	learn: 0.0165204	total: 680ms	remaining: 1m 14s
9:	learn: 0.0154225	total: 757ms	remaining: 1m 14s
10:	learn: 0.0148030	total: 829ms	remaining: 1m 14s
11:	learn: 0.0141996	total: 902ms	remaining: 1m 14s
12:	learn: 0.0137713	total: 980ms	remaining: 1m 14s
13:	learn: 0.0134847	total: 1.06s	remaining: 1m 14s
14:	learn: 0.0131284	total: 1.13s	remaining: 1m 14s
15:	learn: 0.0129691	total: 1.21s	remaining: 1m 14s
16:	learn: 0.0128251	total: 1.29s	remaining: 1m 14s
17:	learn: 0.0125666	total: 1.36s	remaining: 1m 14s
18:	learn: 0.0124106	total: 1.44s	remaining



16 Removed column: merchant


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2520521	total: 97.6ms	remaining: 1m 37s
1:	learn: 0.1027322	total: 188ms	remaining: 1m 34s
2:	learn: 0.0520130	total: 284ms	remaining: 1m 34s
3:	learn: 0.0326278	total: 374ms	remaining: 1m 33s
4:	learn: 0.0239688	total: 470ms	remaining: 1m 33s
5:	learn: 0.0198239	total: 558ms	remaining: 1m 32s
6:	learn: 0.0176059	total: 647ms	remaining: 1m 31s
7:	learn: 0.0162120	total: 741ms	remaining: 1m 31s
8:	learn: 0.0154021	total: 833ms	remaining: 1m 31s
9:	learn: 0.0148479	total: 919ms	remaining: 1m 31s
10:	learn: 0.0142659	total: 1.01s	remaining: 1m 30s
11:	learn: 0.0138624	total: 1.1s	remaining: 1m 30s
12:	learn: 0.0135848	total: 1.19s	remaining: 1m 30s
13:	learn: 0.0132843	total: 1.28s	remaining: 1m 30s
14:	learn: 0.0129982	total: 1.38s	remaining: 1m 30s
15:	learn: 0.0128127	total: 1.47s	remaining: 1m 30s
16:	learn: 0.0123581	total: 1.56s	remaining: 1m 30s
17:	learn: 0.0122219	total: 1.65s	remaining: 1m 30s
18:	learn: 0.0120874	total: 1.75s	remaining:



Learning rate set to 0.168984
0:	learn: 0.2775322	total: 83.1ms	remaining: 1m 23s
1:	learn: 0.1347188	total: 156ms	remaining: 1m 17s
2:	learn: 0.0726865	total: 234ms	remaining: 1m 17s
3:	learn: 0.0422556	total: 313ms	remaining: 1m 18s
4:	learn: 0.0288407	total: 389ms	remaining: 1m 17s
5:	learn: 0.0224624	total: 464ms	remaining: 1m 16s
6:	learn: 0.0190653	total: 539ms	remaining: 1m 16s
7:	learn: 0.0172929	total: 611ms	remaining: 1m 15s
8:	learn: 0.0159195	total: 685ms	remaining: 1m 15s
9:	learn: 0.0151339	total: 753ms	remaining: 1m 14s
10:	learn: 0.0146376	total: 830ms	remaining: 1m 14s
11:	learn: 0.0142953	total: 907ms	remaining: 1m 14s
12:	learn: 0.0139247	total: 979ms	remaining: 1m 14s
13:	learn: 0.0135486	total: 1.06s	remaining: 1m 14s
14:	learn: 0.0132884	total: 1.14s	remaining: 1m 14s
15:	learn: 0.0131503	total: 1.22s	remaining: 1m 14s
16:	learn: 0.0129519	total: 1.29s	remaining: 1m 14s
17:	learn: 0.0126821	total: 1.36s	remaining: 1m 14s
18:	learn: 0.0125283	total: 1.44s	remaining



17 Removed column: ft_transaction_count_per_user


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2953692	total: 92.4ms	remaining: 1m 32s
1:	learn: 0.1128689	total: 183ms	remaining: 1m 31s
2:	learn: 0.0550147	total: 279ms	remaining: 1m 32s
3:	learn: 0.0337674	total: 381ms	remaining: 1m 34s
4:	learn: 0.0248069	total: 474ms	remaining: 1m 34s
5:	learn: 0.0205228	total: 558ms	remaining: 1m 32s
6:	learn: 0.0182054	total: 644ms	remaining: 1m 31s
7:	learn: 0.0168344	total: 736ms	remaining: 1m 31s
8:	learn: 0.0158602	total: 821ms	remaining: 1m 30s
9:	learn: 0.0152804	total: 910ms	remaining: 1m 30s
10:	learn: 0.0147192	total: 1s	remaining: 1m 29s
11:	learn: 0.0144270	total: 1.08s	remaining: 1m 29s
12:	learn: 0.0137646	total: 1.17s	remaining: 1m 29s
13:	learn: 0.0135417	total: 1.26s	remaining: 1m 28s
14:	learn: 0.0132593	total: 1.35s	remaining: 1m 28s
15:	learn: 0.0131120	total: 1.43s	remaining: 1m 28s
16:	learn: 0.0120861	total: 1.52s	remaining: 1m 28s
17:	learn: 0.0117219	total: 1.61s	remaining: 1m 28s
18:	learn: 0.0116240	total: 1.7s	remaining: 1m



Learning rate set to 0.168984
0:	learn: 0.3202042	total: 73.1ms	remaining: 1m 13s
1:	learn: 0.1314721	total: 147ms	remaining: 1m 13s
2:	learn: 0.0653899	total: 224ms	remaining: 1m 14s
3:	learn: 0.0393026	total: 294ms	remaining: 1m 13s
4:	learn: 0.0280000	total: 365ms	remaining: 1m 12s
5:	learn: 0.0224917	total: 437ms	remaining: 1m 12s
6:	learn: 0.0194728	total: 508ms	remaining: 1m 12s
7:	learn: 0.0177024	total: 579ms	remaining: 1m 11s
8:	learn: 0.0164836	total: 652ms	remaining: 1m 11s
9:	learn: 0.0157644	total: 722ms	remaining: 1m 11s
10:	learn: 0.0144523	total: 795ms	remaining: 1m 11s
11:	learn: 0.0142884	total: 872ms	remaining: 1m 11s
12:	learn: 0.0136876	total: 959ms	remaining: 1m 12s
13:	learn: 0.0132982	total: 1.03s	remaining: 1m 12s
14:	learn: 0.0130309	total: 1.1s	remaining: 1m 12s
15:	learn: 0.0128302	total: 1.18s	remaining: 1m 12s
16:	learn: 0.0126922	total: 1.25s	remaining: 1m 12s
17:	learn: 0.0125329	total: 1.32s	remaining: 1m 12s
18:	learn: 0.0118803	total: 1.39s	remaining:



18 Removed column: ft_trans_month


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.3302456	total: 59.3ms	remaining: 59.2s
1:	learn: 0.1307252	total: 141ms	remaining: 1m 10s
2:	learn: 0.0604280	total: 233ms	remaining: 1m 17s
3:	learn: 0.0355700	total: 323ms	remaining: 1m 20s
4:	learn: 0.0255098	total: 410ms	remaining: 1m 21s
5:	learn: 0.0203868	total: 497ms	remaining: 1m 22s
6:	learn: 0.0181851	total: 577ms	remaining: 1m 21s
7:	learn: 0.0168908	total: 659ms	remaining: 1m 21s
8:	learn: 0.0156927	total: 748ms	remaining: 1m 22s
9:	learn: 0.0150777	total: 830ms	remaining: 1m 22s
10:	learn: 0.0144044	total: 921ms	remaining: 1m 22s
11:	learn: 0.0140222	total: 1.01s	remaining: 1m 23s
12:	learn: 0.0137503	total: 1.1s	remaining: 1m 23s
13:	learn: 0.0134693	total: 1.18s	remaining: 1m 23s
14:	learn: 0.0131143	total: 1.28s	remaining: 1m 23s
15:	learn: 0.0130059	total: 1.37s	remaining: 1m 24s
16:	learn: 0.0127368	total: 1.46s	remaining: 1m 24s
17:	learn: 0.0119014	total: 1.54s	remaining: 1m 24s
18:	learn: 0.0117505	total: 1.63s	remaining: 



Learning rate set to 0.168984
0:	learn: 0.3546556	total: 56.9ms	remaining: 56.8s
1:	learn: 0.1508678	total: 126ms	remaining: 1m 2s
2:	learn: 0.0734372	total: 202ms	remaining: 1m 7s
3:	learn: 0.0426393	total: 275ms	remaining: 1m 8s
4:	learn: 0.0293588	total: 347ms	remaining: 1m 9s
5:	learn: 0.0230321	total: 420ms	remaining: 1m 9s
6:	learn: 0.0195722	total: 489ms	remaining: 1m 9s
7:	learn: 0.0162080	total: 559ms	remaining: 1m 9s
8:	learn: 0.0152687	total: 628ms	remaining: 1m 9s
9:	learn: 0.0146264	total: 696ms	remaining: 1m 8s
10:	learn: 0.0139419	total: 767ms	remaining: 1m 8s
11:	learn: 0.0133411	total: 840ms	remaining: 1m 9s
12:	learn: 0.0129680	total: 912ms	remaining: 1m 9s
13:	learn: 0.0124539	total: 984ms	remaining: 1m 9s
14:	learn: 0.0118250	total: 1.06s	remaining: 1m 9s
15:	learn: 0.0116735	total: 1.14s	remaining: 1m 9s
16:	learn: 0.0115227	total: 1.21s	remaining: 1m 10s
17:	learn: 0.0114187	total: 1.28s	remaining: 1m 9s
18:	learn: 0.0113057	total: 1.35s	remaining: 1m 9s
19:	learn



19 Removed column: ft_day_of_week


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.2804224	total: 93.4ms	remaining: 1m 33s
1:	learn: 0.1091558	total: 182ms	remaining: 1m 30s
2:	learn: 0.0538291	total: 275ms	remaining: 1m 31s
3:	learn: 0.0335358	total: 365ms	remaining: 1m 30s
4:	learn: 0.0247464	total: 451ms	remaining: 1m 29s
5:	learn: 0.0204703	total: 535ms	remaining: 1m 28s
6:	learn: 0.0181465	total: 624ms	remaining: 1m 28s
7:	learn: 0.0166784	total: 710ms	remaining: 1m 27s
8:	learn: 0.0159209	total: 793ms	remaining: 1m 27s
9:	learn: 0.0152220	total: 877ms	remaining: 1m 26s
10:	learn: 0.0148501	total: 964ms	remaining: 1m 26s
11:	learn: 0.0145130	total: 1.05s	remaining: 1m 26s
12:	learn: 0.0141360	total: 1.14s	remaining: 1m 26s
13:	learn: 0.0138962	total: 1.23s	remaining: 1m 26s
14:	learn: 0.0136941	total: 1.31s	remaining: 1m 26s
15:	learn: 0.0135450	total: 1.4s	remaining: 1m 26s
16:	learn: 0.0134104	total: 1.49s	remaining: 1m 26s
17:	learn: 0.0128313	total: 1.57s	remaining: 1m 25s
18:	learn: 0.0127032	total: 1.66s	remaining:



Learning rate set to 0.168984
0:	learn: 0.3060332	total: 73.7ms	remaining: 1m 13s
1:	learn: 0.1437853	total: 147ms	remaining: 1m 13s
2:	learn: 0.0685140	total: 222ms	remaining: 1m 13s
3:	learn: 0.0414260	total: 304ms	remaining: 1m 15s
4:	learn: 0.0289485	total: 388ms	remaining: 1m 17s
5:	learn: 0.0229201	total: 473ms	remaining: 1m 18s
6:	learn: 0.0199858	total: 549ms	remaining: 1m 17s
7:	learn: 0.0168813	total: 628ms	remaining: 1m 17s
8:	learn: 0.0159217	total: 699ms	remaining: 1m 16s
9:	learn: 0.0151611	total: 767ms	remaining: 1m 15s
10:	learn: 0.0141891	total: 840ms	remaining: 1m 15s
11:	learn: 0.0139260	total: 908ms	remaining: 1m 14s
12:	learn: 0.0137051	total: 977ms	remaining: 1m 14s
13:	learn: 0.0132556	total: 1.04s	remaining: 1m 13s
14:	learn: 0.0129802	total: 1.12s	remaining: 1m 13s
15:	learn: 0.0123001	total: 1.19s	remaining: 1m 13s
16:	learn: 0.0121340	total: 1.26s	remaining: 1m 12s
17:	learn: 0.0119755	total: 1.33s	remaining: 1m 12s
18:	learn: 0.0117464	total: 1.4s	remaining:



20 Removed column: amt


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.3020138	total: 84.4ms	remaining: 1m 24s
1:	learn: 0.1474536	total: 172ms	remaining: 1m 25s
2:	learn: 0.0859359	total: 260ms	remaining: 1m 26s
3:	learn: 0.0573094	total: 344ms	remaining: 1m 25s
4:	learn: 0.0444103	total: 432ms	remaining: 1m 26s
5:	learn: 0.0371613	total: 523ms	remaining: 1m 26s
6:	learn: 0.0332497	total: 612ms	remaining: 1m 26s
7:	learn: 0.0310732	total: 701ms	remaining: 1m 26s
8:	learn: 0.0297531	total: 788ms	remaining: 1m 26s
9:	learn: 0.0287021	total: 874ms	remaining: 1m 26s
10:	learn: 0.0282496	total: 958ms	remaining: 1m 26s
11:	learn: 0.0278876	total: 1.06s	remaining: 1m 27s
12:	learn: 0.0276558	total: 1.16s	remaining: 1m 28s
13:	learn: 0.0274303	total: 1.26s	remaining: 1m 28s
14:	learn: 0.0272701	total: 1.34s	remaining: 1m 28s
15:	learn: 0.0271591	total: 1.43s	remaining: 1m 28s
16:	learn: 0.0270595	total: 1.52s	remaining: 1m 27s
17:	learn: 0.0268705	total: 1.6s	remaining: 1m 27s
18:	learn: 0.0267935	total: 1.69s	remaining:



Learning rate set to 0.168984
0:	learn: 0.3272595	total: 71.9ms	remaining: 1m 11s
1:	learn: 0.1676970	total: 147ms	remaining: 1m 13s
2:	learn: 0.0983507	total: 221ms	remaining: 1m 13s
3:	learn: 0.0654109	total: 296ms	remaining: 1m 13s
4:	learn: 0.0496719	total: 368ms	remaining: 1m 13s
5:	learn: 0.0413764	total: 440ms	remaining: 1m 12s
6:	learn: 0.0363796	total: 513ms	remaining: 1m 12s
7:	learn: 0.0332407	total: 587ms	remaining: 1m 12s
8:	learn: 0.0312708	total: 662ms	remaining: 1m 12s
9:	learn: 0.0301831	total: 734ms	remaining: 1m 12s
10:	learn: 0.0292762	total: 806ms	remaining: 1m 12s
11:	learn: 0.0288447	total: 883ms	remaining: 1m 12s
12:	learn: 0.0282347	total: 953ms	remaining: 1m 12s
13:	learn: 0.0278282	total: 1.02s	remaining: 1m 12s
14:	learn: 0.0276382	total: 1.09s	remaining: 1m 11s
15:	learn: 0.0273968	total: 1.17s	remaining: 1m 11s
16:	learn: 0.0272737	total: 1.24s	remaining: 1m 11s
17:	learn: 0.0270462	total: 1.32s	remaining: 1m 11s
18:	learn: 0.0268979	total: 1.4s	remaining:



21 Removed column: category


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.3069120	total: 86.9ms	remaining: 1m 26s
1:	learn: 0.1547465	total: 150ms	remaining: 1m 14s
2:	learn: 0.0899275	total: 242ms	remaining: 1m 20s
3:	learn: 0.0613384	total: 326ms	remaining: 1m 21s
4:	learn: 0.0475662	total: 415ms	remaining: 1m 22s
5:	learn: 0.0407521	total: 502ms	remaining: 1m 23s
6:	learn: 0.0367023	total: 589ms	remaining: 1m 23s
7:	learn: 0.0346093	total: 647ms	remaining: 1m 20s
8:	learn: 0.0332081	total: 734ms	remaining: 1m 20s
9:	learn: 0.0323120	total: 824ms	remaining: 1m 21s
10:	learn: 0.0316998	total: 906ms	remaining: 1m 21s
11:	learn: 0.0313662	total: 988ms	remaining: 1m 21s
12:	learn: 0.0311146	total: 1.07s	remaining: 1m 21s
13:	learn: 0.0309787	total: 1.17s	remaining: 1m 22s
14:	learn: 0.0308420	total: 1.25s	remaining: 1m 22s
15:	learn: 0.0307504	total: 1.33s	remaining: 1m 22s
16:	learn: 0.0306661	total: 1.41s	remaining: 1m 21s
17:	learn: 0.0306270	total: 1.5s	remaining: 1m 21s
18:	learn: 0.0306167	total: 1.55s	remaining:



Learning rate set to 0.168984
0:	learn: 0.3319234	total: 70.4ms	remaining: 1m 10s
1:	learn: 0.1757155	total: 125ms	remaining: 1m 2s
2:	learn: 0.1037817	total: 197ms	remaining: 1m 5s
3:	learn: 0.0702432	total: 273ms	remaining: 1m 7s
4:	learn: 0.0540790	total: 345ms	remaining: 1m 8s
5:	learn: 0.0446259	total: 415ms	remaining: 1m 8s
6:	learn: 0.0392586	total: 485ms	remaining: 1m 8s
7:	learn: 0.0361778	total: 556ms	remaining: 1m 8s
8:	learn: 0.0342852	total: 629ms	remaining: 1m 9s
9:	learn: 0.0331128	total: 712ms	remaining: 1m 10s
10:	learn: 0.0323201	total: 795ms	remaining: 1m 11s
11:	learn: 0.0318339	total: 879ms	remaining: 1m 12s
12:	learn: 0.0315543	total: 924ms	remaining: 1m 10s
13:	learn: 0.0312427	total: 995ms	remaining: 1m 10s
14:	learn: 0.0310871	total: 1.06s	remaining: 1m 9s
15:	learn: 0.0309423	total: 1.13s	remaining: 1m 9s
16:	learn: 0.0308373	total: 1.2s	remaining: 1m 9s
17:	learn: 0.0307484	total: 1.27s	remaining: 1m 9s
18:	learn: 0.0307064	total: 1.34s	remaining: 1m 8s
19:	l



22 Removed column: ft_time_of_day


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.3261272	total: 87ms	remaining: 1m 26s
1:	learn: 0.1688976	total: 178ms	remaining: 1m 28s
2:	learn: 0.0991129	total: 288ms	remaining: 1m 35s
3:	learn: 0.0682919	total: 376ms	remaining: 1m 33s
4:	learn: 0.0522877	total: 466ms	remaining: 1m 32s
5:	learn: 0.0444365	total: 552ms	remaining: 1m 31s
6:	learn: 0.0395711	total: 637ms	remaining: 1m 30s
7:	learn: 0.0368538	total: 728ms	remaining: 1m 30s
8:	learn: 0.0354172	total: 811ms	remaining: 1m 29s
9:	learn: 0.0344367	total: 896ms	remaining: 1m 28s
10:	learn: 0.0337745	total: 985ms	remaining: 1m 28s
11:	learn: 0.0333556	total: 1.07s	remaining: 1m 27s
12:	learn: 0.0330689	total: 1.16s	remaining: 1m 27s
13:	learn: 0.0328940	total: 1.25s	remaining: 1m 27s
14:	learn: 0.0327413	total: 1.33s	remaining: 1m 27s
15:	learn: 0.0326468	total: 1.43s	remaining: 1m 27s
16:	learn: 0.0325775	total: 1.52s	remaining: 1m 27s
17:	learn: 0.0325286	total: 1.61s	remaining: 1m 27s
18:	learn: 0.0324928	total: 1.69s	remaining: 



Learning rate set to 0.168984
0:	learn: 0.3512939	total: 90ms	remaining: 1m 29s
1:	learn: 0.1887790	total: 158ms	remaining: 1m 18s
2:	learn: 0.1143343	total: 229ms	remaining: 1m 16s
3:	learn: 0.0772311	total: 299ms	remaining: 1m 14s
4:	learn: 0.0582531	total: 366ms	remaining: 1m 12s
5:	learn: 0.0482660	total: 430ms	remaining: 1m 11s
6:	learn: 0.0426049	total: 502ms	remaining: 1m 11s
7:	learn: 0.0389170	total: 570ms	remaining: 1m 10s
8:	learn: 0.0367406	total: 639ms	remaining: 1m 10s
9:	learn: 0.0353972	total: 705ms	remaining: 1m 9s
10:	learn: 0.0344507	total: 780ms	remaining: 1m 10s
11:	learn: 0.0339031	total: 851ms	remaining: 1m 10s
12:	learn: 0.0335366	total: 922ms	remaining: 1m 10s
13:	learn: 0.0333282	total: 972ms	remaining: 1m 8s
14:	learn: 0.0330501	total: 1.04s	remaining: 1m 8s
15:	learn: 0.0328949	total: 1.11s	remaining: 1m 8s
16:	learn: 0.0327711	total: 1.19s	remaining: 1m 8s
17:	learn: 0.0327036	total: 1.26s	remaining: 1m 8s
18:	learn: 0.0326651	total: 1.33s	remaining: 1m 8s




23 Removed column: ft_mean_amt_per_user


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


Learning rate set to 0.185877
0:	learn: 0.3296692	total: 84.5ms	remaining: 1m 24s
1:	learn: 0.1715330	total: 162ms	remaining: 1m 20s
2:	learn: 0.1025169	total: 240ms	remaining: 1m 19s
3:	learn: 0.0705369	total: 323ms	remaining: 1m 20s
4:	learn: 0.0547446	total: 399ms	remaining: 1m 19s
5:	learn: 0.0464817	total: 486ms	remaining: 1m 20s
6:	learn: 0.0419352	total: 553ms	remaining: 1m 18s
7:	learn: 0.0393357	total: 638ms	remaining: 1m 19s
8:	learn: 0.0378041	total: 716ms	remaining: 1m 18s
9:	learn: 0.0368776	total: 802ms	remaining: 1m 19s
10:	learn: 0.0363095	total: 877ms	remaining: 1m 18s
11:	learn: 0.0359512	total: 957ms	remaining: 1m 18s
12:	learn: 0.0357300	total: 1.06s	remaining: 1m 20s
13:	learn: 0.0355868	total: 1.15s	remaining: 1m 21s
14:	learn: 0.0354928	total: 1.23s	remaining: 1m 20s
15:	learn: 0.0354350	total: 1.31s	remaining: 1m 20s
16:	learn: 0.0353935	total: 1.38s	remaining: 1m 19s
17:	learn: 0.0353740	total: 1.46s	remaining: 1m 19s
18:	learn: 0.0353538	total: 1.53s	remaining



Learning rate set to 0.168984
0:	learn: 0.3542890	total: 69.3ms	remaining: 1m 9s
1:	learn: 0.1935448	total: 140ms	remaining: 1m 10s
2:	learn: 0.1177921	total: 203ms	remaining: 1m 7s
3:	learn: 0.0805775	total: 279ms	remaining: 1m 9s
4:	learn: 0.0613140	total: 349ms	remaining: 1m 9s
5:	learn: 0.0508165	total: 413ms	remaining: 1m 8s
6:	learn: 0.0448290	total: 480ms	remaining: 1m 8s
7:	learn: 0.0412870	total: 550ms	remaining: 1m 8s
8:	learn: 0.0391350	total: 625ms	remaining: 1m 8s
9:	learn: 0.0377996	total: 697ms	remaining: 1m 9s
10:	learn: 0.0369482	total: 760ms	remaining: 1m 8s
11:	learn: 0.0364010	total: 814ms	remaining: 1m 6s
12:	learn: 0.0360463	total: 882ms	remaining: 1m 6s
13:	learn: 0.0358079	total: 948ms	remaining: 1m 6s
14:	learn: 0.0356514	total: 1.02s	remaining: 1m 6s
15:	learn: 0.0355450	total: 1.08s	remaining: 1m 6s
16:	learn: 0.0354748	total: 1.15s	remaining: 1m 6s
17:	learn: 0.0354244	total: 1.21s	remaining: 1m 6s
18:	learn: 0.0353873	total: 1.27s	remaining: 1m 5s
19:	learn



24 Removed column: ft_state_fraud_rate


  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


ValueError: at least one array or dtype is required

In [33]:
first_iter = True
cols = 'None'
for res in feature_sel_res:
    print(f'Removed columns : {res['removed_col']} train: {res['train']['recall']} - test: {res['test']['recall']}')
    if first_iter == False:
        print(f'Train % change: {100*((res['train']['recall']*100 - prev_train_recall)/prev_train_recall )} - Test % change: {100*((res['test']['recall']*100 -prev_test_recall)/prev_test_recall)}')
    prev_train_recall = res['train']['recall']*100
    prev_test_recall = res['test']['recall']*100
    first_iter = False

Removed columns : None train: 0.9774123241529622 - test: 0.8447606727037517
Removed columns : state train: 0.9799881117495541 - test: 0.8525226390685641
Train % change: 0.26353131968375526 - Test % change: 0.918836140888212
Removed columns : ft_age_group train: 0.9793936992272637 - test: 0.8227684346701164
Train % change: -0.060655074807928415 - Test % change: -3.4901365705614653
Removed columns : ft_trans_hour train: 0.9647315236774321 - test: 0.796895213454075
Train % change: -1.4970665587699796 - Test % change: -3.1446540880503213
Removed columns : ft_amt_deviation train: 0.9631464236179909 - test: 0.7943078913324709
Train % change: -0.16430478537687193 - Test % change: -0.3246753246753201
Removed columns : city train: 0.9564097483653655 - test: 0.795601552393273
Train % change: -0.6994445587327692 - Test % change: 0.1628664495114073
Removed columns : city_pop train: 0.9486823855755895 - test: 0.7813712807244502
Train % change: -0.8079552517091292 - Test % change: -1.788617886178863