# **Predictive Analytics: Shinkansen Passenger Satisfaction**

## Data Preprocessing

**1. Import necessary libraries**

In [None]:
import pandas as pd
import numpy as np
import regex as re
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import make_pipeline
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import warnings
warnings.simplefilter("ignore")

**2. Load the training and test data separately**

In [3]:
#train data
surveydata_train = pd.read_csv("Surveydata_train.csv")
traveldata_train = pd.read_csv("Traveldata_train.csv")
#test data
surveydata_test = pd.read_csv("Surveydata_test.csv")
traveldata_test = pd.read_csv("Traveldata_test.csv")

**3. Understand the data (check for each of the following in both the train and test dataset)**
<ol>
<li>Check a sample of the data</li>
<li>Use the info() and describe() functions for more information</li>
<li>Look for the presence of null values in the dataset</li>
<li>Look for the presence of bad data or unwanted characters like "$" or "#" in the numerical columns</li>
</ol>

In [4]:
#A. Check a sample of the data
surveydata_train.sample(5)

Unnamed: 0,ID,Overall_Experience,Seat_Comfort,Seat_Class,Arrival_Time_Convenient,Catering,Platform_Location,Onboard_Wifi_Service,Onboard_Entertainment,Online_Support,Ease_of_Online_Booking,Onboard_Service,Legroom,Baggage_Handling,CheckIn_Service,Cleanliness,Online_Boarding
68675,98868676,0,Poor,Ordinary,Needs Improvement,Poor,Needs Improvement,Needs Improvement,Poor,Needs Improvement,Needs Improvement,Acceptable,Acceptable,Needs Improvement,Good,Acceptable,Needs Improvement
36061,98836062,1,Good,Ordinary,Needs Improvement,Needs Improvement,Needs Improvement,Good,Good,Good,Good,Poor,Good,Needs Improvement,Excellent,Poor,Good
88843,98888844,1,Good,Green Car,Good,Good,Convenient,Good,Good,Good,Good,Acceptable,Excellent,Poor,Good,Needs Improvement,Good
57840,98857841,1,Good,Green Car,Poor,Acceptable,Manageable,Good,Good,Good,Good,Excellent,Acceptable,Excellent,Good,Needs Improvement,Good
70269,98870270,1,Poor,Ordinary,Poor,Poor,Inconvenient,Acceptable,Excellent,Good,Good,Good,Excellent,Good,Excellent,Good,Good


In [5]:
#A. Check a sample of the data
traveldata_train.sample(5)

Unnamed: 0,ID,Gender,Customer_Type,Age,Type_Travel,Travel_Class,Travel_Distance,Departure_Delay_in_Mins,Arrival_Delay_in_Mins
93683,98893684,Male,Loyal Customer,27.0,Business Travel,Eco,1255,2.0,0.0
50768,98850769,Female,Loyal Customer,26.0,Personal Travel,Eco,3317,0.0,0.0
48747,98848748,Male,,29.0,Business Travel,Eco,2115,20.0,16.0
83971,98883972,Male,Loyal Customer,22.0,Personal Travel,Eco,1268,36.0,72.0
55949,98855950,Male,Loyal Customer,47.0,Personal Travel,Eco,2238,0.0,0.0


In [6]:
#B. Use the info() and describe() functions for more information
surveydata_train.info()
surveydata_train.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94379 entries, 0 to 94378
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   ID                       94379 non-null  int64 
 1   Overall_Experience       94379 non-null  int64 
 2   Seat_Comfort             94318 non-null  object
 3   Seat_Class               94379 non-null  object
 4   Arrival_Time_Convenient  85449 non-null  object
 5   Catering                 85638 non-null  object
 6   Platform_Location        94349 non-null  object
 7   Onboard_Wifi_Service     94349 non-null  object
 8   Onboard_Entertainment    94361 non-null  object
 9   Online_Support           94288 non-null  object
 10  Ease_of_Online_Booking   94306 non-null  object
 11  Onboard_Service          86778 non-null  object
 12  Legroom                  94289 non-null  object
 13  Baggage_Handling         94237 non-null  object
 14  CheckIn_Service          94302 non-nul

Unnamed: 0,ID,Overall_Experience
count,94379.0,94379.0
mean,98847190.0,0.546658
std,27245.01,0.497821
min,98800000.0,0.0
25%,98823600.0,0.0
50%,98847190.0,1.0
75%,98870780.0,1.0
max,98894380.0,1.0


In [7]:
#B. Use the info() and describe() functions for more information
traveldata_train.info()
traveldata_train.describe()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 94379 entries, 0 to 94378
Data columns (total 9 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   ID                       94379 non-null  int64  
 1   Gender                   94302 non-null  object 
 2   Customer_Type            85428 non-null  object 
 3   Age                      94346 non-null  float64
 4   Type_Travel              85153 non-null  object 
 5   Travel_Class             94379 non-null  object 
 6   Travel_Distance          94379 non-null  int64  
 7   Departure_Delay_in_Mins  94322 non-null  float64
 8   Arrival_Delay_in_Mins    94022 non-null  float64
dtypes: float64(3), int64(2), object(4)
memory usage: 6.5+ MB


Unnamed: 0,ID,Age,Travel_Distance,Departure_Delay_in_Mins,Arrival_Delay_in_Mins
count,94379.0,94346.0,94379.0,94322.0,94022.0
mean,98847190.0,39.419647,1978.888185,14.647092,15.005222
std,27245.01,15.116632,1027.961019,38.138781,38.439409
min,98800000.0,7.0,50.0,0.0,0.0
25%,98823600.0,27.0,1359.0,0.0,0.0
50%,98847190.0,40.0,1923.0,0.0,0.0
75%,98870780.0,51.0,2538.0,12.0,13.0
max,98894380.0,85.0,6951.0,1592.0,1584.0


In [8]:
#C. Look for the presence of null values in the dataset
surveydata_train.isnull().values.any()

True

In [9]:
#C. Look for the presence of null values in the dataset
traveldata_train.isnull().values.any()

True

In [10]:
#D. Look for the presence of bad data or unwanted characters like "$" or "#" in the numerical columns
surveydata_train.describe().columns.astype(str).str.contains("($|#)").any()

True

In [11]:
#D. Look for the presence of bad data or unwanted characters like "$" or "#" in the numerical columns
traveldata_train.describe().columns.astype(str).str.contains("($|#)").any()

True

**4. Clean the data**
<ol>
<li>Treat for missing values in both the train & test set</li>
<li>Remove bad data values in both the train & test set</li>
<li>Encode the categorical object variables in both the train & test set</li>
<li>Perform Feature Engineering if necessary</li>
<li>Scale/Normalize the dataset if necessary</li>
</ol>

In [12]:
def dataframe_cleaning(df):
    # Before cleaning any data, it is important to transform Categorical values to numerical values
    # Retrieve categorical columns, which have data type as "object"
    df_object_columns = df.select_dtypes(include=['object']).columns
    # One-hot encoding for categorical variables
    df_encoded = pd.get_dummies(df, columns=df_object_columns, dummy_na=True)
    #A. Treat for missing values in both the train & test set
    imputer = KNNImputer(n_neighbors=15, 
                         weights='uniform', 
                         metric='nan_euclidean', 
                         missing_values=np.nan, 
                         add_indicator=False)
    #np array is created
    df_imputed = imputer.fit_transform(df_encoded)
    #back to dataframe
    df_without_nans = pd.DataFrame(data=df_imputed, columns=df_encoded.columns)
    
    return df_without_nans

In [13]:
#train data
surveydata_train_clean = dataframe_cleaning(surveydata_train)
traveldata_train_clean = dataframe_cleaning(traveldata_train)
#test data
surveydata_test_clean = dataframe_cleaning(surveydata_test)
traveldata_test_clean = dataframe_cleaning(traveldata_test)

In [14]:
#if a column is not present on the test set, then it is not important in the train set. Sans the target variable "Overall_Experience"
def shape_equalizer(df1, df2):
    """train, then test"""
    df1_columns = df1.columns
    df2_columns = df2.columns
    difference = list(set(df1_columns).difference(set(df2_columns)))
    if "Overall_Experience" in difference:
        difference.pop(difference.index("Overall_Experience"))
    df1 = df1.drop(difference, axis=1)
    return df1, df2

In [15]:
#survey data
surveydata_train_equalized, surveydata_test_equalized = shape_equalizer(surveydata_train_clean, surveydata_test_clean)
#travel data
traveldata_train_equalized, traveldata_test_equalized = shape_equalizer(traveldata_train_clean, traveldata_test_clean)

In [16]:
if surveydata_train_equalized.shape[0] == traveldata_train_equalized.shape[0] and surveydata_test_equalized.shape[0] == traveldata_test_equalized.shape[0]:
    print("Same number of rows between survey and travel data sets.")

Same number of rows between survey and travel data sets.


In [17]:
# -1 due to target column "Overall_Experience"
if surveydata_train_equalized.shape[1]-1 == surveydata_test_equalized.shape[1] and traveldata_train_equalized.shape[1] == traveldata_test_equalized.shape[1]:
    print("Same number of columns between test and train data sets.")

Same number of columns between test and train data sets.


In [18]:
#last, join the two datasets for train and the two datasets for test
#train
train_data = traveldata_train_equalized.merge(surveydata_train_equalized, on='ID')
#test
test_data = traveldata_test_equalized.merge(surveydata_test_equalized, on='ID')

## Model Building

In [19]:
# train, test, split for all
X = train_data.drop('Overall_Experience', axis=1)
y = train_data['Overall_Experience']

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [20]:
# save function for all
def results_to_csv(y_pred, var_name):
    data = test_data.copy()
    data['Overall_Experience'] = y_pred
    result = data[['ID', 'Overall_Experience']]
    #to integers
    result[['ID', 'Overall_Experience']] = result[['ID', 'Overall_Experience']].astype(int)
    #print head
    print(result.head(5))
    #save as csv
    name = "_".join(var_name.split('_')[-2:]) + '_result.csv'
    return result.to_csv(name, index=False)

### catboost

In [None]:
# Fit the model
# CatBoostError: only one of the parameters iterations, n_estimators, num_boost_round, num_trees should be initialized.
cat = make_pipeline(
    StandardScaler(), 
    CatBoostClassifier(n_estimators=4000,#iterations=4000,
                       depth=12, 
                       learning_rate=0.02, 
                       loss_function='CrossEntropy',
                       verbose=2,
                       random_seed=42,
                       thread_count=-1)
)

cat.fit(X_train, y_train)

In [None]:
# Train the model
y_pred_train_cat = cat.predict(X_train)

In [None]:
# Evaluate model on training data
print(confusion_matrix(y_train, y_pred_train_cat))

In [None]:
# Make predictions on validation data
y_pred_val_extra_trees = cat.predict(X_val)
cm = confusion_matrix(y_val, y_pred_val_extra_trees)
#pretty plot
plt.figure(figsize=(2,2))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')

# Add labels (depends on your problem)
plt.xlabel('Predicted')
plt.ylabel('Actual')

plt.show()

In [None]:
[[TN,FP],[FN,TP]] = cm
Accuracy = (TN + TP) / (TN + FP + FN + TP)
Precision = TP / (TP + FP)
Recall = TP / (TP + FN)
F1_score = 2 * (Precision * Recall) / (Precision + Recall)
maybe_acc = Accuracy - 0.003
target = 0.9576428
print(f"cm:\n{cm}\nAccuracy:\t{Accuracy:.7f}\nPrecision:\t{Precision:.7f}\nRecall:\t\t{Recall:.7f}\nF1_score:\t{F1_score:.7f}\n\nMaybe Acc:\t{maybe_acc:.7f}\nTarget:\t\t{target:.7f}")


In [None]:
# Make prediction
y_pred_test_cat_boost = cat.predict(test_data)
y_pred_test_cat_boost


In [None]:
results_to_csv(y_pred_test_cat_boost, "y_pred_test_cat_boost")


## Best model

In [24]:
from sklearn.experimental import enable_halving_search_cv  # explicitly require this experimental feature
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.base import BaseEstimator

# Define the parameter grid
first_param_grid = {
    'n_estimators': [3000, 3500, 4000],
    'depth': [8, 10, 12],
    'learning_rate' : [0.03, 0.1, 0.2]
}
#best_params results were {'depth': 8, 'learning_rate': 0.03, 'n_estimators': 4000}
#thus a modification ensues
second_param_grid = {
    'n_estimators': [4000, 4500, 5000],
    'depth': [7, 8, 9],
    'learning_rate' : [0.01, 0.02, 0.03]
}
#best_params results were {'depth': 8, 'learning_rate': 0.01, 'n_estimators': 5000}
#thus another modification ensues
second_param_grid = {
    'n_estimators': [5000, 6000, 7000],
    'learning_rate' : [0.01, 0.005, 0.001]
}

class CatBoostClassifierWithEarlyStopping(CatBoostClassifier):
    def fit(self, X, y, **fit_params):
        if 'eval_set' not in fit_params:
            raise ValueError("`eval_set` must be provided for early stopping")
        super().fit(X, y, early_stopping_rounds=10, **fit_params)
        return self

# Define your custom classifier
catb = CatBoostClassifierWithEarlyStopping()

# Make sure to pass the validation set as `eval_set` when fitting
halving_search = HalvingGridSearchCV(estimator = catb,
                                     param_grid = param_grid,
                                     cv = 4,
                                     scoring='accuracy',
                                     resource='n_samples',
                                     max_resources='auto',
                                     factor=2,
                                     n_jobs = -1,
                                     verbose = 2)

# Fit the grid search to the data
halving_search.fit(X_train, y_train, eval_set=(X_val, y_val))

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 4718
max_resources_: 75503
aggressive_elimination: False
factor: 2
----------
iter: 0
n_candidates: 27
n_resources: 4718
Fitting 3 folds for each of 27 candidates, totalling 81 fits
----------
iter: 1
n_candidates: 14
n_resources: 9436
Fitting 3 folds for each of 14 candidates, totalling 42 fits
----------
iter: 2
n_candidates: 7
n_resources: 18872
Fitting 3 folds for each of 7 candidates, totalling 21 fits
----------
iter: 3
n_candidates: 4
n_resources: 37744
Fitting 3 folds for each of 4 candidates, totalling 12 fits
----------
iter: 4
n_candidates: 2
n_resources: 75488
Fitting 3 folds for each of 2 candidates, totalling 6 fits
0:	learn: 0.6800600	test: 0.6800604	best: 0.6800604 (0)	total: 19.8ms	remaining: 1m 38s
1:	learn: 0.6675425	test: 0.6675260	best: 0.6675260 (1)	total: 38.5ms	remaining: 1m 36s
2:	learn: 0.6552898	test: 0.6552874	best: 0.6552874 (2)	total: 56.2ms	remaining: 1m 33s
3:	learn: 0.6435

87:	learn: 0.2810963	test: 0.2807548	best: 0.2807548 (87)	total: 1.95s	remaining: 1m 48s
88:	learn: 0.2798934	test: 0.2795443	best: 0.2795443 (88)	total: 1.97s	remaining: 1m 48s
89:	learn: 0.2785206	test: 0.2781556	best: 0.2781556 (89)	total: 1.99s	remaining: 1m 48s
90:	learn: 0.2768940	test: 0.2765428	best: 0.2765428 (90)	total: 2.02s	remaining: 1m 48s
91:	learn: 0.2752179	test: 0.2748741	best: 0.2748741 (91)	total: 2.04s	remaining: 1m 48s
92:	learn: 0.2732133	test: 0.2728639	best: 0.2728639 (92)	total: 2.06s	remaining: 1m 48s
93:	learn: 0.2718423	test: 0.2714771	best: 0.2714771 (93)	total: 2.08s	remaining: 1m 48s
94:	learn: 0.2701528	test: 0.2698140	best: 0.2698140 (94)	total: 2.1s	remaining: 1m 48s
95:	learn: 0.2686394	test: 0.2683077	best: 0.2683077 (95)	total: 2.12s	remaining: 1m 48s
96:	learn: 0.2676051	test: 0.2672768	best: 0.2672768 (96)	total: 2.14s	remaining: 1m 48s
97:	learn: 0.2665427	test: 0.2662398	best: 0.2662398 (97)	total: 2.16s	remaining: 1m 48s
98:	learn: 0.2650706	t

179:	learn: 0.2004708	test: 0.2013103	best: 0.2013103 (179)	total: 3.86s	remaining: 1m 43s
180:	learn: 0.2001829	test: 0.2010534	best: 0.2010534 (180)	total: 3.88s	remaining: 1m 43s
181:	learn: 0.1998621	test: 0.2007524	best: 0.2007524 (181)	total: 3.91s	remaining: 1m 43s
182:	learn: 0.1992234	test: 0.2001197	best: 0.2001197 (182)	total: 3.93s	remaining: 1m 43s
183:	learn: 0.1986135	test: 0.1995096	best: 0.1995096 (183)	total: 3.95s	remaining: 1m 43s
184:	learn: 0.1981438	test: 0.1990403	best: 0.1990403 (184)	total: 3.97s	remaining: 1m 43s
185:	learn: 0.1977614	test: 0.1986508	best: 0.1986508 (185)	total: 3.99s	remaining: 1m 43s
186:	learn: 0.1972655	test: 0.1981957	best: 0.1981957 (186)	total: 4.01s	remaining: 1m 43s
187:	learn: 0.1969090	test: 0.1978724	best: 0.1978724 (187)	total: 4.03s	remaining: 1m 43s
188:	learn: 0.1962994	test: 0.1972728	best: 0.1972728 (188)	total: 4.05s	remaining: 1m 43s
189:	learn: 0.1957871	test: 0.1967600	best: 0.1967600 (189)	total: 4.07s	remaining: 1m 42s

274:	learn: 0.1680969	test: 0.1700385	best: 0.1700385 (274)	total: 5.84s	remaining: 1m 40s
275:	learn: 0.1677827	test: 0.1697470	best: 0.1697470 (275)	total: 5.86s	remaining: 1m 40s
276:	learn: 0.1675181	test: 0.1695024	best: 0.1695024 (276)	total: 5.88s	remaining: 1m 40s
277:	learn: 0.1672770	test: 0.1692847	best: 0.1692847 (277)	total: 5.9s	remaining: 1m 40s
278:	learn: 0.1668637	test: 0.1688646	best: 0.1688646 (278)	total: 5.92s	remaining: 1m 40s
279:	learn: 0.1665150	test: 0.1685310	best: 0.1685310 (279)	total: 5.93s	remaining: 1m 40s
280:	learn: 0.1663536	test: 0.1683743	best: 0.1683743 (280)	total: 5.96s	remaining: 1m 40s
281:	learn: 0.1662194	test: 0.1682578	best: 0.1682578 (281)	total: 5.98s	remaining: 1m 40s
282:	learn: 0.1659004	test: 0.1679479	best: 0.1679479 (282)	total: 6s	remaining: 1m 40s
283:	learn: 0.1657195	test: 0.1677714	best: 0.1677714 (283)	total: 6.02s	remaining: 1m 39s
284:	learn: 0.1655141	test: 0.1675811	best: 0.1675811 (284)	total: 6.04s	remaining: 1m 39s
285

368:	learn: 0.1505877	test: 0.1535860	best: 0.1535860 (368)	total: 7.73s	remaining: 1m 37s
369:	learn: 0.1504724	test: 0.1535001	best: 0.1535001 (369)	total: 7.75s	remaining: 1m 37s
370:	learn: 0.1503522	test: 0.1533811	best: 0.1533811 (370)	total: 7.78s	remaining: 1m 37s
371:	learn: 0.1501790	test: 0.1532022	best: 0.1532022 (371)	total: 7.8s	remaining: 1m 37s
372:	learn: 0.1500816	test: 0.1531094	best: 0.1531094 (372)	total: 7.82s	remaining: 1m 36s
373:	learn: 0.1498558	test: 0.1528875	best: 0.1528875 (373)	total: 7.84s	remaining: 1m 36s
374:	learn: 0.1497804	test: 0.1528306	best: 0.1528306 (374)	total: 7.86s	remaining: 1m 36s
375:	learn: 0.1495668	test: 0.1526285	best: 0.1526285 (375)	total: 7.88s	remaining: 1m 36s
376:	learn: 0.1495061	test: 0.1525850	best: 0.1525850 (376)	total: 7.9s	remaining: 1m 36s
377:	learn: 0.1493625	test: 0.1524514	best: 0.1524514 (377)	total: 7.92s	remaining: 1m 36s
378:	learn: 0.1491318	test: 0.1522169	best: 0.1522169 (378)	total: 7.94s	remaining: 1m 36s
3

467:	learn: 0.1382818	test: 0.1423562	best: 0.1423562 (467)	total: 9.67s	remaining: 1m 33s
468:	learn: 0.1382271	test: 0.1423143	best: 0.1423143 (468)	total: 9.69s	remaining: 1m 33s
469:	learn: 0.1381379	test: 0.1422284	best: 0.1422284 (469)	total: 9.71s	remaining: 1m 33s
470:	learn: 0.1380729	test: 0.1421685	best: 0.1421685 (470)	total: 9.72s	remaining: 1m 33s
471:	learn: 0.1379779	test: 0.1420751	best: 0.1420751 (471)	total: 9.74s	remaining: 1m 33s
472:	learn: 0.1379179	test: 0.1420165	best: 0.1420165 (472)	total: 9.76s	remaining: 1m 33s
473:	learn: 0.1378128	test: 0.1419166	best: 0.1419166 (473)	total: 9.78s	remaining: 1m 33s
474:	learn: 0.1377626	test: 0.1418791	best: 0.1418791 (474)	total: 9.8s	remaining: 1m 33s
475:	learn: 0.1376393	test: 0.1417750	best: 0.1417750 (475)	total: 9.81s	remaining: 1m 33s
476:	learn: 0.1375052	test: 0.1416505	best: 0.1416505 (476)	total: 9.83s	remaining: 1m 33s
477:	learn: 0.1373331	test: 0.1414893	best: 0.1414893 (477)	total: 9.85s	remaining: 1m 33s


563:	learn: 0.1294307	test: 0.1345099	best: 0.1345099 (563)	total: 11.3s	remaining: 1m 29s
564:	learn: 0.1293099	test: 0.1343953	best: 0.1343953 (564)	total: 11.4s	remaining: 1m 29s
565:	learn: 0.1292251	test: 0.1343164	best: 0.1343164 (565)	total: 11.4s	remaining: 1m 29s
566:	learn: 0.1291478	test: 0.1342362	best: 0.1342362 (566)	total: 11.4s	remaining: 1m 29s
567:	learn: 0.1290352	test: 0.1341382	best: 0.1341382 (567)	total: 11.4s	remaining: 1m 29s
568:	learn: 0.1289753	test: 0.1340863	best: 0.1340863 (568)	total: 11.4s	remaining: 1m 29s
569:	learn: 0.1288949	test: 0.1340126	best: 0.1340126 (569)	total: 11.5s	remaining: 1m 29s
570:	learn: 0.1288542	test: 0.1339688	best: 0.1339688 (570)	total: 11.5s	remaining: 1m 28s
571:	learn: 0.1287445	test: 0.1338622	best: 0.1338622 (571)	total: 11.5s	remaining: 1m 28s
572:	learn: 0.1286737	test: 0.1338141	best: 0.1338141 (572)	total: 11.5s	remaining: 1m 28s
573:	learn: 0.1286101	test: 0.1337616	best: 0.1337616 (573)	total: 11.5s	remaining: 1m 28s

660:	learn: 0.1217268	test: 0.1277019	best: 0.1277019 (660)	total: 13.1s	remaining: 1m 25s
661:	learn: 0.1216754	test: 0.1276693	best: 0.1276693 (661)	total: 13.1s	remaining: 1m 25s
662:	learn: 0.1215980	test: 0.1275899	best: 0.1275899 (662)	total: 13.1s	remaining: 1m 25s
663:	learn: 0.1215497	test: 0.1275537	best: 0.1275537 (663)	total: 13.1s	remaining: 1m 25s
664:	learn: 0.1214376	test: 0.1274636	best: 0.1274636 (664)	total: 13.1s	remaining: 1m 25s
665:	learn: 0.1213535	test: 0.1273870	best: 0.1273870 (665)	total: 13.2s	remaining: 1m 25s
666:	learn: 0.1213231	test: 0.1273604	best: 0.1273604 (666)	total: 13.2s	remaining: 1m 25s
667:	learn: 0.1212683	test: 0.1273273	best: 0.1273273 (667)	total: 13.2s	remaining: 1m 25s
668:	learn: 0.1212065	test: 0.1272796	best: 0.1272796 (668)	total: 13.2s	remaining: 1m 25s
669:	learn: 0.1211299	test: 0.1272169	best: 0.1272169 (669)	total: 13.2s	remaining: 1m 25s
670:	learn: 0.1210756	test: 0.1271749	best: 0.1271749 (670)	total: 13.2s	remaining: 1m 25s

756:	learn: 0.1160917	test: 0.1231031	best: 0.1231031 (756)	total: 15s	remaining: 1m 24s
757:	learn: 0.1160267	test: 0.1230529	best: 0.1230529 (757)	total: 15s	remaining: 1m 24s
758:	learn: 0.1159318	test: 0.1229597	best: 0.1229597 (758)	total: 15s	remaining: 1m 24s
759:	learn: 0.1158822	test: 0.1229171	best: 0.1229171 (759)	total: 15.1s	remaining: 1m 24s
760:	learn: 0.1158385	test: 0.1228810	best: 0.1228810 (760)	total: 15.1s	remaining: 1m 24s
761:	learn: 0.1158114	test: 0.1228709	best: 0.1228709 (761)	total: 15.1s	remaining: 1m 24s
762:	learn: 0.1157618	test: 0.1228227	best: 0.1228227 (762)	total: 15.1s	remaining: 1m 24s
763:	learn: 0.1157363	test: 0.1228117	best: 0.1228117 (763)	total: 15.2s	remaining: 1m 24s
764:	learn: 0.1156895	test: 0.1227799	best: 0.1227799 (764)	total: 15.2s	remaining: 1m 24s
765:	learn: 0.1156241	test: 0.1227214	best: 0.1227214 (765)	total: 15.2s	remaining: 1m 23s
766:	learn: 0.1155925	test: 0.1227052	best: 0.1227052 (766)	total: 15.2s	remaining: 1m 23s
767:	

854:	learn: 0.1109794	test: 0.1190728	best: 0.1190728 (854)	total: 16.9s	remaining: 1m 22s
855:	learn: 0.1109489	test: 0.1190575	best: 0.1190575 (855)	total: 16.9s	remaining: 1m 22s
856:	learn: 0.1108883	test: 0.1190100	best: 0.1190100 (856)	total: 17s	remaining: 1m 21s
857:	learn: 0.1107978	test: 0.1189303	best: 0.1189303 (857)	total: 17s	remaining: 1m 21s
858:	learn: 0.1107433	test: 0.1188914	best: 0.1188914 (858)	total: 17s	remaining: 1m 21s
859:	learn: 0.1106557	test: 0.1188044	best: 0.1188044 (859)	total: 17s	remaining: 1m 21s
860:	learn: 0.1105739	test: 0.1187226	best: 0.1187226 (860)	total: 17s	remaining: 1m 21s
861:	learn: 0.1105362	test: 0.1186949	best: 0.1186949 (861)	total: 17.1s	remaining: 1m 21s
862:	learn: 0.1104532	test: 0.1186254	best: 0.1186254 (862)	total: 17.1s	remaining: 1m 21s
863:	learn: 0.1104223	test: 0.1186009	best: 0.1186009 (863)	total: 17.1s	remaining: 1m 21s
864:	learn: 0.1103730	test: 0.1185621	best: 0.1185621 (864)	total: 17.1s	remaining: 1m 21s
865:	lear

951:	learn: 0.1069016	test: 0.1159393	best: 0.1159393 (951)	total: 18.7s	remaining: 1m 19s
952:	learn: 0.1068675	test: 0.1159132	best: 0.1159132 (952)	total: 18.7s	remaining: 1m 19s
953:	learn: 0.1068058	test: 0.1158650	best: 0.1158650 (953)	total: 18.7s	remaining: 1m 19s
954:	learn: 0.1067737	test: 0.1158429	best: 0.1158429 (954)	total: 18.7s	remaining: 1m 19s
955:	learn: 0.1067468	test: 0.1158360	best: 0.1158360 (955)	total: 18.7s	remaining: 1m 19s
956:	learn: 0.1067092	test: 0.1158087	best: 0.1158087 (956)	total: 18.7s	remaining: 1m 19s
957:	learn: 0.1066771	test: 0.1157898	best: 0.1157898 (957)	total: 18.8s	remaining: 1m 19s
958:	learn: 0.1066433	test: 0.1157647	best: 0.1157647 (958)	total: 18.8s	remaining: 1m 19s
959:	learn: 0.1066145	test: 0.1157425	best: 0.1157425 (959)	total: 18.8s	remaining: 1m 19s
960:	learn: 0.1065897	test: 0.1157316	best: 0.1157316 (960)	total: 18.8s	remaining: 1m 19s
961:	learn: 0.1065409	test: 0.1156966	best: 0.1156966 (961)	total: 18.8s	remaining: 1m 19s

1050:	learn: 0.1034539	test: 0.1135496	best: 0.1135496 (1050)	total: 20.4s	remaining: 1m 16s
1051:	learn: 0.1034346	test: 0.1135428	best: 0.1135428 (1051)	total: 20.4s	remaining: 1m 16s
1052:	learn: 0.1034042	test: 0.1135204	best: 0.1135204 (1052)	total: 20.4s	remaining: 1m 16s
1053:	learn: 0.1033827	test: 0.1135092	best: 0.1135092 (1053)	total: 20.5s	remaining: 1m 16s
1054:	learn: 0.1033455	test: 0.1134813	best: 0.1134813 (1054)	total: 20.5s	remaining: 1m 16s
1055:	learn: 0.1033266	test: 0.1134652	best: 0.1134652 (1055)	total: 20.5s	remaining: 1m 16s
1056:	learn: 0.1033033	test: 0.1134445	best: 0.1134445 (1056)	total: 20.5s	remaining: 1m 16s
1057:	learn: 0.1032817	test: 0.1134412	best: 0.1134412 (1057)	total: 20.5s	remaining: 1m 16s
1058:	learn: 0.1032633	test: 0.1134312	best: 0.1134312 (1058)	total: 20.6s	remaining: 1m 16s
1059:	learn: 0.1032342	test: 0.1134112	best: 0.1134112 (1059)	total: 20.6s	remaining: 1m 16s
1060:	learn: 0.1032033	test: 0.1133920	best: 0.1133920 (1060)	total: 2

1139:	learn: 0.1005684	test: 0.1116438	best: 0.1116438 (1139)	total: 21.9s	remaining: 1m 14s
1140:	learn: 0.1005368	test: 0.1116288	best: 0.1116288 (1140)	total: 21.9s	remaining: 1m 14s
1141:	learn: 0.1005148	test: 0.1116150	best: 0.1116150 (1141)	total: 22s	remaining: 1m 14s
1142:	learn: 0.1004790	test: 0.1115953	best: 0.1115953 (1142)	total: 22s	remaining: 1m 14s
1143:	learn: 0.1004490	test: 0.1115680	best: 0.1115680 (1143)	total: 22s	remaining: 1m 14s
1144:	learn: 0.1004185	test: 0.1115494	best: 0.1115494 (1144)	total: 22s	remaining: 1m 14s
1145:	learn: 0.1003965	test: 0.1115373	best: 0.1115373 (1145)	total: 22s	remaining: 1m 14s
1146:	learn: 0.1003629	test: 0.1115136	best: 0.1115136 (1146)	total: 22s	remaining: 1m 14s
1147:	learn: 0.1003280	test: 0.1114902	best: 0.1114902 (1147)	total: 22.1s	remaining: 1m 13s
1148:	learn: 0.1003118	test: 0.1114758	best: 0.1114758 (1148)	total: 22.1s	remaining: 1m 13s
1149:	learn: 0.1002959	test: 0.1114673	best: 0.1114673 (1149)	total: 22.1s	remaini

1229:	learn: 0.0982058	test: 0.1101924	best: 0.1101924 (1229)	total: 23.4s	remaining: 1m 11s
1230:	learn: 0.0981916	test: 0.1101885	best: 0.1101885 (1230)	total: 23.4s	remaining: 1m 11s
1231:	learn: 0.0981631	test: 0.1101635	best: 0.1101635 (1231)	total: 23.4s	remaining: 1m 11s
1232:	learn: 0.0981404	test: 0.1101476	best: 0.1101476 (1232)	total: 23.5s	remaining: 1m 11s
1233:	learn: 0.0981099	test: 0.1101209	best: 0.1101209 (1233)	total: 23.5s	remaining: 1m 11s
1234:	learn: 0.0980799	test: 0.1101059	best: 0.1101059 (1234)	total: 23.5s	remaining: 1m 11s
1235:	learn: 0.0980464	test: 0.1100835	best: 0.1100835 (1235)	total: 23.5s	remaining: 1m 11s
1236:	learn: 0.0980260	test: 0.1100736	best: 0.1100736 (1236)	total: 23.5s	remaining: 1m 11s
1237:	learn: 0.0979897	test: 0.1100488	best: 0.1100488 (1237)	total: 23.5s	remaining: 1m 11s
1238:	learn: 0.0979478	test: 0.1100174	best: 0.1100174 (1238)	total: 23.6s	remaining: 1m 11s
1239:	learn: 0.0979098	test: 0.1099807	best: 0.1099807 (1239)	total: 2

1319:	learn: 0.0957833	test: 0.1087745	best: 0.1087745 (1319)	total: 24.9s	remaining: 1m 9s
1320:	learn: 0.0957556	test: 0.1087544	best: 0.1087544 (1320)	total: 24.9s	remaining: 1m 9s
1321:	learn: 0.0957365	test: 0.1087379	best: 0.1087379 (1321)	total: 24.9s	remaining: 1m 9s
1322:	learn: 0.0957239	test: 0.1087358	best: 0.1087358 (1322)	total: 25s	remaining: 1m 9s
1323:	learn: 0.0957075	test: 0.1087291	best: 0.1087291 (1323)	total: 25s	remaining: 1m 9s
1324:	learn: 0.0956762	test: 0.1087199	best: 0.1087199 (1324)	total: 25s	remaining: 1m 9s
1325:	learn: 0.0956514	test: 0.1087070	best: 0.1087070 (1325)	total: 25s	remaining: 1m 9s
1326:	learn: 0.0956417	test: 0.1087052	best: 0.1087052 (1326)	total: 25s	remaining: 1m 9s
1327:	learn: 0.0956043	test: 0.1086826	best: 0.1086826 (1327)	total: 25s	remaining: 1m 9s
1328:	learn: 0.0955808	test: 0.1086738	best: 0.1086738 (1328)	total: 25.1s	remaining: 1m 9s
1329:	learn: 0.0955630	test: 0.1086571	best: 0.1086571 (1329)	total: 25.1s	remaining: 1m 9s


1409:	learn: 0.0937427	test: 0.1077412	best: 0.1077412 (1409)	total: 26.4s	remaining: 1m 7s
1410:	learn: 0.0937224	test: 0.1077326	best: 0.1077326 (1410)	total: 26.4s	remaining: 1m 7s
1411:	learn: 0.0936963	test: 0.1077274	best: 0.1077274 (1411)	total: 26.4s	remaining: 1m 7s
1412:	learn: 0.0936742	test: 0.1077219	best: 0.1077219 (1412)	total: 26.4s	remaining: 1m 7s
1413:	learn: 0.0936553	test: 0.1077047	best: 0.1077047 (1413)	total: 26.5s	remaining: 1m 7s
1414:	learn: 0.0936167	test: 0.1076779	best: 0.1076779 (1414)	total: 26.5s	remaining: 1m 7s
1415:	learn: 0.0936001	test: 0.1076727	best: 0.1076727 (1415)	total: 26.5s	remaining: 1m 7s
1416:	learn: 0.0935719	test: 0.1076482	best: 0.1076482 (1416)	total: 26.5s	remaining: 1m 7s
1417:	learn: 0.0935537	test: 0.1076402	best: 0.1076402 (1417)	total: 26.5s	remaining: 1m 7s
1418:	learn: 0.0935159	test: 0.1076175	best: 0.1076175 (1418)	total: 26.5s	remaining: 1m 6s
1419:	learn: 0.0935053	test: 0.1076168	best: 0.1076168 (1419)	total: 26.6s	remai

1500:	learn: 0.0916485	test: 0.1067732	best: 0.1067732 (1500)	total: 27.9s	remaining: 1m 5s
1501:	learn: 0.0916282	test: 0.1067664	best: 0.1067664 (1501)	total: 27.9s	remaining: 1m 5s
1502:	learn: 0.0916110	test: 0.1067649	best: 0.1067649 (1502)	total: 27.9s	remaining: 1m 5s
1503:	learn: 0.0915844	test: 0.1067484	best: 0.1067484 (1503)	total: 28s	remaining: 1m 4s
1504:	learn: 0.0915649	test: 0.1067438	best: 0.1067438 (1504)	total: 28s	remaining: 1m 4s
1505:	learn: 0.0915354	test: 0.1067295	best: 0.1067295 (1505)	total: 28s	remaining: 1m 4s
1506:	learn: 0.0915098	test: 0.1067229	best: 0.1067229 (1506)	total: 28s	remaining: 1m 4s
1507:	learn: 0.0914976	test: 0.1067132	best: 0.1067132 (1507)	total: 28s	remaining: 1m 4s
1508:	learn: 0.0914747	test: 0.1067043	best: 0.1067043 (1508)	total: 28s	remaining: 1m 4s
1509:	learn: 0.0914591	test: 0.1067019	best: 0.1067019 (1509)	total: 28.1s	remaining: 1m 4s
1510:	learn: 0.0914268	test: 0.1066769	best: 0.1066769 (1510)	total: 28.1s	remaining: 1m 4s


1591:	learn: 0.0898302	test: 0.1060181	best: 0.1060181 (1591)	total: 29.4s	remaining: 1m 2s
1592:	learn: 0.0897946	test: 0.1059870	best: 0.1059870 (1592)	total: 29.4s	remaining: 1m 2s
1593:	learn: 0.0897713	test: 0.1059778	best: 0.1059778 (1593)	total: 29.4s	remaining: 1m 2s
1594:	learn: 0.0897539	test: 0.1059671	best: 0.1059671 (1594)	total: 29.5s	remaining: 1m 2s
1595:	learn: 0.0897444	test: 0.1059668	best: 0.1059668 (1595)	total: 29.5s	remaining: 1m 2s
1596:	learn: 0.0897279	test: 0.1059629	best: 0.1059629 (1596)	total: 29.5s	remaining: 1m 2s
1597:	learn: 0.0897060	test: 0.1059521	best: 0.1059521 (1597)	total: 29.5s	remaining: 1m 2s
1598:	learn: 0.0896823	test: 0.1059377	best: 0.1059377 (1598)	total: 29.5s	remaining: 1m 2s
1599:	learn: 0.0896624	test: 0.1059304	best: 0.1059304 (1599)	total: 29.5s	remaining: 1m 2s
1600:	learn: 0.0896409	test: 0.1059168	best: 0.1059168 (1600)	total: 29.6s	remaining: 1m 2s
1601:	learn: 0.0896227	test: 0.1059098	best: 0.1059098 (1601)	total: 29.6s	remai

1681:	learn: 0.0881435	test: 0.1053186	best: 0.1053186 (1681)	total: 30.9s	remaining: 1m
1682:	learn: 0.0881177	test: 0.1053029	best: 0.1053029 (1682)	total: 30.9s	remaining: 1m
1683:	learn: 0.0881077	test: 0.1053057	best: 0.1053029 (1682)	total: 30.9s	remaining: 1m
1684:	learn: 0.0880979	test: 0.1053086	best: 0.1053029 (1682)	total: 31s	remaining: 1m
1685:	learn: 0.0880879	test: 0.1053107	best: 0.1053029 (1682)	total: 31s	remaining: 1m
1686:	learn: 0.0880704	test: 0.1053040	best: 0.1053029 (1682)	total: 31s	remaining: 1m
1687:	learn: 0.0880543	test: 0.1053043	best: 0.1053029 (1682)	total: 31s	remaining: 1m
1688:	learn: 0.0880425	test: 0.1053024	best: 0.1053024 (1688)	total: 31s	remaining: 1m
1689:	learn: 0.0880316	test: 0.1053005	best: 0.1053005 (1689)	total: 31s	remaining: 1m
1690:	learn: 0.0880169	test: 0.1052926	best: 0.1052926 (1690)	total: 31.1s	remaining: 1m
1691:	learn: 0.0879869	test: 0.1052675	best: 0.1052675 (1691)	total: 31.1s	remaining: 1m
1692:	learn: 0.0879696	test: 0.10

1772:	learn: 0.0865642	test: 0.1046198	best: 0.1046198 (1772)	total: 32.4s	remaining: 59s
1773:	learn: 0.0865518	test: 0.1046139	best: 0.1046139 (1773)	total: 32.4s	remaining: 59s
1774:	learn: 0.0865362	test: 0.1046123	best: 0.1046123 (1774)	total: 32.4s	remaining: 58.9s
1775:	learn: 0.0865057	test: 0.1045861	best: 0.1045861 (1775)	total: 32.5s	remaining: 58.9s
1776:	learn: 0.0864777	test: 0.1045620	best: 0.1045620 (1776)	total: 32.5s	remaining: 58.9s
1777:	learn: 0.0864594	test: 0.1045514	best: 0.1045514 (1777)	total: 32.5s	remaining: 58.9s
1778:	learn: 0.0864354	test: 0.1045370	best: 0.1045370 (1778)	total: 32.5s	remaining: 58.8s
1779:	learn: 0.0864225	test: 0.1045357	best: 0.1045357 (1779)	total: 32.5s	remaining: 58.8s
1780:	learn: 0.0863988	test: 0.1045230	best: 0.1045230 (1780)	total: 32.5s	remaining: 58.8s
1781:	learn: 0.0863895	test: 0.1045238	best: 0.1045230 (1780)	total: 32.6s	remaining: 58.8s
1782:	learn: 0.0863792	test: 0.1045221	best: 0.1045221 (1782)	total: 32.6s	remaining

1862:	learn: 0.0850645	test: 0.1038826	best: 0.1038826 (1862)	total: 33.9s	remaining: 57.1s
1863:	learn: 0.0850517	test: 0.1038838	best: 0.1038826 (1862)	total: 33.9s	remaining: 57s
1864:	learn: 0.0850334	test: 0.1038765	best: 0.1038765 (1864)	total: 33.9s	remaining: 57s
1865:	learn: 0.0850136	test: 0.1038753	best: 0.1038753 (1865)	total: 33.9s	remaining: 57s
1866:	learn: 0.0849997	test: 0.1038756	best: 0.1038753 (1865)	total: 34s	remaining: 57s
1867:	learn: 0.0849827	test: 0.1038659	best: 0.1038659 (1867)	total: 34s	remaining: 57s
1868:	learn: 0.0849754	test: 0.1038621	best: 0.1038621 (1868)	total: 34s	remaining: 56.9s
1869:	learn: 0.0849617	test: 0.1038449	best: 0.1038449 (1869)	total: 34s	remaining: 56.9s
1870:	learn: 0.0849464	test: 0.1038361	best: 0.1038361 (1870)	total: 34s	remaining: 56.9s
1871:	learn: 0.0849317	test: 0.1038325	best: 0.1038325 (1871)	total: 34s	remaining: 56.9s
1872:	learn: 0.0849173	test: 0.1038282	best: 0.1038282 (1872)	total: 34.1s	remaining: 56.9s
1873:	lear

1963:	learn: 0.0835411	test: 0.1033253	best: 0.1033253 (1963)	total: 35.6s	remaining: 55.1s
1964:	learn: 0.0835267	test: 0.1033201	best: 0.1033201 (1964)	total: 35.6s	remaining: 55.1s
1965:	learn: 0.0835078	test: 0.1033071	best: 0.1033071 (1965)	total: 35.7s	remaining: 55s
1966:	learn: 0.0834940	test: 0.1033030	best: 0.1033030 (1966)	total: 35.7s	remaining: 55s
1967:	learn: 0.0834801	test: 0.1033040	best: 0.1033030 (1966)	total: 35.7s	remaining: 55s
1968:	learn: 0.0834632	test: 0.1033013	best: 0.1033013 (1968)	total: 35.7s	remaining: 55s
1969:	learn: 0.0834488	test: 0.1033002	best: 0.1033002 (1969)	total: 35.7s	remaining: 55s
1970:	learn: 0.0834329	test: 0.1032938	best: 0.1032938 (1970)	total: 35.8s	remaining: 54.9s
1971:	learn: 0.0834167	test: 0.1032885	best: 0.1032885 (1971)	total: 35.8s	remaining: 54.9s
1972:	learn: 0.0834002	test: 0.1032846	best: 0.1032846 (1972)	total: 35.8s	remaining: 54.9s
1973:	learn: 0.0833756	test: 0.1032631	best: 0.1032631 (1973)	total: 35.8s	remaining: 54.9

2059:	learn: 0.0821191	test: 0.1028647	best: 0.1028647 (2059)	total: 37.3s	remaining: 53.3s
2060:	learn: 0.0821119	test: 0.1028611	best: 0.1028611 (2060)	total: 37.4s	remaining: 53.3s
2061:	learn: 0.0821094	test: 0.1028608	best: 0.1028608 (2061)	total: 37.4s	remaining: 53.2s
2062:	learn: 0.0820878	test: 0.1028528	best: 0.1028528 (2062)	total: 37.4s	remaining: 53.2s
2063:	learn: 0.0820803	test: 0.1028523	best: 0.1028523 (2063)	total: 37.4s	remaining: 53.2s
2064:	learn: 0.0820669	test: 0.1028511	best: 0.1028511 (2064)	total: 37.4s	remaining: 53.2s
2065:	learn: 0.0820476	test: 0.1028490	best: 0.1028490 (2065)	total: 37.4s	remaining: 53.2s
2066:	learn: 0.0820300	test: 0.1028447	best: 0.1028447 (2066)	total: 37.5s	remaining: 53.1s
2067:	learn: 0.0820103	test: 0.1028399	best: 0.1028399 (2067)	total: 37.5s	remaining: 53.1s
2068:	learn: 0.0819875	test: 0.1028305	best: 0.1028305 (2068)	total: 37.5s	remaining: 53.1s
2069:	learn: 0.0819749	test: 0.1028243	best: 0.1028243 (2069)	total: 37.5s	remai

2149:	learn: 0.0807658	test: 0.1024868	best: 0.1024868 (2149)	total: 38.8s	remaining: 51.5s
2150:	learn: 0.0807443	test: 0.1024847	best: 0.1024847 (2150)	total: 38.9s	remaining: 51.5s
2151:	learn: 0.0807298	test: 0.1024810	best: 0.1024810 (2151)	total: 38.9s	remaining: 51.4s
2152:	learn: 0.0807149	test: 0.1024741	best: 0.1024741 (2152)	total: 38.9s	remaining: 51.4s
2153:	learn: 0.0807020	test: 0.1024776	best: 0.1024741 (2152)	total: 38.9s	remaining: 51.4s
2154:	learn: 0.0806871	test: 0.1024753	best: 0.1024741 (2152)	total: 38.9s	remaining: 51.4s
2155:	learn: 0.0806710	test: 0.1024773	best: 0.1024741 (2152)	total: 38.9s	remaining: 51.4s
2156:	learn: 0.0806552	test: 0.1024746	best: 0.1024741 (2152)	total: 39s	remaining: 51.3s
2157:	learn: 0.0806450	test: 0.1024728	best: 0.1024728 (2157)	total: 39s	remaining: 51.3s
2158:	learn: 0.0806287	test: 0.1024686	best: 0.1024686 (2158)	total: 39s	remaining: 51.3s
2159:	learn: 0.0806091	test: 0.1024619	best: 0.1024619 (2159)	total: 39s	remaining: 51

2250:	learn: 0.0791626	test: 0.1020828	best: 0.1020828 (2250)	total: 40.5s	remaining: 49.5s
2251:	learn: 0.0791486	test: 0.1020793	best: 0.1020793 (2251)	total: 40.6s	remaining: 49.5s
2252:	learn: 0.0791288	test: 0.1020761	best: 0.1020761 (2252)	total: 40.6s	remaining: 49.5s
2253:	learn: 0.0791175	test: 0.1020757	best: 0.1020757 (2253)	total: 40.6s	remaining: 49.4s
2254:	learn: 0.0790949	test: 0.1020732	best: 0.1020732 (2254)	total: 40.6s	remaining: 49.4s
2255:	learn: 0.0790802	test: 0.1020643	best: 0.1020643 (2255)	total: 40.6s	remaining: 49.4s
2256:	learn: 0.0790582	test: 0.1020701	best: 0.1020643 (2255)	total: 40.6s	remaining: 49.4s
2257:	learn: 0.0790526	test: 0.1020690	best: 0.1020643 (2255)	total: 40.7s	remaining: 49.4s
2258:	learn: 0.0790335	test: 0.1020681	best: 0.1020643 (2255)	total: 40.7s	remaining: 49.3s
2259:	learn: 0.0790222	test: 0.1020564	best: 0.1020564 (2259)	total: 40.7s	remaining: 49.3s
2260:	learn: 0.0790103	test: 0.1020545	best: 0.1020545 (2260)	total: 40.7s	remai

2351:	learn: 0.0774306	test: 0.1014061	best: 0.1014061 (2351)	total: 42.2s	remaining: 47.6s
2352:	learn: 0.0774077	test: 0.1013951	best: 0.1013951 (2352)	total: 42.3s	remaining: 47.5s
2353:	learn: 0.0773901	test: 0.1013887	best: 0.1013887 (2353)	total: 42.3s	remaining: 47.5s
2354:	learn: 0.0773797	test: 0.1013873	best: 0.1013873 (2354)	total: 42.3s	remaining: 47.5s
2355:	learn: 0.0773298	test: 0.1013308	best: 0.1013308 (2355)	total: 42.3s	remaining: 47.5s
2356:	learn: 0.0773171	test: 0.1013243	best: 0.1013243 (2356)	total: 42.3s	remaining: 47.5s
2357:	learn: 0.0773040	test: 0.1013260	best: 0.1013243 (2356)	total: 42.3s	remaining: 47.4s
2358:	learn: 0.0772930	test: 0.1013227	best: 0.1013227 (2358)	total: 42.4s	remaining: 47.4s
2359:	learn: 0.0772806	test: 0.1013171	best: 0.1013171 (2359)	total: 42.4s	remaining: 47.4s
2360:	learn: 0.0772629	test: 0.1013086	best: 0.1013086 (2360)	total: 42.4s	remaining: 47.4s
2361:	learn: 0.0772544	test: 0.1013107	best: 0.1013086 (2360)	total: 42.4s	remai

2441:	learn: 0.0760530	test: 0.1009632	best: 0.1009632 (2441)	total: 43.8s	remaining: 45.8s
2442:	learn: 0.0760429	test: 0.1009616	best: 0.1009616 (2442)	total: 43.8s	remaining: 45.8s
2443:	learn: 0.0760386	test: 0.1009605	best: 0.1009605 (2443)	total: 43.8s	remaining: 45.8s
2444:	learn: 0.0760143	test: 0.1009351	best: 0.1009351 (2444)	total: 43.8s	remaining: 45.8s
2445:	learn: 0.0759986	test: 0.1009355	best: 0.1009351 (2444)	total: 43.8s	remaining: 45.8s
2446:	learn: 0.0759850	test: 0.1009303	best: 0.1009303 (2446)	total: 43.8s	remaining: 45.7s
2447:	learn: 0.0759665	test: 0.1009169	best: 0.1009169 (2447)	total: 43.9s	remaining: 45.7s
2448:	learn: 0.0759514	test: 0.1009099	best: 0.1009099 (2448)	total: 43.9s	remaining: 45.7s
2449:	learn: 0.0759415	test: 0.1009076	best: 0.1009076 (2449)	total: 43.9s	remaining: 45.7s
2450:	learn: 0.0759218	test: 0.1009068	best: 0.1009068 (2450)	total: 43.9s	remaining: 45.7s
2451:	learn: 0.0759009	test: 0.1008937	best: 0.1008937 (2451)	total: 43.9s	remai

2542:	learn: 0.0746157	test: 0.1005199	best: 0.1005163 (2537)	total: 45.4s	remaining: 43.9s
2543:	learn: 0.0745768	test: 0.1004717	best: 0.1004717 (2543)	total: 45.5s	remaining: 43.9s
2544:	learn: 0.0745673	test: 0.1004719	best: 0.1004717 (2543)	total: 45.5s	remaining: 43.9s
2545:	learn: 0.0745560	test: 0.1004716	best: 0.1004716 (2545)	total: 45.5s	remaining: 43.8s
2546:	learn: 0.0745373	test: 0.1004683	best: 0.1004683 (2546)	total: 45.5s	remaining: 43.8s
2547:	learn: 0.0745155	test: 0.1004725	best: 0.1004683 (2546)	total: 45.5s	remaining: 43.8s
2548:	learn: 0.0745079	test: 0.1004728	best: 0.1004683 (2546)	total: 45.5s	remaining: 43.8s
2549:	learn: 0.0744952	test: 0.1004681	best: 0.1004681 (2549)	total: 45.6s	remaining: 43.8s
2550:	learn: 0.0744728	test: 0.1004526	best: 0.1004526 (2550)	total: 45.6s	remaining: 43.8s
2551:	learn: 0.0744554	test: 0.1004513	best: 0.1004513 (2551)	total: 45.6s	remaining: 43.7s
2552:	learn: 0.0744397	test: 0.1004505	best: 0.1004505 (2552)	total: 45.6s	remai

2641:	learn: 0.0732600	test: 0.1001369	best: 0.1001369 (2641)	total: 47.1s	remaining: 42s
2642:	learn: 0.0732461	test: 0.1001353	best: 0.1001353 (2642)	total: 47.1s	remaining: 42s
2643:	learn: 0.0732392	test: 0.1001347	best: 0.1001347 (2643)	total: 47.1s	remaining: 42s
2644:	learn: 0.0732285	test: 0.1001332	best: 0.1001332 (2644)	total: 47.2s	remaining: 42s
2645:	learn: 0.0732140	test: 0.1001366	best: 0.1001332 (2644)	total: 47.2s	remaining: 42s
2646:	learn: 0.0732049	test: 0.1001350	best: 0.1001332 (2644)	total: 47.2s	remaining: 41.9s
2647:	learn: 0.0731923	test: 0.1001375	best: 0.1001332 (2644)	total: 47.2s	remaining: 41.9s
2648:	learn: 0.0731832	test: 0.1001386	best: 0.1001332 (2644)	total: 47.2s	remaining: 41.9s
2649:	learn: 0.0731737	test: 0.1001331	best: 0.1001331 (2649)	total: 47.2s	remaining: 41.9s
2650:	learn: 0.0731523	test: 0.1001191	best: 0.1001191 (2650)	total: 47.3s	remaining: 41.9s
2651:	learn: 0.0731398	test: 0.1001141	best: 0.1001141 (2651)	total: 47.3s	remaining: 41.9

In [25]:
# Get the best parameters
best_params = halving_search.best_params_
best_params


{'depth': 8, 'learning_rate': 0.01, 'n_estimators': 5000}

In [None]:
# Fit the model
# CatBoostError: only one of the parameters iterations, n_estimators, num_boost_round, num_trees should be initialized.
improved_cat = make_pipeline(
    MinMaxScaler(), # StandardScaler, MinMaxScaler, RobustScaler
    CatBoostClassifier(n_estimators=7000,
                       depth=8,
                       learning_rate=0.008,
                       loss_function='CrossEntropy',
                       random_seed=42,
                       verbose=0,
                       thread_count=-1)
)

improved_cat.fit(X_train, y_train)

In [None]:
# Train the model
y_pred_train_best_model = improved_cat.predict(X_train)

In [None]:
# Evaluate model on training data
print(confusion_matrix(y_train, y_pred_train_best_model))

In [None]:
# Make predictions on validation data
y_pred_val_best_model = improved_cat.predict(X_val)
cm = confusion_matrix(y_val, y_pred_val_best_model)
#pretty plot
plt.figure(figsize=(2,2))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')

# Add labels (depends on your problem)
plt.xlabel('Predicted')
plt.ylabel('Actual')

plt.show()

In [73]:
[[TN,FP],[FN,TP]] = cm
Accuracy = (TN + TP) / (TN + FP + FN + TP)
Precision = TP / (TP + FP)
Recall = TP / (TP + FN)
F1_score = 2 * (Precision * Recall) / (Precision + Recall)
maybe_acc = Accuracy - 0.003
target = 0.96
print(f"cm:\n{cm}\nAccuracy:\t{Accuracy:.7f}\nPrecision:\t{Precision:.7f}\nRecall:\t\t{Recall:.7f}\nF1_score:\t{F1_score:.7f}\n\nMaybe Acc:\t{maybe_acc:.7f}\nTarget:\t\t{target:.7f}")


cm:
[[ 8070   335]
 [  450 10021]]
Accuracy:	0.9584128
Precision:	0.9676516
Recall:		0.9570242
F1_score:	0.9623085

Maybe Acc:	0.9554128
Target:		0.9600000


In [79]:
# Make prediction
y_pred_test_improved_cat = improved_cat.predict(test_data)
y_pred_test_improved_cat

array([1, 1, 1, ..., 1, 1, 0], dtype=int64)

In [80]:
results_to_csv(y_pred_test_improved_cat, "y_pred_test_improved_cat")

         ID  Overall_Experience
0  99900001                   1
1  99900002                   1
2  99900003                   1
3  99900004                   0
4  99900005                   1
