# **COMPARISON OF NORMALIZATION AND STANDARDIZATION TECHNIQUES**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd

df = pd.read_csv('/content/drive/MyDrive/Code-smell-severity-classification-main/merged dataset_FE_LM_GC_DC.csv',
                    sep=',', encoding='iso-8859-1')

number_class = 13

# **DATA PREPROCESSING**

## **Transforming nominal categorical variables into ordinal categorical variabless**

In [3]:
df2 = pd.DataFrame.copy(df)

In [4]:
df2['modifier_type'].replace('abstract', 0.0, inplace=True)
df2['modifier_type'].replace('final', 1.0, inplace=True)
df2['modifier_type'].replace('other', 2.0, inplace=True)
df2['visibility_type'].replace('public', 0.0, inplace=True)
df2['visibility_type'].replace('private', 1.0, inplace=True)
df2['visibility_type'].replace('protected', 2.0, inplace=True)
df2['visibility_type'].replace('package', 3.0, inplace=True)

## **Predictor and Target Attributes**

In [5]:
predictors = df2.iloc[:, 8:92].values

In [6]:
target = df2.iloc[:, 7].values

## **Data Scaling**

Standardization (uses the mean and standard deviation as a reference).

Normalization (uses maximum and minimum values as a reference).

In [7]:
from sklearn.preprocessing import StandardScaler
predictors_stand = StandardScaler().fit_transform(predictors)

In [8]:
from sklearn.preprocessing import Normalizer
predictors_norm = Normalizer().fit_transform(predictors)

# **NAIVE BAYES**

:https://scikit-learn.org/stable/modules/naive_bayes.html

## **sklearn.naive_bayes.BernoulliNB**
Naive Bayes classifier for multivariate Bernoulli models.

Like MultinomialNB, this classifier is suitable for discrete data. The difference is that while MultinomialNB works with occurrence counts, BernoulliNB is designed for binary/boolean features.

https://scikit-learn.org/stable/modules/generated/sklearn.metrics.DistanceMetric.html

In [23]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(predictors_stand, target, test_size = 0.3, random_state = 0)

from sklearn.naive_bayes import BernoulliNB
naive = BernoulliNB(force_alpha=True)
naive.fit(x_train, y_train)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
predictions_naive = naive.predict(x_test)
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions_naive) * 100.0))

Accuracy: 61.34%


In [26]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Separating data into folds
kfold = KFold(n_splits = 5, shuffle=True, random_state = 5)

from sklearn.naive_bayes import BernoulliNB

# Creating the model
model = BernoulliNB(force_alpha=True)
result = cross_val_score(model, predictors_stand, target, cv = kfold)

# We use the mean accuracy and standard deviation
print("Mean Accuracy: %.2f%%" % (result.mean() * 100.0))
print("Standard Deviation: %.2f%%" % (result.std() * 100.0))

Mean Accuracy: 63.76%
Standard Deviation: 2.04%


# **SUPPORT VECTOR MACHINES (SVM)**

https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

In [30]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(predictors_stand, target, test_size = 0.3, random_state = 0)

from sklearn.svm import SVC
svm = SVC(kernel='rbf', random_state=1, C=1)
svm.fit(x_train, y_train)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
predictions_svm = svm.predict(x_test)
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions_svm) * 100.0))

Accuracy: 68.77%


In [33]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Separating data into folds
kfold = KFold(n_splits = 5, shuffle=True, random_state = 5)

from sklearn.svm import SVC

# Creating the model
model = SVC(kernel='rbf', random_state=1, C=1)
result = cross_val_score(model, predictors_stand, target, cv = kfold)

# We use the mean accuracy and standard deviation
print("Mean Accuracy: %.2f%%" % (result.mean() * 100.0))
print("Standard Deviation: %.2f%%" % (result.std() * 100.0))

Mean Accuracy: 68.01%
Standard Deviation: 3.32%


# **LOGISTIC REGRESSION**

https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

***MULTINOMIAL LOGISTICS REGRESSION***

In the multiclass case, the training algorithm uses the one-vs-rest (OvR) scheme if the ‘multi_class’ option is set to ‘ovr’, and uses the cross-entropy loss if the ‘multi_class’ option is set to ‘multinomial’. (Currently the ‘multinomial’ option is supported only by the ‘lbfgs’, ‘sag’, ‘saga’ and ‘newton-cg’ solvers.)

The ‘newton-cg’, ‘sag’, and ‘lbfgs’ solvers support only L2 regularization with primal formulation, or no regularization. The ‘liblinear’ solver supports both L1 and L2 regularization, with a dual formulation only for the L2 penalty. The Elastic-Net regularization is only supported by the ‘saga’ solver.

In [34]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(predictors_stand, target, test_size = 0.3, random_state = 0)

from sklearn.linear_model import LogisticRegression

logistica = LogisticRegression(random_state=1, max_iter=2000, penalty="l2", tol=0.0001,
                               multi_class="multinomial", C=1,solver="saga")
logistica.fit(x_train, y_train)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
predictions_logistic = logistica.predict(x_test)
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions_logistic) * 100.0))

Accuracy: 71.38%


In [37]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Separating data into folds
kfold = KFold(n_splits = 5, shuffle=True, random_state = 5)

from sklearn.linear_model import LogisticRegression

# Creating the model
model = LogisticRegression(random_state=1, max_iter=10000, penalty="l2", tol=0.0001,
                               multi_class="multinomial", C=1,solver="saga")
result = cross_val_score(model, predictors, target, cv = kfold)

# We use the mean accuracy and standard deviation
print("Mean Accuracy: %.2f%%" % (result.mean() * 100.0))
print("Standard Deviation: %.2f%%" % (result.std() * 100.0))

Mean Accuracy: 45.86%
Standard Deviation: 6.03%


# **K-NEAREST NEIGHBORS(KNN)**

https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html

In [38]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(predictors_stand, target, test_size = 0.3, random_state = 0)

from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=1)
knn.fit(x_train, y_train)


from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
predictions_knn = knn.predict(x_test)
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions_knn) * 100.0))

Accuracy: 65.80%


In [41]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Separating data into folds
kfold = KFold(n_splits = 5, shuffle=True, random_state = 5)

from sklearn.neighbors import KNeighborsClassifier

# Creating the model
model = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=1)
result = cross_val_score(model, predictors_stand, target, cv = kfold)

# We use the mean accuracy and standard deviation
print("Mean Accuracy: %.2f%%" % (result.mean() * 100.0))
print("Standard Deviation: %.2f%%" % (result.std() * 100.0))

Mean Accuracy: 67.23%
Standard Deviation: 2.55%


# **DECISION TREE**

https://scikit-learn.org/stable/modules/tree.html

In [51]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(predictors_stand, target, test_size = 0.3, random_state = 0)

In [52]:
from sklearn.model_selection import GridSearchCV

model = DecisionTreeClassifier(criterion='entropy', random_state = 0)

param_grid = dict(
    min_samples_split=[2,3,4,5],
    min_samples_leaf=[1,2,3,4,5],
    max_depth=[1,2,3,4,5,6,7,8,9,10],
    )

# Configuring the search with Grid search
grid_search = GridSearchCV(model, param_grid, scoring='f1_macro')

# Configuring the best hyperparameters
grid_search.fit(x_train, y_train)

# The best hyperparameters
print(grid_search.best_params_)

{'max_depth': 7, 'min_samples_leaf': 2, 'min_samples_split': 5}


In [53]:
from sklearn.tree import DecisionTreeClassifier
tree = DecisionTreeClassifier(criterion='entropy', random_state = 0, max_depth=7, min_samples_leaf= 2, min_samples_split = 5)
tree.fit(x_train, y_train)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
predictions_tree = tree.predict(x_test)
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions_tree) * 100.0))


Accuracy: 72.86%


In [54]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Separating data into folds
kfold = KFold(n_splits = 5, shuffle=True, random_state = 5)

from sklearn.tree import DecisionTreeClassifier

# Creating the model
model = DecisionTreeClassifier(criterion='entropy', random_state = 0, max_depth=7, min_samples_leaf= 2, min_samples_split = 5)
result = cross_val_score(model, predictors_stand, target, cv = kfold)

# We use the mean accuracy and standard deviation
print("Mean Accuracy: %.2f%%" % (result.mean() * 100.0))
print("Standard Deviation: %.2f%%" % (result.std() * 100.0))

Mean Accuracy: 75.06%
Standard Deviation: 4.76%


# **RANDOM FOREST**

https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html

In [70]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(predictors, target, test_size = 0.3, random_state = 0)

In [66]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(criterion='entropy', random_state = 0)

param_grid = dict(
    n_estimators=[50,100,150,200,250],
    min_samples_split=[2,3,4,5],
    max_depth=[1,2,3,4,5,6,7,8,9,10],
    )

# Configuring the search with Grid search
grid_search = GridSearchCV(model, param_grid, scoring='f1_macro')

# Configuring the best hyperparameters
grid_search.fit(x_train, y_train)

# The best hyperparameters
print(grid_search.best_params_)

{'max_depth': 10, 'min_samples_split': 4, 'n_estimators': 50}


In [71]:
from sklearn.ensemble import RandomForestClassifier
random = RandomForestClassifier(criterion='entropy', random_state = 0, max_depth=10, min_samples_split=4, n_estimators=50)
random.fit(x_train, y_train)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
predictions_random = random.predict(x_test)
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions_random) * 100.0))

Accuracy: 79.55%


In [72]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Separating data into folds
kfold = KFold(n_splits = 5, shuffle=True, random_state = 5)

from sklearn.ensemble import RandomForestClassifier

# Creating the model
model = RandomForestClassifier(criterion='entropy', random_state = 0, max_depth=10, min_samples_split=4, n_estimators=50)
result = cross_val_score(model, predictors, target, cv = kfold)

# We use the mean accuracy and standard deviation
print("Mean Accuracy: %.2f%%" % (result.mean() * 100.0))
print("Standard Deviation: %.2f%%" % (result.std() * 100.0))

Mean Accuracy: 79.53%
Standard Deviation: 2.31%


# **XGBOOST**

https://xgboost.readthedocs.io/en/stable/

In [84]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(predictors_norm, target, test_size = 0.3, random_state = 0)

In [85]:
from sklearn.model_selection import GridSearchCV
model = XGBClassifier(objective='multi:softprob', num_class=13, random_state=3)

param_grid = dict(
    n_estimators=[50, 100, 150, 200],
    max_depth=[1,2,3],
    learning_rate=[0.01, 0.05, 0.1, 0.2, 0.5]
    )

# Configuring the search with Grid search
grid_search = GridSearchCV(model, param_grid, scoring='f1_macro')

# Configuring the best hyperparameters
grid_search.fit(x_train, y_train)

# The best hyperparameters
print(grid_search.best_params_)

{'learning_rate': 0.2, 'max_depth': 2, 'n_estimators': 150}


In [86]:
xg = XGBClassifier(learning_rate=0.2, max_depth=2, n_estimators=150, objective='multi:softprob', num_class=13, random_state=3)
xg.fit(x_train,y_train)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
predictions_xg = xg.predict(x_test)
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions_xg) * 100.0))

print(classification_report(y_test, predictions_xg))

confusion_matrix(y_test, predictions_xg)

Accuracy: 76.21%
              precision    recall  f1-score   support

         0.0       0.89      0.90      0.89       129
         1.0       0.00      0.00      0.00         6
         2.0       0.36      0.33      0.35        12
         3.0       0.00      0.00      0.00         1
         4.0       0.00      0.00      0.00         2
         5.0       0.58      0.86      0.69        29
         6.0       0.60      0.43      0.50         7
         7.0       0.67      0.50      0.57         8
         8.0       0.79      0.76      0.78        25
         9.0       0.64      0.75      0.69        12
        10.0       0.00      0.00      0.00         0
        11.0       0.67      0.20      0.31        10
        12.0       0.77      0.82      0.79        28

    accuracy                           0.76       269
   macro avg       0.46      0.43      0.43       269
weighted avg       0.75      0.76      0.75       269



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


array([[116,   0,   3,   0,   0,   3,   1,   2,   0,   1,   0,   0,   3],
       [  2,   0,   3,   0,   0,   1,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   4,   0,   0,   8,   0,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   1,   0,   0,   0,   0,   0,   0,   0],
       [  1,   0,   0,   0,   0,   1,   0,   0,   0,   0,   0,   0,   0],
       [  2,   0,   1,   0,   0,  25,   1,   0,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   4,   3,   0,   0,   0,   0,   0,   0],
       [  2,   0,   0,   0,   0,   0,   0,   4,   2,   0,   0,   0,   0],
       [  3,   0,   0,   0,   0,   0,   0,   0,  19,   3,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   3,   9,   0,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       [  3,   0,   0,   0,   0,   0,   0,   0,   0,   1,   0,   2,   4],
       [  2,   0,   0,   0,   0,   0,   0,   0,   0,   0,   2,   1,  23]])

### **Cross-Validation**

In [87]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [88]:
# Separating data into folds
kfold = KFold(n_splits = 5, shuffle=True, random_state = 5)

In [91]:
# Creating the model
model = XGBClassifier(learning_rate=0.2, max_depth=2, n_estimators=150, objective='multi:softprob', num_class=13, random_state=3)
result = cross_val_score(model, predictors_norm, target, cv = kfold)
result


array([0.67597765, 0.7150838 , 0.75977654, 0.81005587, 0.79213483])

In [92]:
# We use the mean accuracy and standard deviation
print("Mean Accuracy: %.2f%%" % (result.mean() * 100.0))
print("Standard Deviation: %.2f%%" % (result.std() * 100.0))

Mean Accuracy: 75.06%
Standard Deviation: 4.93%


# **CATBOOST**

https://catboost.ai/en/docs/

In [93]:
#Instalação
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.5-cp310-cp310-manylinux2014_x86_64.whl (98.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.2/98.2 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.5


In [98]:
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(predictors_norm, target, test_size = 0.3, random_state = 0)

In [99]:
from sklearn.model_selection import GridSearchCV
model = CatBoostClassifier(task_type='CPU', random_state = 5)

param_grid = dict(
    iterations=[100, 150, 200],
    learning_rate=[0.1, 0.2, 0.5],
    depth=[4,5,6,7],
    )

# Configuring the search with Grid search
grid_search = GridSearchCV(model, param_grid, scoring='f1_macro')

# Configuring the best hyperparameters
grid_search.fit(x_train, y_train)

# The best hyperparameters
print(grid_search.best_params_)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
1:	learn: 2.2490284	total: 612ms	remaining: 45.3s
2:	learn: 2.1623603	total: 892ms	remaining: 43.7s
3:	learn: 2.0379027	total: 1.18s	remaining: 43.2s
4:	learn: 1.9482296	total: 1.48s	remaining: 43s
5:	learn: 1.8486657	total: 1.8s	remaining: 43.1s
6:	learn: 1.7846241	total: 2.09s	remaining: 42.8s
7:	learn: 1.7201492	total: 2.42s	remaining: 42.9s
8:	learn: 1.6595388	total: 2.72s	remaining: 42.7s
9:	learn: 1.6139205	total: 3.01s	remaining: 42.2s
10:	learn: 1.5564879	total: 3.31s	remaining: 41.9s
11:	learn: 1.5061684	total: 3.6s	remaining: 41.5s
12:	learn: 1.4598762	total: 3.91s	remaining: 41.2s
13:	learn: 1.4167263	total: 4.21s	remaining: 40.9s
14:	learn: 1.3726186	total: 4.64s	remaining: 41.8s
15:	learn: 1.3401201	total: 5.16s	remaining: 43.2s
16:	learn: 1.3047487	total: 5.66s	remaining: 44.3s
17:	learn: 1.2744852	total: 6.16s	remaining: 45.2s
18:	learn: 1.2421787	total: 6.7s	remaining: 46.2s
19:	learn: 1.2154488	total: 7.2

In [100]:
from catboost import CatBoostClassifier
catboost = CatBoostClassifier(task_type='CPU', depth = 4, iterations=200, learning_rate=0.2, random_state = 5, eval_metric="Accuracy")
catboost.fit( x_train, y_train, plot=True, eval_set=(x_test, y_test))

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
predictions_cat = catboost.predict(x_test)
print("Accuracy: %.2f%%" % (accuracy_score(y_test, predictions_cat) * 100.0))

print(classification_report(y_test, predictions_cat))

confusion_matrix(y_test, predictions_cat)

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

0:	learn: 0.4816000	test: 0.5167286	best: 0.5167286 (0)	total: 140ms	remaining: 27.9s
1:	learn: 0.5504000	test: 0.5464684	best: 0.5464684 (1)	total: 221ms	remaining: 21.8s
2:	learn: 0.5792000	test: 0.5910781	best: 0.5910781 (2)	total: 321ms	remaining: 21.1s
3:	learn: 0.6416000	test: 0.6505576	best: 0.6505576 (3)	total: 404ms	remaining: 19.8s
4:	learn: 0.6624000	test: 0.6431227	best: 0.6505576 (3)	total: 485ms	remaining: 18.9s
5:	learn: 0.6720000	test: 0.6617100	best: 0.6617100 (5)	total: 584ms	remaining: 18.9s
6:	learn: 0.6784000	test: 0.6691450	best: 0.6691450 (6)	total: 725ms	remaining: 20s
7:	learn: 0.6784000	test: 0.6654275	best: 0.6691450 (6)	total: 911ms	remaining: 21.9s
8:	learn: 0.6912000	test: 0.6840149	best: 0.6840149 (8)	total: 1.08s	remaining: 23s
9:	learn: 0.6944000	test: 0.6765799	best: 0.6840149 (8)	total: 1.23s	remaining: 23.4s
10:	learn: 0.6960000	test: 0.6765799	best: 0.6840149 (8)	total: 1.35s	remaining: 23.2s
11:	learn: 0.6960000	test: 0.6802974	best: 0.6840149 (8)	

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


array([[119,   0,   1,   0,   0,   3,   0,   0,   1,   0,   0,   5],
       [  2,   0,   0,   0,   0,   4,   0,   0,   0,   0,   0,   0],
       [  0,   0,   3,   0,   0,   8,   1,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   1,   0,   0,   0,   0,   0,   0],
       [  1,   0,   0,   0,   0,   1,   0,   0,   0,   0,   0,   0],
       [  3,   0,   1,   0,   0,  24,   1,   0,   0,   0,   0,   0],
       [  0,   0,   0,   0,   0,   5,   2,   0,   0,   0,   0,   0],
       [  1,   0,   0,   0,   0,   0,   0,   3,   4,   0,   0,   0],
       [  5,   0,   0,   0,   0,   0,   0,   0,  19,   1,   0,   0],
       [  0,   0,   0,   0,   0,   0,   0,   0,   3,   9,   0,   0],
       [  3,   0,   0,   0,   0,   0,   0,   0,   0,   0,   1,   6],
       [  1,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  27]])

### **Cross-Validation**

In [101]:
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

# Separating data into folds
kfold = KFold(n_splits = 5, shuffle=True, random_state = 5)

# Creating the model
model = CatBoostClassifier(task_type='CPU', iterations=200, learning_rate=0.2, depth = 4, random_state = 5, eval_metric="Accuracy")
result = cross_val_score(model, predictors_norm, target, cv = kfold)

# We use the mean accuracy and standard deviation
print("Mean Accuracy: %.2f%%" % (result.mean() * 100.0))
print("Standard Deviation:: %.2f%%" % (result.std() * 100.0))

0:	learn: 0.5230769	total: 254ms	remaining: 50.5s
1:	learn: 0.5580420	total: 377ms	remaining: 37.3s
2:	learn: 0.5748252	total: 512ms	remaining: 33.6s
3:	learn: 0.5846154	total: 642ms	remaining: 31.4s
4:	learn: 0.5860140	total: 746ms	remaining: 29.1s
5:	learn: 0.6195804	total: 852ms	remaining: 27.5s
6:	learn: 0.6433566	total: 980ms	remaining: 27s
7:	learn: 0.6573427	total: 1.08s	remaining: 26s
8:	learn: 0.6657343	total: 1.23s	remaining: 26s
9:	learn: 0.6685315	total: 1.32s	remaining: 25.1s
10:	learn: 0.6727273	total: 1.43s	remaining: 24.6s
11:	learn: 0.6881119	total: 1.54s	remaining: 24.1s
12:	learn: 0.7006993	total: 1.65s	remaining: 23.7s
13:	learn: 0.7034965	total: 1.88s	remaining: 25s
14:	learn: 0.7104895	total: 2.08s	remaining: 25.7s
15:	learn: 0.7146853	total: 2.24s	remaining: 25.8s
16:	learn: 0.7146853	total: 2.42s	remaining: 26.1s
17:	learn: 0.7202797	total: 2.62s	remaining: 26.5s
18:	learn: 0.7230769	total: 2.87s	remaining: 27.3s
19:	learn: 0.7286713	total: 3.16s	remaining: 28.4