# Problem 1. 
## Default value of `loss_function = Logloss` in multiclass classification

In [50]:
import sklearn
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris

# Preparing Iris data

In [51]:
# Seed NumPy's global RNG so the random train/test mask drawn with
# np.random.uniform further down is the same on every top-to-bottom run.
np.random.seed(0)

In [52]:
# Load the Iris dataset into a DataFrame with one column per measurement.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# The first four columns (the measurements) are the model inputs.
features = df.columns[:4]

# Build the named species labels, then store them as integer codes
# (enumerated in order of first appearance) in the `species` column.
species_names = pd.Categorical.from_codes(iris.target, iris.target_names)
df['species'] = pd.factorize(species_names)[0]

In [53]:
# Draw one uniform number per row; rows at or below 0.75 become training data.
train_mask = np.random.uniform(low=0, high=1, size=len(df)) <= 0.75
df['is_train'] = train_mask

In [54]:
# Show the distinct encoded class labels present in the target column.
df['species'].unique()

array([0, 1, 2])

In [55]:
# Partition the rows using the boolean `is_train` flag.
train_rows = df[df['is_train']]
test_rows = df[~df['is_train']]

# Targets are the encoded species; inputs are the measurement columns only.
X_train, y_train = train_rows[features], train_rows['species']
X_test, y_test = test_rows[features], test_rows['species']

In [56]:
# Preview the first rows of the training feature matrix.
X_train.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


## This is a multiclass classification task!

### Let's train RandomForestClassifier and CatBoostClassifier with default parameters to compare them

# Random Forest

In [58]:
# Baseline model: a random forest with every hyperparameter left at its default.
rf_clf = RandomForestClassifier()
# NOTE(review): random_state is not set (the repr below shows random_state=None),
# so reproducibility presumably relies on the global NumPy seed set earlier - confirm.
rf_clf.fit(X_train, y_train)



RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [59]:
from sklearn.metrics import accuracy_score

In [60]:
# Score the random forest on both splits first, then report the two numbers.
train_accuracy = accuracy_score(y_train, rf_clf.predict(X_train))
test_accuracy = accuracy_score(y_test, rf_clf.predict(X_test))

print("accuracy train:", train_accuracy)
print("accuracy test:", test_accuracy)

accuracy train: 0.9915254237288136
accuracy test: 0.96875


Ok, good baseline

## Now let's train CatBoostClassifier

In [61]:
from catboost import CatBoostClassifier

In [62]:
# CatBoost with default parameters, mirroring the random-forest baseline above.
cb_clf = CatBoostClassifier()
# fit() prints one log line per boosting iteration; as the cell's last
# expression it also echoes the fitted classifier's repr.
cb_clf.fit(X_train, y_train)

Learning rate set to 0.009151
0:	learn: 0.6737590	total: 20.9ms	remaining: 20.8s
1:	learn: 0.6570463	total: 32.9ms	remaining: 16.4s
2:	learn: 0.6409422	total: 41.2ms	remaining: 13.7s
3:	learn: 0.6200341	total: 47.5ms	remaining: 11.8s
4:	learn: 0.6036743	total: 59.6ms	remaining: 11.9s
5:	learn: 0.5889379	total: 70.5ms	remaining: 11.7s
6:	learn: 0.5730424	total: 74.8ms	remaining: 10.6s
7:	learn: 0.5578881	total: 84.1ms	remaining: 10.4s
8:	learn: 0.5428021	total: 88ms	remaining: 9.69s
9:	learn: 0.5305906	total: 94.4ms	remaining: 9.35s
10:	learn: 0.5133178	total: 98.9ms	remaining: 8.89s
11:	learn: 0.5016907	total: 102ms	remaining: 8.39s
12:	learn: 0.4904760	total: 106ms	remaining: 8.02s
13:	learn: 0.4778430	total: 110ms	remaining: 7.72s
14:	learn: 0.4662893	total: 113ms	remaining: 7.45s
15:	learn: 0.4556160	total: 117ms	remaining: 7.21s
16:	learn: 0.4434002	total: 121ms	remaining: 6.98s
17:	learn: 0.4313908	total: 124ms	remaining: 6.74s
18:	learn: 0.4221340	total: 127ms	remaining: 6.58s
19

189:	learn: 0.0249586	total: 975ms	remaining: 4.16s
190:	learn: 0.0245878	total: 981ms	remaining: 4.16s
191:	learn: 0.0242732	total: 986ms	remaining: 4.15s
192:	learn: 0.0240192	total: 993ms	remaining: 4.15s
193:	learn: 0.0238520	total: 999ms	remaining: 4.15s
194:	learn: 0.0236506	total: 1s	remaining: 4.15s
195:	learn: 0.0232949	total: 1.01s	remaining: 4.14s
196:	learn: 0.0231224	total: 1.01s	remaining: 4.13s
197:	learn: 0.0228882	total: 1.02s	remaining: 4.13s
198:	learn: 0.0226569	total: 1.03s	remaining: 4.17s
199:	learn: 0.0225138	total: 1.04s	remaining: 4.16s
200:	learn: 0.0222895	total: 1.04s	remaining: 4.15s
201:	learn: 0.0220614	total: 1.05s	remaining: 4.15s
202:	learn: 0.0219227	total: 1.05s	remaining: 4.14s
203:	learn: 0.0217514	total: 1.06s	remaining: 4.13s
204:	learn: 0.0215982	total: 1.06s	remaining: 4.12s
205:	learn: 0.0214673	total: 1.07s	remaining: 4.11s
206:	learn: 0.0212905	total: 1.07s	remaining: 4.1s
207:	learn: 0.0210746	total: 1.07s	remaining: 4.09s
208:	learn: 0.02

356:	learn: 0.0081831	total: 1.75s	remaining: 3.16s
357:	learn: 0.0081421	total: 1.76s	remaining: 3.15s
358:	learn: 0.0081111	total: 1.76s	remaining: 3.15s
359:	learn: 0.0080783	total: 1.77s	remaining: 3.15s
360:	learn: 0.0080479	total: 1.78s	remaining: 3.14s
361:	learn: 0.0080180	total: 1.78s	remaining: 3.14s
362:	learn: 0.0079751	total: 1.78s	remaining: 3.13s
363:	learn: 0.0079234	total: 1.79s	remaining: 3.13s
364:	learn: 0.0078846	total: 1.79s	remaining: 3.12s
365:	learn: 0.0078220	total: 1.8s	remaining: 3.11s
366:	learn: 0.0077665	total: 1.8s	remaining: 3.1s
367:	learn: 0.0077328	total: 1.8s	remaining: 3.1s
368:	learn: 0.0076963	total: 1.81s	remaining: 3.09s
369:	learn: 0.0076452	total: 1.81s	remaining: 3.08s
370:	learn: 0.0076057	total: 1.81s	remaining: 3.07s
371:	learn: 0.0075666	total: 1.82s	remaining: 3.07s
372:	learn: 0.0075396	total: 1.82s	remaining: 3.06s
373:	learn: 0.0075093	total: 1.82s	remaining: 3.05s
374:	learn: 0.0074849	total: 1.83s	remaining: 3.05s
375:	learn: 0.007

529:	learn: 0.0043107	total: 2.51s	remaining: 2.23s
530:	learn: 0.0042958	total: 2.52s	remaining: 2.22s
531:	learn: 0.0042845	total: 2.52s	remaining: 2.22s
532:	learn: 0.0042695	total: 2.53s	remaining: 2.22s
533:	learn: 0.0042597	total: 2.54s	remaining: 2.21s
534:	learn: 0.0042544	total: 2.54s	remaining: 2.21s
535:	learn: 0.0042427	total: 2.54s	remaining: 2.2s
536:	learn: 0.0042292	total: 2.55s	remaining: 2.2s
537:	learn: 0.0042176	total: 2.55s	remaining: 2.19s
538:	learn: 0.0042088	total: 2.56s	remaining: 2.19s
539:	learn: 0.0041950	total: 2.56s	remaining: 2.18s
540:	learn: 0.0041798	total: 2.56s	remaining: 2.17s
541:	learn: 0.0041622	total: 2.57s	remaining: 2.17s
542:	learn: 0.0041503	total: 2.57s	remaining: 2.16s
543:	learn: 0.0041358	total: 2.57s	remaining: 2.16s
544:	learn: 0.0041278	total: 2.58s	remaining: 2.15s
545:	learn: 0.0041108	total: 2.58s	remaining: 2.15s
546:	learn: 0.0040916	total: 2.58s	remaining: 2.14s
547:	learn: 0.0040778	total: 2.59s	remaining: 2.13s
548:	learn: 0.

719:	learn: 0.0026388	total: 3.26s	remaining: 1.27s
720:	learn: 0.0026338	total: 3.27s	remaining: 1.26s
721:	learn: 0.0026298	total: 3.27s	remaining: 1.26s
722:	learn: 0.0026247	total: 3.29s	remaining: 1.26s
723:	learn: 0.0026177	total: 3.3s	remaining: 1.26s
724:	learn: 0.0026135	total: 3.3s	remaining: 1.25s
725:	learn: 0.0026069	total: 3.31s	remaining: 1.25s
726:	learn: 0.0026026	total: 3.32s	remaining: 1.25s
727:	learn: 0.0025929	total: 3.32s	remaining: 1.24s
728:	learn: 0.0025861	total: 3.33s	remaining: 1.24s
729:	learn: 0.0025808	total: 3.33s	remaining: 1.23s
730:	learn: 0.0025740	total: 3.33s	remaining: 1.23s
731:	learn: 0.0025664	total: 3.34s	remaining: 1.22s
732:	learn: 0.0025598	total: 3.34s	remaining: 1.22s
733:	learn: 0.0025563	total: 3.34s	remaining: 1.21s
734:	learn: 0.0025539	total: 3.35s	remaining: 1.21s
735:	learn: 0.0025474	total: 3.35s	remaining: 1.2s
736:	learn: 0.0025419	total: 3.36s	remaining: 1.2s
737:	learn: 0.0025363	total: 3.36s	remaining: 1.19s
738:	learn: 0.00

886:	learn: 0.0019124	total: 4.01s	remaining: 511ms
887:	learn: 0.0019075	total: 4.02s	remaining: 507ms
888:	learn: 0.0019046	total: 4.02s	remaining: 502ms
889:	learn: 0.0019019	total: 4.03s	remaining: 498ms
890:	learn: 0.0018990	total: 4.04s	remaining: 494ms
891:	learn: 0.0018971	total: 4.04s	remaining: 489ms
892:	learn: 0.0018939	total: 4.04s	remaining: 485ms
893:	learn: 0.0018909	total: 4.05s	remaining: 480ms
894:	learn: 0.0018885	total: 4.05s	remaining: 475ms
895:	learn: 0.0018865	total: 4.05s	remaining: 471ms
896:	learn: 0.0018821	total: 4.06s	remaining: 466ms
897:	learn: 0.0018790	total: 4.06s	remaining: 461ms
898:	learn: 0.0018752	total: 4.07s	remaining: 457ms
899:	learn: 0.0018731	total: 4.07s	remaining: 452ms
900:	learn: 0.0018685	total: 4.07s	remaining: 448ms
901:	learn: 0.0018662	total: 4.08s	remaining: 443ms
902:	learn: 0.0018609	total: 4.08s	remaining: 438ms
903:	learn: 0.0018586	total: 4.08s	remaining: 434ms
904:	learn: 0.0018570	total: 4.08s	remaining: 429ms
905:	learn: 

<catboost.core.CatBoostClassifier at 0x7fdca9071550>

In [63]:
# Train and test accuracy of the CatBoost model fitted in the previous cell.
# Use `cb_clf` explicitly: no variable named `clf` is defined in the visible
# notebook, so referring to it only works with leftover kernel state and fails
# with a NameError on Restart & Run All.
print(accuracy_score(y_train, cb_clf.predict(X_train)))
print(accuracy_score(y_test, cb_clf.predict(X_test)))

0.6779661016949152
0.625


### Hmmmm, not so good. Let's find out why

In [64]:
# Inspect the raw predicted labels on the training set to see which classes
# the model actually outputs.
cb_clf.predict(X_train)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

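The problem is that CatBoostClassifier has the default parameter `loss_function = Logloss`, which is intended for binary classification. As the predictions above show, the model only ever outputs classes 0 and 1 and never predicts class 2.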

This is bad for several reasons:
- Not all new users will go to the CatBoostClassifier documentation and read that they must set `loss_function = MultiClass` for a multiclass classification task, especially if they previously used `RandomForestClassifier` or `XGBClassifier` with default parameters, because those two classifiers automatically choose between binary and multiclass classification based on the number of unique values in the `target` vector.

- The second problem, in my opinion, may be crucial during a user's first experience with CatBoost, when they are most easily confused. The described problem is easy to fix, so a user has no need to open an issue: they will just go to the CatBoost documentation and set `loss_function` to `MultiClass`. But my experiments with other programmers showed that some new users are really confused by this behavior, and this can cause a negative first impression of CatBoost - we want to avoid that.

**Possible solution:** simply count the unique values in the target label. If there are three or more, automatically switch `loss_function` to `MultiClass` or, as an alternative, print a warning to the user: "it's probably better to switch `loss_function` to `MultiClass`".

# Problem 2
## If in CatBoostClassifier `loss_function=MultiClass` and the labels are in range `{0,1}`, then the class probabilities are different from `loss_function=Logloss`

In [69]:
# Keep only the rows whose encoded species is at most 1, leaving two classes.
binary_rows = df['species'].le(1)
df = df[binary_rows]

In [70]:
# Re-split the filtered two-class frame using the existing `is_train` flag.
train_rows = df[df['is_train']]
test_rows = df[~df['is_train']]

# Targets are the encoded species; inputs are the measurement columns only.
X_train, y_train = train_rows[features], train_rows['species']
X_test, y_test = test_rows[features], test_rows['species']

In [71]:
# Confirm which distinct labels remain in the training target after filtering.
y_train.unique()

array([0, 1])

## Now this is a binary classification task

In [80]:
# Train on the two-class data with the MultiClass objective, using only
# 5 boosting iterations.
# NOTE(review): no learning_rate is passed and this run's log does not report
# one, while the Logloss run in the next cell logs "Learning rate set to 0.5".
# Confirm both runs use the same learning rate before attributing the
# probability gap to loss_function alone.
cb_clf_multiclass = CatBoostClassifier(loss_function="MultiClass", n_estimators=5)
cb_clf_multiclass.fit(X_train, y_train)

0:	learn: -0.6723609	total: 28.7ms	remaining: 115ms
1:	learn: -0.6562648	total: 37.1ms	remaining: 55.7ms
2:	learn: -0.6350226	total: 40ms	remaining: 26.7ms
3:	learn: -0.6177452	total: 51.6ms	remaining: 12.9ms
4:	learn: -0.6010231	total: 60.4ms	remaining: 0us


<catboost.core.CatBoostClassifier at 0x7fdca910cda0>

In [81]:
# Train on the same two-class data with the Logloss objective, using only
# 5 boosting iterations.
# NOTE(review): the log of this run reports "Learning rate set to 0.5", while
# the MultiClass run in the previous cell logs no learning rate. Confirm both
# runs use the same learning rate before attributing the probability gap to
# loss_function alone.
cb_clf_binary = CatBoostClassifier(loss_function="Logloss", n_estimators=5)
cb_clf_binary.fit(X_train, y_train)

Learning rate set to 0.5
0:	learn: 0.1619941	total: 36.2ms	remaining: 145ms
1:	learn: 0.0671021	total: 58.2ms	remaining: 87.3ms
2:	learn: 0.0378335	total: 69.6ms	remaining: 46.4ms
3:	learn: 0.0162693	total: 78.2ms	remaining: 19.6ms
4:	learn: 0.0093864	total: 90.7ms	remaining: 0us


<catboost.core.CatBoostClassifier at 0x7fdca910cc88>

In [82]:
# Class probabilities for the first three test rows from the Logloss model.
cb_clf_binary.predict_proba(X_test)[:3]

array([[0.99567032, 0.00432968],
       [0.99119748, 0.00880252],
       [0.99567032, 0.00432968]])

In [83]:
# Class probabilities for the same three test rows from the MultiClass model.
cb_clf_multiclass.predict_proba(X_test)[:3]

array([[0.55703732, 0.44296268],
       [0.55278628, 0.44721372],
       [0.55703732, 0.44296268]])

We see that the probabilities are different.

**Possible solution:** print a warning to the user that "your labels are in range `{0,1}`, maybe you should set `loss_function=Logloss`"