In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

!pip install catboost
from catboost import CatBoostClassifier


Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [2]:
data = load_breast_cancer()
data


{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [3]:
x = data.data
y = data.target

In [4]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42, stratify=y)

In [5]:
model = CatBoostClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
model.fit(x_train,y_train)

0:	learn: 0.5438295	total: 101ms	remaining: 10s
1:	learn: 0.4291055	total: 135ms	remaining: 6.63s
2:	learn: 0.3535684	total: 174ms	remaining: 5.63s
3:	learn: 0.2957751	total: 206ms	remaining: 4.95s
4:	learn: 0.2510264	total: 246ms	remaining: 4.67s
5:	learn: 0.2162280	total: 260ms	remaining: 4.07s
6:	learn: 0.1852137	total: 329ms	remaining: 4.37s
7:	learn: 0.1640240	total: 344ms	remaining: 3.96s
8:	learn: 0.1486816	total: 359ms	remaining: 3.63s
9:	learn: 0.1292773	total: 374ms	remaining: 3.37s
10:	learn: 0.1168704	total: 389ms	remaining: 3.14s
11:	learn: 0.1019623	total: 403ms	remaining: 2.96s
12:	learn: 0.0939656	total: 419ms	remaining: 2.8s
13:	learn: 0.0880840	total: 486ms	remaining: 2.99s
14:	learn: 0.0800813	total: 530ms	remaining: 3s
15:	learn: 0.0757052	total: 572ms	remaining: 3s
16:	learn: 0.0692304	total: 604ms	remaining: 2.95s
17:	learn: 0.0642866	total: 632ms	remaining: 2.88s
18:	learn: 0.0596106	total: 656ms	remaining: 2.8s
19:	learn: 0.0562763	total: 689ms	remaining: 2.76s


<catboost.core.CatBoostClassifier at 0x7ced871b81a0>

In [7]:
y_pred = model.predict(x_test)
y_pred

array([0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0,
       0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 1, 1])

In [8]:
accuracy_score(y_test,y_pred)

0.956140350877193

In [9]:
confusion_matrix(y_test,y_pred)

array([[38,  4],
       [ 1, 71]])

In [11]:
print(classification_report(y_test,y_pred))

              precision    recall  f1-score   support

           0       0.97      0.90      0.94        42
           1       0.95      0.99      0.97        72

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

