In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from catboost import CatBoostClassifier

# CSV dosyasını oku
df = pd.read_csv('vk5_drop.csv')

# 'Name' ve diğer istenmeyen sütunları çıkar
df = df.drop(columns=['name'])

# 'Label' sütununu hedef değişken (y) olarak ayarla
y = df['label']

# LabelEncoder ile etiketleri 0, 1, 2, 3 olacak şekilde dönüştür
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Geri kalan sütunları özellikler (X) olarak ayarla
X = df.drop(columns=['label'])

# Veriyi eğitim ve test setlerine ayır
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# CatBoost modelini oluştur ve fit et
model = CatBoostClassifier(random_state=42, eval_metric='MultiClass', verbose=100)
model.fit(X_train, y_train, eval_set=(X_test, y_test))

# Test seti ile tahmin yap
y_pred = model.predict(X_test)

# Modeli değerlendir
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)




Learning rate set to 0.112657
0:	learn: 1.2582924	test: 1.2583777	best: 1.2583777 (0)	total: 79.9ms	remaining: 1m 19s
100:	learn: 0.3817295	test: 0.4147677	best: 0.4147677 (100)	total: 1.01s	remaining: 9.02s
200:	learn: 0.2747475	test: 0.3460507	best: 0.3460507 (200)	total: 1.91s	remaining: 7.61s
300:	learn: 0.2171167	test: 0.3104739	best: 0.3104739 (300)	total: 2.79s	remaining: 6.48s
400:	learn: 0.1782501	test: 0.2894123	best: 0.2894123 (400)	total: 3.66s	remaining: 5.47s
500:	learn: 0.1488647	test: 0.2740817	best: 0.2740817 (500)	total: 4.47s	remaining: 4.46s
600:	learn: 0.1270020	test: 0.2622708	best: 0.2622708 (600)	total: 5.34s	remaining: 3.55s
700:	learn: 0.1091380	test: 0.2535238	best: 0.2535238 (700)	total: 6.18s	remaining: 2.64s
800:	learn: 0.0949913	test: 0.2471061	best: 0.2470598 (799)	total: 7.02s	remaining: 1.74s
900:	learn: 0.0831674	test: 0.2423884	best: 0.2423884 (900)	total: 7.85s	remaining: 862ms
999:	learn: 0.0736768	test: 0.2369808	best: 0.2369808 (999)	total: 8.66s

In [2]:
# Hiperparametreler için grid
param_grid = {
    'iterations': [100, 200, 300],
    'depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'bootstrap_type': ['Bernoulli'],
    'subsample': [0.7, 0.8, 0.9]
}

# GridSearchCV kullanarak en iyi hiperparametreleri bul
grid_search = GridSearchCV(estimator=CatBoostClassifier(random_state=42, eval_metric='MultiClass', verbose=0),
                           param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# En iyi hiperparametreler
best_params = grid_search.best_params_
print(f'Best parameters: {best_params}')

# En iyi hiperparametrelerle CatBoost modelini oluştur ve fit et
model = CatBoostClassifier(random_state=42, eval_metric='MultiClass', **best_params, verbose=100)
model.fit(X_train, y_train)

# Test seti ile tahmin yap
y_pred = model.predict(X_test)

# Modeli değerlendir
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

Fitting 5 folds for each of 81 candidates, totalling 405 fits
Best parameters: {'bootstrap_type': 'Bernoulli', 'depth': 7, 'iterations': 300, 'learning_rate': 0.2, 'subsample': 0.8}
0:	learn: 1.1768046	total: 18.5ms	remaining: 5.52s
100:	learn: 0.2543802	total: 1.82s	remaining: 3.59s
200:	learn: 0.1490374	total: 3.64s	remaining: 1.79s
299:	learn: 0.0975824	total: 5.42s	remaining: 0us
Accuracy: 0.9012937230474365
Classification Report:
              precision    recall  f1-score   support

           0     0.9686    0.9597    0.9641       546
           1     0.9647    0.9748    0.9697       476
           2     0.8313    0.8965    0.8627       599
           3     0.8496    0.7639    0.8045       466

    accuracy                         0.9013      2087
   macro avg     0.9035    0.8987    0.9002      2087
weighted avg     0.9017    0.9013    0.9006      2087

