In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb

# Load the dataset from CSV.
df = pd.read_csv('vk5_drop.csv')

# Drop the 'name' column so it is not used as a feature.
df = df.drop(columns=['name'])

# Use the 'label' column as the target variable (y).
y = df['label']

# Encode the labels as consecutive integer codes; the encoder is kept so the
# codes can be mapped back to the original label values later.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Every remaining column is treated as a feature (X).
X = df.drop(columns=['label'])

# Hold out 20% of the rows as a test set (fixed seed for reproducibility).
# NOTE(review): the split is not stratified; if the classes are imbalanced,
# consider passing stratify=y -- confirm against the label distribution.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the XGBoost model. eval_set makes fit() record the multiclass log loss
# after every boosting round on both splits; no early stopping is configured,
# so these sets are used for monitoring only.
eval_set = [(X_train, y_train), (X_test, y_test)]
model = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')
model.fit(X_train, y_train, eval_set=eval_set, verbose=True)

# Predict on the held-out test set.
y_pred = model.predict(X_test)

# Evaluate the model. The report is labelled with the original class names
# instead of the integer codes produced by LabelEncoder; `labels` is passed
# explicitly so the names stay aligned even if a class is absent from y_test.
accuracy = accuracy_score(y_test, y_pred)
class_ids = list(range(len(label_encoder.classes_)))
class_names = [str(c) for c in label_encoder.classes_]
report = classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=class_names,
    digits=4,
)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

# Print the log loss recorded at the last boosting round.
# validation_0 is the training split and validation_1 is the test split
# (the order of eval_set above).
# NOTE(review): the "Validation" line therefore reports the test split.
results = model.evals_result()
print("Son iterasyondaki log loss değerleri:")
print(f"Training: {results['validation_0']['mlogloss'][-1]}")
print(f"Validation: {results['validation_1']['mlogloss'][-1]}")

[0]	validation_0-mlogloss:1.07906	validation_1-mlogloss:1.08800
[1]	validation_0-mlogloss:0.89271	validation_1-mlogloss:0.91104
[2]	validation_0-mlogloss:0.76792	validation_1-mlogloss:0.79403
[3]	validation_0-mlogloss:0.67729	validation_1-mlogloss:0.70866
[4]	validation_0-mlogloss:0.60862	validation_1-mlogloss:0.64572
[5]	validation_0-mlogloss:0.55716	validation_1-mlogloss:0.59876
[6]	validation_0-mlogloss:0.51050	validation_1-mlogloss:0.55848
[7]	validation_0-mlogloss:0.47242	validation_1-mlogloss:0.52479
[8]	validation_0-mlogloss:0.44350	validation_1-mlogloss:0.50066
[9]	validation_0-mlogloss:0.41424	validation_1-mlogloss:0.47606
[10]	validation_0-mlogloss:0.39152	validation_1-mlogloss:0.45841
[11]	validation_0-mlogloss:0.37441	validation_1-mlogloss:0.44388
[12]	validation_0-mlogloss:0.35755	validation_1-mlogloss:0.43084
[13]	validation_0-mlogloss:0.34354	validation_1-mlogloss:0.42041
[14]	validation_0-mlogloss:0.32955	validation_1-mlogloss:0.41054
[15]	validation_0-mlogloss:0.31706	

In [None]:
# Hyperparameter search for the XGBoost classifier.
from sklearn.model_selection import train_test_split, GridSearchCV  # GridSearchCV is needed below

# Grid of candidate values: 3**5 = 243 combinations, each cross-validated
# with cv=5 below (1215 fits plus the final refit), so this cell is slow.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.7, 0.8, 0.9],
    'colsample_bytree': [0.7, 0.8, 0.9]
}

# Start from a fresh, unfitted estimator instead of reusing the fitted `model`
# from the previous cell: this cell rebinds `model` below, so reusing it as
# the search input would make the cell depend on its own earlier output.
base_model = xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='mlogloss')

# Find the best hyperparameters with 5-fold cross-validation on the training
# set. No `scoring` is given, so the estimator's own score method is used.
grid_search = GridSearchCV(estimator=base_model, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)

# Best hyperparameters found by the search.
best_params = grid_search.best_params_
print(f'Best parameters: {best_params}')

# GridSearchCV defaults to refit=True, so the best configuration has already
# been retrained on all of X_train; reuse it instead of fitting it again.
model = grid_search.best_estimator_

# Predict on the held-out test set.
y_pred = model.predict(X_test)

# Evaluate the tuned model. The report is labelled with the original class
# names rather than the integer codes; `labels` is passed explicitly so the
# names stay aligned even if a class is absent from y_test.
accuracy = accuracy_score(y_test, y_pred)
class_ids = list(range(len(label_encoder.classes_)))
class_names = [str(c) for c in label_encoder.classes_]
report = classification_report(
    y_test,
    y_pred,
    labels=class_ids,
    target_names=class_names,
    digits=4,
)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)