In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb

# CSV dosyasını oku
df = pd.read_csv('vk5_drop.csv')

# 'Name' ve diğer istenmeyen sütunları çıkar
df = df.drop(columns=['name'])

# 'Label' sütununu hedef değişken (y) olarak ayarla
y = df['label']

# LabelEncoder ile etiketleri 0, 1, 2, 3 olacak şekilde dönüştür
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Geri kalan sütunları özellikler (X) olarak ayarla
X = df.drop(columns=['label'])

# Veriyi eğitim ve test setlerine ayır
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# LightGBM modelini oluştur ve eğit
model = lgb.LGBMClassifier(random_state=42)

# Modeli eğitim esnasında loss değerlerini takip etmek için eval_set parametresini kullan
eval_set = [(X_train, y_train), (X_test, y_test)]
model.fit(X_train, y_train, eval_metric='logloss', eval_set=eval_set)

# Test seti ile tahmin yap
y_pred = model.predict(X_test)

# Modeli değerlendir
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

# Eğitim esnasındaki son loss ve validation loss değerlerini çekmek için
results = model.evals_result_
training_loss = results['training']['multi_logloss'][-1]
validation_loss = results['valid_1']['multi_logloss'][-1]
print(f'Last Training Loss: {training_loss}')
print(f'Last Validation Loss: {validation_loss}')


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001035 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7751
[LightGBM] [Info] Number of data points in the train set: 8345, number of used features: 31
[LightGBM] [Info] Start training from score -1.359923
[LightGBM] [Info] Start training from score -1.518440
[LightGBM] [Info] Start training from score -1.165767
[LightGBM] [Info] Start training from score -1.548426
Accuracy: 0.8974604695735505
Classification Report:
              precision    recall  f1-score   support

           0     0.9722    0.9597    0.9659       546
           1     0.9711    0.9895    0.9802       476
           2     0.8250    0.8815    0.8523       599
           3     0.8274    0.7511    0.7874       466

    accuracy                         0.8975      2087
   macro avg     0.8989    0.8954    0.8965      2087
weighted avg     0.8974    0.8975    0.8967      2087

Last Trai

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
import lightgbm as lgb

# CSV dosyasını oku
df = pd.read_csv('vk5_drop.csv')

# 'Name' ve diğer istenmeyen sütunları çıkar
df = df.drop(columns=['name'])

# 'Label' sütununu hedef değişken (y) olarak ayarla
y = df['label']

# LabelEncoder ile etiketleri 0, 1, 2, 3 olacak şekilde dönüştür
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Geri kalan sütunları özellikler (X) olarak ayarla
X = df.drop(columns=['label'])

# Veriyi eğitim ve test setlerine ayır
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# LightGBM modelini oluştur
model = lgb.LGBMClassifier(random_state=42)

# Hiperparametre aralığını belirle
param_grid = {
    'num_leaves': [31, 50],
    'learning_rate': [0.1, 0.01],
    'n_estimators': [100, 200],
    'boosting_type': ['gbdt', 'dart']
}

# GridSearchCV ile en iyi hiperparametreleri bul
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

# En iyi parametreleri kullanarak LightGBM modelini oluştur ve eğit
best_params = grid_search.best_params_
model = lgb.LGBMClassifier(**best_params, random_state=42)

# Modeli eğitim esnasında loss değerlerini takip etmek için eval_set parametresini kullan
eval_set = [(X_train, y_train), (X_test, y_test)]
model.fit(X_train, y_train, eval_metric='logloss', eval_set=eval_set)

# Test seti ile tahmin yap
y_pred = model.predict(X_test)

# Modeli değerlendir
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred, digits=4)

print(f'Best Parameters: {best_params}')
print(f'Accuracy: {accuracy}')
print('Classification Report:')
print(report)

# Eğitim esnasındaki son loss ve validation loss değerlerini çekmek için
results = model.evals_result_
training_loss = results['training']['multi_logloss'][-1]
validation_loss = results['valid_1']['multi_logloss'][-1]
print(f'Last Training Loss: {training_loss}')
print(f'Last Validation Loss: {validation_loss}')


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000840 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7742
[LightGBM] [Info] Number of data points in the train set: 6676, number of used features: 31
[LightGBM] [Info] Start training from score -1.360273
[LightGBM] [Info] Start training from score -1.518030
[LightGBM] [Info] Start training from score -1.166151
[LightGBM] [Info] Start training from score -1.547862
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000929 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 7739
[LightGBM] [Info] Number of data points in the train set: 6676, number of used features: 31
[LightGBM] [Info] Start training from score -1.360273
[LightGBM] [Info] Start training from score -1.518030
[LightGBM] [Info] Start training from score -1.165670
[LightGBM] [Info] Start training from score -1