In [1]:

import itertools

import pandas as pd
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import RobustScaler
from xgboost import XGBClassifier

from src.evalution import evaluate_result

In [2]:
UNIQUE_LABELS = [0, 1, 2, 3]
FEATURES = ['project_name', 'project_version', 'label', 'code', 'code_comment',
            'code_no_comment', 'lc', 'pi', 'ma', 'nbd', 'ml', 'd', 'mi', 'fo', 'r',
            'e']
TRAIN_COLS = ['lc', 'pi', 'ma', 'nbd', 'ml', 'd', 'mi', 'fo', 'r', 'e']

In [3]:
train = pd.read_csv("../../datasets/data/train.csv")
x_train = train[TRAIN_COLS]
y_train = train["label"]

In [4]:
test = pd.read_csv("../../datasets/data/test.csv")
x_test = test[TRAIN_COLS]
y_test = test["label"]

In [7]:
param_grid = {
    'selector__k': [3, 5, 9, 10],
    'classifier__n_estimators': [50, 100, 200],
    'classifier__max_depth': [3, 6, 9],
    'classifier__learning_rate': [0.01, 0.1, 0.2],
    'classifier__subsample': [0.8, 1.0],
    'classifier__objective': ['multi:softmax', 'multi:softprob'],
    'classifier__booster': ['gbtree', 'gblinear', 'dart']  # Farklı booster türleri
}

param_combinations = list(itertools.product(*param_grid.values()))  # Tüm kombinasyonları oluştur

results = []

scaler = RobustScaler()

for params in param_combinations:
    param_dict = dict(zip(param_grid.keys(), params))

    selector = SelectKBest(score_func=f_classif, k=param_dict['selector__k'])

    model = XGBClassifier(
        n_estimators=param_dict['classifier__n_estimators'],
        max_depth=param_dict['classifier__max_depth'],
        learning_rate=param_dict['classifier__learning_rate'],
        subsample=param_dict['classifier__subsample'],
        objective=param_dict['classifier__objective'],
        booster=param_dict['classifier__booster'],  # Eksik olan booster parametresi eklendi
        use_label_encoder=False,
        eval_metric='mlogloss',
        num_class=4
    )

    x_train_scaled = scaler.fit_transform(x_train)
    x_test_scaled = scaler.transform(x_test)

    x_train_selected = selector.fit_transform(x_train_scaled, y_train)
    x_test_selected = selector.transform(x_test_scaled)

    model.fit(x_train_selected, y_train)

    y_pred = model.predict(x_test_selected)
    probs = model.predict_proba(x_test_selected)

    eval_results = evaluate_result(y_true=y_test, y_prediction=y_pred, prob=probs)
    
    eval_results['selector__k'] = param_dict['selector__k']
    eval_results['classifier__n_estimators'] = param_dict['classifier__n_estimators']
    eval_results['classifier__max_depth'] = param_dict['classifier__max_depth']
    eval_results['classifier__learning_rate'] = param_dict['classifier__learning_rate']
    eval_results['classifier__subsample'] = param_dict['classifier__subsample']
    eval_results['classifier__objective'] = param_dict['classifier__objective']
    eval_results['classifier__booster'] = param_dict['classifier__booster']

    results.append(eval_results)

results_df = pd.DataFrame(results)
results_df.to_excel("gridsearch_results.xlsx", index=False)

Parameters: { "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "max_depth", "subsample", "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "max_depth", "subsample", "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "use_label_encoder" } are not used.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Parameters: { "max_depth", "subsample", "use_label_encoder" } are not used.

  _

Grid Search tamamlandı, sonuçlar kaydedildi!
