In [1]:
from scipy.io import arff
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the ARFF dataset; loadarff returns the records plus attribute metadata.
data, meta = arff.loadarff('dataset.arff')
df = pd.DataFrame(data)

# Each column is decoded from UTF-8 bytes and cast to int.
# NOTE(review): this assumes every attribute was loaded as byte strings
# (i.e. all attributes are nominal) — confirm if the dataset changes.
for column in df.columns:
    df[column] = df[column].str.decode('utf-8').astype(int)

# 'Result' is the target column; every other column is used as a feature.
X = df.drop('Result', axis=1)
y = df['Result']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the label encoder on the training labels ONLY, then reuse that same
# mapping for the test labels. Re-fitting on y_test (the previous behaviour)
# derives the test encoding independently, which can silently produce a
# different label -> integer mapping when the test split does not contain
# exactly the same set of classes. transform() raises ValueError on labels
# unseen during fit, which surfaces that situation instead of hiding it.
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.transform(y_test)

In [3]:
# Hyperparameter grid handed to GridSearchCV. Every entry currently holds a
# single candidate value, so the "search" fits exactly one configuration
# (times the number of CV folds). Wider candidate lists that were left
# commented out next to each entry, kept here for reference:
#   learning_rate:    [0.001, 0.01, 0.1, 0.2]
#   min_child_weight: [int(x) for x in range(1, 4, 1)]
#   n_estimators:     [int(x) for x in range(10, 310, 10)]
#   max_depth:        [None] + [int(x) for x in range(10, 120, 10)]
param_grid = dict(
    learning_rate=[0.2],
    min_child_weight=[3],
    n_estimators=[220],
    max_depth=[10],
)

# Base estimator with library defaults; the grid above overrides the tuned parameters.
model = XGBClassifier()

# 3-fold cross-validated search scored by accuracy, using all available cores.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
)
grid_result = grid_search.fit(X_train, y_train)

In [4]:
# Keep the estimator refitted with the best parameter combination for later evaluation.
best_model = grid_result.best_estimator_

# Report the winning parameter combination and its mean cross-validated accuracy.
print("Best Parameters: ", grid_result.best_params_)
print("Best Accuracy: ", grid_result.best_score_)

Best Parameters:  {'learning_rate': 0.2, 'max_depth': 10, 'min_child_weight': 3, 'n_estimators': 220}
Best Accuracy:  0.9698100407055632


In [5]:
# Evaluate the selected model on the held-out test split.
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# accuracy is a fraction in [0, 1]; show it as a percentage with two decimals.
print(f'Accuracy: {accuracy * 100:.2f}%')

Accuracy: 97.01%
