In [3]:
# Import necessary libraries
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import xgboost as xgb

# Build a synthetic binary-classification dataset (1000 rows, 20 features)
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable
splits = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits

# Wrap each partition, together with its labels, in an XGBoost DMatrix
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test, label=y_test)

# Booster configuration: depth-3 trees, step size 0.3, logistic objective
# for binary labels, evaluated with log loss
params = dict(
    max_depth=3,
    eta=0.3,
    objective='binary:logistic',
    eval_metric='logloss',
)

# Fit the booster on the training matrix for a fixed number of rounds
num_round = 100
bst = xgb.train(params, dtrain, num_boost_round=num_round)

# Make predictions
# With the 'binary:logistic' objective, predict() returns one probability per
# test row, so a 0.5 cut-off turns the scores into hard 0/1 labels.
y_pred = bst.predict(dtest)
# Threshold the whole array at once; this also avoids reusing the name `y`
# (the label array) as a loop variable.
y_pred_binary = (y_pred > 0.5).astype(int)

# Calculate accuracy: fraction of test rows whose thresholded label matches y_test
accuracy = accuracy_score(y_test, y_pred_binary)
print(f'Accuracy: {accuracy:.2f}')

# Rank features by their 'weight' importance score, highest first
importance = bst.get_score(importance_type='weight')
sorted_importance = list(importance.items())
# list.sort is stable, so tied scores keep the order returned by get_score
sorted_importance.sort(key=lambda pair: pair[1], reverse=True)

# Report only the five highest-ranked features
print("\nFeature Importance:")
top_five = sorted_importance[:5]
for feature, score in top_five:
    print(f"{feature}: {score}")

# Example of using the model for prediction
# Draw the sample from a seeded generator so the printed value is the same on
# every run, and take the column count from the training data instead of
# repeating the literal 20 used when the dataset was generated.
rng = np.random.default_rng(42)
new_data = xgb.DMatrix(rng.random((1, X_train.shape[1])))  # one row, uniform in [0, 1)
# Under 'binary:logistic' the returned value is a probability, not a class label
prediction = bst.predict(new_data)
print(f"\nPrediction for new data: {prediction[0]:.2f}")

Accuracy: 0.91

Feature Importance:
f14: 96.0
f5: 73.0
f6: 37.0
f0: 29.0
f13: 28.0

Prediction for new data: 1.00
