In [1]:
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import torch
from torch.utils.data import DataLoader, TensorDataset

# from get_dataset import X
# from get_dataset import y

# Step 1: Load the pre-split training and testing sets from get_dataset


from get_dataset import X_train_tensor as X_train, X_test_tensor as X_test, y_train_tensor as y_train, y_test_tensor as y_test


Dataset preview:
   age           job  marital  education default  balance housing loan  \
0   58    management  married   tertiary      no     2143     yes   no   
1   44    technician   single  secondary      no       29     yes   no   
2   33  entrepreneur  married  secondary      no        2     yes  yes   
3   47   blue-collar  married    unknown      no     1506     yes   no   
4   33       unknown   single    unknown      no        1      no   no   

   contact  day month  duration  campaign  pdays  previous poutcome   y  
0  unknown    5   may       261         1     -1         0  unknown  no  
1  unknown    5   may       151         1     -1         0  unknown  no  
2  unknown    5   may        76         1     -1         0  unknown  no  
3  unknown    5   may        92         1     -1         0  unknown  no  
4  unknown    5   may       198         1     -1         0  unknown  no  

Dataset shape: (45211, 17)

Categorical columns: ['job', 'marital', 'education', 'default', '

In [2]:

# Step 2: Fit an XGBoost classifier with L2 (ridge) regularization
print("Training XGBoost model with L2 regularization...")
xgb_hyperparams = dict(
    n_estimators=200,
    learning_rate=0.2,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.9,
    reg_lambda=10,  # strength of the L2 penalty
    objective='binary:logistic',
    random_state=42,
)
xgb_model = xgb.XGBClassifier(**xgb_hyperparams)
xgb_model.fit(X_train, y_train)

# Step 3: Predict on the test split and report accuracy, per-class metrics
# and the confusion matrix
y_pred = xgb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"XGBoost Model Accuracy: {accuracy:.4f}")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")
print("\nConfusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")

# Step 4: PyTorch integration
# torch.as_tensor is used instead of the legacy torch.FloatTensor(...)
# constructor: it accepts tensors or arrays of any numeric dtype and converts
# them to float32, reusing the existing storage when the input is already
# float32.
X_train_tensor = torch.as_tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.as_tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.as_tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.as_tensor(y_test, dtype=torch.float32)

# Pair each feature row with its label so the loaders yield (X, y) batches.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training batches are shuffled; the test loader keeps the row order.
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Training XGBoost with PyTorch DataLoader
def train_xgb_with_pytorch_loader(train_loader, **xgb_params):
    """Fit an ``xgb.XGBClassifier`` on all batches yielded by ``train_loader``.

    XGBoost is fitted on the full data at once, so every batch is drained
    from the loader and stacked into a single feature matrix / label vector
    before fitting.

    Args:
        train_loader: Iterable yielding ``(X_batch, y_batch)`` pairs of torch
            tensors.
        **xgb_params: Optional keyword overrides for the default
            ``XGBClassifier`` hyperparameters used here.

    Returns:
        The fitted ``xgb.XGBClassifier``.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    feature_batches, label_batches = [], []
    for X_batch, y_batch in train_loader:
        # detach()/cpu() make the conversion safe for tensors that track
        # gradients or live on an accelerator; plain CPU tensors are unaffected.
        feature_batches.append(X_batch.detach().cpu().numpy())
        label_batches.append(y_batch.detach().cpu().numpy())

    if not feature_batches:
        raise ValueError("train_loader yielded no batches; nothing to fit")

    X_train_combined = np.vstack(feature_batches)
    y_train_combined = np.concatenate(label_batches)

    # Defaults match the original hard-coded configuration; caller-supplied
    # keyword arguments take precedence.
    params = {
        "n_estimators": 100,
        "learning_rate": 0.1,
        "max_depth": 4,
        "reg_lambda": 10,
        "objective": 'binary:logistic',
        "random_state": 42,
    }
    params.update(xgb_params)

    xgb_model_from_loader = xgb.XGBClassifier(**params)
    xgb_model_from_loader.fit(X_train_combined, y_train_combined)
    return xgb_model_from_loader

# Fit a second classifier from the batches yielded by train_loader.
xgb_model_pytorch = train_xgb_with_pytorch_loader(train_loader)

def evaluate_models(xgb_model, test_loader):
    """Print and return the accuracy of ``xgb_model`` on ``test_loader``.

    The evaluation data is taken from the batches yielded by ``test_loader``
    rather than from module-level variables, so the function scores exactly
    the data it is handed.

    Args:
        xgb_model: Fitted estimator exposing ``predict(X)``.
        test_loader: Iterable yielding ``(X_batch, y_batch)`` pairs of torch
            tensors.

    Returns:
        The accuracy computed by ``accuracy_score`` over all batches.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    feature_batches, label_batches = [], []
    for X_batch, y_batch in test_loader:
        # detach()/cpu() keep the conversion safe for tensors that track
        # gradients or live on an accelerator.
        feature_batches.append(X_batch.detach().cpu().numpy())
        label_batches.append(y_batch.detach().cpu().numpy())

    if not feature_batches:
        raise ValueError("test_loader yielded no batches; nothing to evaluate")

    X_test_np = np.vstack(feature_batches)
    y_test_np = np.concatenate(label_batches)
    y_pred_xgb = xgb_model.predict(X_test_np)
    xgb_accuracy = accuracy_score(y_test_np, y_pred_xgb)
    print(f"\nXGBoost Final Test Accuracy: {xgb_accuracy:.4f}")
    return xgb_accuracy

# Scores xgb_model (the classifier fitted directly on X_train earlier in this cell).
# NOTE(review): xgb_model_pytorch is trained above but never evaluated here — confirm that is intended.
evaluate_models(xgb_model, test_loader)

Training XGBoost model with L2 regularization...
XGBoost Model Accuracy: 0.9064

Classification Report:
              precision    recall  f1-score   support

         0.0       0.93      0.96      0.95      7952
         1.0       0.65      0.48      0.55      1091

    accuracy                           0.91      9043
   macro avg       0.79      0.72      0.75      9043
weighted avg       0.90      0.91      0.90      9043


Confusion Matrix:
[[7673  279]
 [ 567  524]]

XGBoost Final Test Accuracy: 0.9064


0.9064469755612076

In [3]:
# Dump every tree of the loader-trained model as text and show the first one.
booster = xgb_model_pytorch.get_booster()
tree_dumps = booster.get_dump(dump_format='text')
tree_info = tree_dumps[0]
print(tree_info)

0:[f3<1.01286077] yes=1,no=2,missing=2
	1:[f40<1] yes=3,no=4,missing=4
		3:[f3<-0.19867231] yes=7,no=8,missing=8
			7:[f34<1] yes=15,no=16,missing=16
				15:leaf=-0.104590856
				16:leaf=0.0884422883
			8:[f24<1] yes=17,no=18,missing=18
				17:leaf=0.0118087353
				18:leaf=-0.082920596
		4:[f3<-0.470490605] yes=9,no=10,missing=10
			9:[f35<1] yes=19,no=20,missing=20
				19:leaf=0.03941774
				20:leaf=-0.0352685489
			10:[f8<1] yes=21,no=22,missing=22
				21:leaf=0.34413144
				22:leaf=0.0194490533
	2:[f3<2.06518579] yes=5,no=6,missing=6
		5:[f27<1] yes=11,no=12,missing=12
			11:[f40<1] yes=23,no=24,missing=24
				23:leaf=0.132589713
				24:leaf=0.2643556
			12:[f3<1.50213373] yes=25,no=26,missing=26
				25:leaf=-0.0140478862
				26:leaf=0.055812031
		6:[f1<-0.579450667] yes=13,no=14,missing=14
			13:[f1<-0.597186208] yes=27,no=28,missing=28
				27:leaf=0.0536173955
				28:leaf=-0.00673414348
			14:[f5<3.50354981] yes=29,no=30,missing=30
				29:leaf=0.260466188
				30:leaf=0.00689533306

In [5]:
# Render the first tree of the loader-trained model and write it to 'tree0.dot'.
tree_graph = xgb.to_graphviz(xgb_model_pytorch, num_trees=0)
tree_graph.save('tree0.dot')

'tree0.dot'

SyntaxError: invalid syntax (4280592370.py, line 1)