In [None]:
## import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, classification_report
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the raw Verizon CSV export into a DataFrame.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
data_path = '/mnt/data/Verizon Data-1 (1).csv'
verizon_data = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Step 1: Preprocess the data
# Fill missing values in numeric columns with that column's median.
# numeric_only=True restricts the median to numeric columns: pandas >= 2.0 no
# longer silently skips non-numeric columns and raises TypeError instead, and
# the categorical columns encoded below are presumably still strings here.
# NOTE(review): the medians are computed over the full dataset, i.e. before the
# train/test split, so test rows influence the imputed values.
numeric_medians = verizon_data.median(numeric_only=True)
verizon_data = verizon_data.fillna(numeric_medians)

# Encode categorical variables (e.g., gender and pmttype)
# One LabelEncoder per column: refitting a single shared encoder overwrites its
# classes_, which would lose the mapping needed to decode the earlier column.
label_encoders = {}
for categorical_column in ['gender', 'pmttype']:
    label_encoders[categorical_column] = LabelEncoder()
    verizon_data[categorical_column] = label_encoders[categorical_column].fit_transform(
        verizon_data[categorical_column]
    )

# Kept for backward compatibility: previously `label_encoder` ended up fitted
# on 'pmttype' (the last column it was applied to).
label_encoder = label_encoders['pmttype']

# Define features and target variable
X = verizon_data.drop(columns=['default'])
y = verizon_data['default']

In [None]:
# Step 2: Split the data into training and testing sets
# 70/30 split with a fixed seed for reproducibility.
# stratify=y keeps the proportion of each target class the same in the train
# and test sets, so precision/recall are not skewed by an unlucky split of a
# rare class. Requires at least two samples of every class in y.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


In [None]:
# Step 3: Train the XGBoost model
# Hyperparameters are gathered in one place so they are easy to inspect and tune.
xgb_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'random_state': 42,  # fixed seed so repeated runs build the same model
}
xgb_model = XGBClassifier(**xgb_params)

# Fit on the training split only; left as the last expression so the notebook
# still displays the fitted estimator.
xgb_model.fit(X=X_train, y=y_train)

In [None]:
# Step 4: Evaluate the model
# Column 1 of predict_proba is the probability of the positive class; it feeds
# ROC-AUC, while the hard labels from predict feed the other metrics.
y_proba = xgb_model.predict_proba(X_test)[:, 1]
y_pred = xgb_model.predict(X_test)

# Performance metrics on the held-out test split
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
roc_auc = roc_auc_score(y_test, y_proba)

# Print results: one metric per line, rounded to two decimals
print(
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"ROC-AUC: {roc_auc:.2f}",
    sep="\n",
)

# Per-class precision / recall / F1 breakdown
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
