In [5]:
from datasets import load_dataset
import numpy as np
import pandas as pd

from google.colab import drive
drive.mount('/content/drive')
train_df = pd.read_pickle("/content/drive/My Drive/CMPE 257/CMPE 257 Colab/257 Sensitive Data Input Guardrail/train_df_embedding.pkl")
test_df = pd.read_pickle("/content/drive/My Drive/CMPE 257/CMPE 257 Colab/257 Sensitive Data Input Guardrail/test_df_embedding.pkl")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
import xgboost as xgb
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split

# prepare the data
X_train_texts = train_df["source_text"]
y_train = train_df["is_sensitive"]

X_test_texts = test_df["source_text"]
y_test = test_df["is_sensitive"]

# vectorize text with TF-IDF
vectorizer = TfidfVectorizer(max_features = 1500, ngram_range = (1, 2), stop_words = "english")

X_train = vectorizer.fit_transform(X_train_texts)
X_test = vectorizer.transform(X_test_texts)

# train xgboost classifier
xgb_clf = xgb.XGBClassifier(objective = "binary:logistic", eval_metric = "logloss", max_depth = None, learning_rate = 0.1, n_estimators = 500,
    subsample = 0.8, # random rows to prevent overfitting and add diversity
    colsample_bytree = 0.8, # random columns to reduces feature correlation
    random_state = 42
)

xgb_clf.fit(X_train, y_train)

# evaluate
y_pred = xgb_clf.predict(X_test)

print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits = 3))


feature_names = vectorizer.get_feature_names_out()
importances = xgb_clf.feature_importances_
indices = np.argsort(importances)[::-1][:20]

print("\nTop 20 Important Words for Sensitive Detection:")
for i in indices:
    print(f"{feature_names[i]}: {importances[i]:.4f}")


Accuracy: 0.9209665240372514

Classification Report:
              precision    recall  f1-score   support

           0      0.708     0.525     0.603       908
           1      0.941     0.972     0.956      7038

    accuracy                          0.921      7946
   macro avg      0.824     0.749     0.780      7946
weighted avg      0.914     0.921     0.916      7946


Top 20 Important Words for Sensitive Detection:
com: 0.0215
street: 0.0156
passport: 0.0112
number: 0.0111
road: 0.0103
eng: 0.0094
username: 0.0090
ip: 0.0075
sex: 0.0067
11: 0.0062
city: 0.0059
id: 0.0058
pm: 0.0055
postcode: 0.0054
country: 0.0054
clock: 0.0052
head meta: 0.0051
introduction: 0.0049
02: 0.0048
password: 0.0047


In [34]:
# Embeddings and PCA with 80% variance

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# building feature matrices from embedding column
X_train = np.vstack(train_df["embedding"].values)  # shape - (n_train, embedding_dim)
X_test  = np.vstack(test_df["embedding"].values)   # shape - (n_test, embedding_dim)

y_train = train_df["is_sensitive"].values
y_test  = test_df["is_sensitive"].values


# scaling embeddings before logistic regression
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test  = scaler.transform(X_test)


# fit pca with 114 components
pca = PCA(n_components = 114) # 114 components to capture 80% of variance
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# train xgboost classifier
xgb_clf = xgb.XGBClassifier(objective = "binary:logistic", eval_metric = "logloss", max_depth = None, learning_rate = 0.15, n_estimators = 1100,
    subsample = 0.8, # random rows to prevent overfitting and add diversity
    colsample_bytree = 0.8, # random columns to reduces feature correlation
    random_state = 42
)

xgb_clf.fit(X_train, y_train)

# evaluate
y_pred = xgb_clf.predict(X_test)

print("\nAccuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits = 3))


feature_names = vectorizer.get_feature_names_out()
importances = xgb_clf.feature_importances_
indices = np.argsort(importances)[::-1][:20]

print("\nTop 20 Important Words for Sensitive Detection:")
for i in indices:
    print(f"{feature_names[i]}: {importances[i]:.4f}")


Accuracy: 0.9288950415303298

Classification Report:
              precision    recall  f1-score   support

           0      0.754     0.561     0.643       908
           1      0.945     0.976     0.961      7038

    accuracy                          0.929      7946
   macro avg      0.850     0.768     0.802      7946
weighted avg      0.923     0.929     0.924      7946


Top 20 Important Words for Sensitive Detection:
00: 0.1039
00 00: 0.0390
000: 0.0278
12th: 0.0254
03: 0.0248
10th: 0.0190
06: 0.0171
15th: 0.0160
17: 0.0149
1988: 0.0127
17th: 0.0124
1987: 0.0123
08: 0.0118
11th: 0.0109
150: 0.0109
1995: 0.0109
00 email: 0.0104
14th: 0.0101
1980: 0.0099
1975: 0.0098
