In [2]:
!pip install torch --quiet
!pip install transformers --quiet

In [3]:
# Step 1 — Load base DistilBERT feature extractor

from transformers import DistilBertTokenizer, DistilBertModel
import torch

# Tokenizer for the pretrained uncased DistilBERT checkpoint.
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the base encoder from the same checkpoint and move it to the chosen
# device in one step (Module.to returns the module itself).
bert = DistilBertModel.from_pretrained("distilbert-base-uncased").to(device)

# Switch to evaluation mode for inference. Kept as the cell's last expression,
# so the notebook displays the module summary.
bert.eval()


  from pandas.core.computation.check import NUMEXPR_INSTALLED


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

DistilBertModel(
  (embeddings): Embeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer): Transformer(
    (layer): ModuleList(
      (0-5): 6 x TransformerBlock(
        (attention): DistilBertSdpaAttention(
          (dropout): Dropout(p=0.1, inplace=False)
          (q_lin): Linear(in_features=768, out_features=768, bias=True)
          (k_lin): Linear(in_features=768, out_features=768, bias=True)
          (v_lin): Linear(in_features=768, out_features=768, bias=True)
          (out_lin): Linear(in_features=768, out_features=768, bias=True)
        )
        (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (ffn): FFN(
          (dropout): Dropout(p=0.1, inplace=False)
          (lin1): Linear(in_features=768, out_features=3072, bias=True)
          (lin2): L

In [4]:
# Step 2 — Load classifier and label encoder
import joblib

# Restore the fitted classifier and its label encoder from disk.
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code -- only load artifacts that come from a trusted source.
clf, le = (
    joblib.load(artifact_path)
    for artifact_path in ("logreg_model.joblib", "label_encoder.joblib")
)



In [5]:
#Step 3 — Define predict(text) function
def predict(text):
    """Classify one complaint narrative.

    The text is tokenized, encoded with ``bert``, mean-pooled into a single
    vector, and scored by ``clf``; the predicted class is mapped back to its
    name with ``le``. Relies on the notebook-level ``tokenizer``, ``bert``,
    ``device``, ``clf`` and ``le`` objects.

    Args:
        text: The narrative to classify. If a batch (list of strings) is
            passed, only the first item's prediction is returned.

    Returns:
        A ``(pred_idx, label, confidence)`` tuple: the class value returned
        by ``clf.predict``, the decoded label from ``le.inverse_transform``,
        and the highest class probability for that same input row.
    """
    # Tokenize; truncation keeps the input within the tokenizer's length limit.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)

    with torch.no_grad():
        # assumes last_hidden_state is (batch, seq_len, hidden) -- TODO confirm
        hidden = bert(**inputs).last_hidden_state

        # Mask-aware mean pooling: average only over real tokens so padding
        # positions never dilute the embedding. With no padding (a single
        # string) this is the plain mean over the sequence.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        token_counts = mask.sum(dim=1).clamp(min=1)  # avoid division by zero
        pooled = ((hidden * mask).sum(dim=1) / token_counts).cpu().numpy()

    pred_idx = clf.predict(pooled)[0]
    # Take the max over row 0 only, so the confidence always belongs to the
    # same input as pred_idx (a max over the whole matrix could come from
    # another row).
    confidence = clf.predict_proba(pooled)[0].max()
    label = le.inverse_transform([pred_idx])[0]

    return pred_idx, label, confidence


In [7]:
# Load the test set inside this cell so it works on a fresh kernel: `df` is
# otherwise first assigned by a later cell, which makes this cell raise
# NameError under Restart & Run All.
import pandas as pd

df = pd.read_csv("downloads/complaints_test.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32485 entries, 0 to 32484
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  32485 non-null  int64 
 1   product     32485 non-null  object
 2   narrative   32484 non-null  object
dtypes: int64(1), object(2)
memory usage: 761.5+ KB


In [8]:
#Step 4 — Test on sample complaints
import pandas as pd
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score

# Read the test complaints CSV.
df = pd.read_csv("downloads/complaints_test.csv")

# Keep only the text and target columns, drop rows missing either one,
# and cast both columns to str.
df_clean = df[["narrative", "product"]].dropna().astype(str)

texts = list(df_clean["narrative"])
y_true = le.transform(list(df_clean["product"]))

# predict() returns (pred_idx, label, confidence); only the class index is scored.
y_pred = [predict(narrative)[0] for narrative in texts]

# Step 4 (continued): print macro-averaged metrics, then the per-class report.
macro_scores = (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)

print("\n--- NLP Model Evaluation ---")
for heading, scorer in macro_scores:
    print(heading, scorer(y_true, y_pred, average="macro"))

report = classification_report(y_true, y_pred, target_names=le.classes_)
print("\n", report)


--- NLP Model Evaluation ---
Precision: 0.45114463030743124
Recall: 0.5217442597603732
F1 Score: 0.40755532361446944

                      precision    recall  f1-score   support

        credit_card       0.29      0.41      0.34      3114
   credit_reporting       0.93      0.31      0.47     18235
    debt_collection       0.54      0.45      0.49      4630
mortgages_and_loans       0.24      0.58      0.34      3798
     retail_banking       0.26      0.85      0.40      2707

           accuracy                           0.42     32484
          macro avg       0.45      0.52      0.41     32484
       weighted avg       0.67      0.42      0.44     32484



In [10]:
import time

# Time one full pass of single-text inference over the test narratives.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted mid-run.
start = time.perf_counter()
for text in texts:
    predict(text)  # result discarded; only the elapsed time matters
end = time.perf_counter()

# Report NaN rather than raising ZeroDivisionError when there is nothing to time.
latency_per_tx = (end - start) / len(texts) if texts else float("nan")
print(f"Avg Inference Latency per Tx: {latency_per_tx:.6f} sec")


Avg Inference Latency per Tx: 0.008812 sec
