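*** Purpose: Controlled, agent-driven retraining — refit the classifier on reviewed samples from the agent log and save it only if at least one class improves its F1 over the base model.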

In [10]:
import joblib
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report
from collections import Counter
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer, DistilBertModel
import torch

In [11]:
# Step 1: Restore the fitted label encoder and read the agent log.
# NOTE(review): joblib.load unpickles arbitrary objects — only load trusted artifacts.
le_base = joblib.load("label_encoder.joblib")
df = pd.read_csv("agent_log.csv")

# Keep rows whose "reviewed" flag is "Yes" and whose "true_label" is present.
is_reviewed = df["reviewed"].eq("Yes")
has_true_label = df["true_label"].notna()
df_reviewed = df.loc[is_reviewed & has_true_label]

In [12]:
# Step 2: Every reviewed label must already be a class known to the base
# encoder; abort before any training if one is not.
original_classes = set(le_base.classes_)
reviewed_labels = set(df_reviewed["true_label"].unique())
invalid_labels = reviewed_labels.difference(original_classes)
if len(invalid_labels) != 0:
    raise ValueError(f"❌ Invalid labels: {invalid_labels}")
print("✅ All reviewed labels are valid")

✅ All reviewed labels are valid


In [13]:
# Step 3: Encoder ids and display names for the reviewed classes.
# Names are sorted first, so target_names[i] always pairs with present_label_ids[i].
reviewed_class_names = sorted(reviewed_labels)
target_names = list(reviewed_class_names)
# One batched transform call; int() turns the numpy integers into plain ints.
present_label_ids = [int(class_id) for class_id in le_base.transform(reviewed_class_names)]
label_to_id = dict(zip(target_names, present_label_ids))

In [14]:
# Step 4: Raw model inputs (texts) and integer-encoded targets (y)
texts = (
    df_reviewed["text"]
    .astype(str)
    .tolist()
)
# Encode the corrected labels with the base encoder so ids match the base model's.
y = le_base.transform(df_reviewed["true_label"])

In [15]:
# Filter out classes with <2 samples
# (the later split uses stratify=y, which presumably rejects single-member
# classes — confirm against the scikit-learn docs).
class_counts = Counter(y)
valid_idx = [i for i, label in enumerate(y) if class_counts[label] >= 2]
texts = [texts[i] for i in valid_idx]
y = [y[i] for i in valid_idx]

# Refresh the id/name lists built in Step 3. Without this, a class dropped
# above would still be passed to classification_report via labels= and show
# up as a zero-support row in both the new and the base report.
# label_to_id itself is left untouched so this cell stays safe to re-run.
surviving_ids = {int(label) for label in y}
kept_pairs = [(name, class_id) for name, class_id in label_to_id.items() if class_id in surviving_ids]
target_names = [name for name, _ in kept_pairs]
present_label_ids = [class_id for _, class_id in kept_pairs]

In [16]:
# Step 5: BERT embeddings
# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
tokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-uncased")
# .to() and .eval() both return the module, so the calls can be chained;
# eval() switches the model to inference mode.
bert = DistilBertModel.from_pretrained("distilbert-base-uncased").to(device).eval()

def get_embeddings(texts):
    """Embed each text as the mean of DistilBERT's last-layer token vectors.

    Relies on the notebook-level ``tokenizer``, ``bert`` and ``device``.

    Args:
        texts: Iterable of strings. A single ``str`` is treated as one
            document instead of being iterated character by character.

    Returns:
        A 2-D numpy array with one row per input text, in input order.
        Empty input yields an array of shape ``(0, bert.config.dim)``
        rather than raising.
    """
    if isinstance(texts, str):
        texts = [texts]

    vecs = []
    # Inference only: a single no_grad context covers the whole loop.
    with torch.no_grad():
        for text in texts:
            # One text per forward pass, so no padding tokens enter the mean.
            inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True).to(device)
            pooled = bert(**inputs).last_hidden_state.mean(dim=1)
            vecs.append(pooled[0].cpu().numpy())

    if not vecs:
        # np.vstack raises ValueError on an empty list; return a well-shaped
        # empty matrix so callers get a clear downstream error or can check len().
        # dtype float32 assumes the model runs in its default precision — TODO confirm.
        return np.empty((0, bert.config.dim), dtype=np.float32)
    return np.vstack(vecs)

# Embed the filtered texts: row i of X is the vector for texts[i], which was
# filtered with the same indices as y[i].
X = get_embeddings(texts)

In [17]:
# Step 6: Hold out 20% of the reviewed samples for validation.
# Stratified on y so class proportions are kept; fixed seed for repeatability.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)


In [18]:
# Step 7: Fit a fresh logistic-regression classifier on the training embeddings
clf_new = LogisticRegression(
    random_state=42,
    max_iter=1000,
)
# Left as the cell's last expression so the fitted estimator is still displayed.
clf_new.fit(X=X_train, y=y_train)


0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,42
,solver,'lbfgs'
,max_iter,1000


In [19]:
# Step 8: Score the retrained model on the held-out validation rows.
y_pred_new = clf_new.predict(X_val)

# labels/target_names restrict the report to the reviewed classes;
# output_dict=True returns nested dicts keyed by class name.
report_new = classification_report(
    y_true=y_val,
    y_pred=y_pred_new,
    labels=present_label_ids,
    target_names=target_names,
    output_dict=True,
)

print("✅ Retraining complete. Metrics for reviewed classes only.")

✅ Retraining complete. Metrics for reviewed classes only.


In [20]:
# Step 9: Load base model
# NOTE(review): joblib.load unpickles arbitrary objects — only load trusted artifacts.
clf_base = joblib.load("logreg_model.joblib")

# Score the base model on exactly the validation rows used for the new model.
# NOTE(review): assumes the base model was trained on the same embedding
# features as X_val — confirm.
y_pred_base = clf_base.predict(X_val)

# Same labels/target_names as the new model's report so rows line up class by class.
report_base = classification_report(
    y_true=y_val,
    y_pred=y_pred_base,
    labels=present_label_ids,
    target_names=target_names,
    output_dict=True,
)

# Threshold config
F1_IMPROVEMENT_THRESHOLD = 0.05  # relative gain: new F1 must reach base F1 * 1.05
updated_classes = []

# Evaluate F1 improvement class-wise
for cls in target_names:
    f1_new = report_new[cls]["f1-score"]
    f1_base = report_base.get(cls, {}).get("f1-score", 0)

    print(f"\n🔍 Checking class: {cls}")
    print(f"   F1 (base): {f1_base:.4f}")
    print(f"   F1 (new) : {f1_new:.4f}")

    required_f1 = f1_base * (1 + F1_IMPROVEMENT_THRESHOLD)
    # The relative test alone degenerates when the base F1 is 0: the required
    # score is then 0 as well, so 0 → 0 (e.g. a class with no validation
    # support) would be reported as an improvement. Demand a strict gain too.
    improved = f1_new > f1_base and f1_new >= required_f1

    if improved:
        print(f"✅ Class '{cls}' improved: {f1_base:.2f} → {f1_new:.2f}")
        updated_classes.append(cls)
    else:
        print(f"⛔ Class '{cls}' not improved: {f1_base:.2f} → {f1_new:.2f}")



🔍 Checking class: credit_card
   F1 (base): 0.0000
   F1 (new) : 1.0000
✅ Class 'credit_card' improved: 0.00 → 1.00

🔍 Checking class: debt_collection
   F1 (base): 0.0952
   F1 (new) : 1.0000
✅ Class 'debt_collection' improved: 0.10 → 1.00


In [21]:
if not updated_classes:
    print("❌ No eligible class improved. Base model retained.")
else:
    from datetime import datetime

    # Minute-resolution tag: two saves within the same minute share a file name.
    version = datetime.now().strftime("v2_%Y%m%d_%H%M")

    # NOTE(review): clf_new was fit only on the reviewed samples; confirm that
    # versioning it as a full replacement for the base model is intended.
    joblib.dump(clf_new, f"logreg_model_{version}.joblib")
    # The encoder is unchanged but is saved under the same tag as the model.
    joblib.dump(le_base, f"label_encoder_{version}.joblib")

    print(f"🎉 Saved updated model: logreg_model_{version}.joblib")


🎉 Saved updated model: logreg_model_v2_20250713_1436.joblib


In [22]:
###############################