In [None]:
from google.colab import files

# Upload your CSV file
# The returned value is kept in `uploaded`; no other visible cell reads it.
# Later cells open the data by the literal name 'processed_data.csv', so the
# file has to end up on disk under exactly that name (the recorded output
# shows "Saving processed_data.csv to processed_data.csv").
# NOTE(review): the google.colab import presumably restricts this cell to
# Google Colab runtimes — confirm before running elsewhere.
uploaded = files.upload()

Saving processed_data.csv to processed_data.csv


In [None]:
import pandas as pd

# Read the uploaded CSV from the working directory and report its row count.
df = pd.read_csv('processed_data.csv')
row_count = len(df)
print(f"Loaded {row_count} examples for BERT training")

Loaded 300 examples for BERT training


In [None]:
# Install required libraries
!pip install transformers torch datasets accelerate

# Import necessary libraries
import torch
from transformers import (
    DistilBertTokenizer,
    DistilBertForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)
from torch.utils.data import Dataset
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from sklearn.model_selection import train_test_split
import pandas as pd

print("✅ Libraries installed and imported successfully!")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [None]:
import pandas as pd
import io

# Read the file as raw text so the per-line quoting can be repaired before
# pandas parses it. The 'utf-8-sig' codec already drops a leading BOM.
with open('processed_data.csv', 'r', encoding='utf-8-sig') as f:
    content = f.read()

# Remove any BOM characters that are still present (e.g. in the middle of a
# file that was stitched together from several exports).
content = content.replace('\ufeff', '')
lines = content.split('\n')

# Fix each line by removing the outer quotes that wrap the whole row and
# collapsing the doubled quotes that the wrapping introduced.
fixed_lines = []
for line in lines:
    # Files saved with Windows line endings keep a trailing '\r' after the
    # split; without stripping it, endswith('"') is False and the row would
    # silently stay wrapped in quotes.
    line = line.rstrip('\r')
    if line.strip():
        if line.startswith('"') and line.endswith('"'):
            line = line[1:-1]
        line = line.replace('""', '"')
        fixed_lines.append(line)

# Create properly formatted CSV
fixed_content = '\n'.join(fixed_lines)
df = pd.read_csv(io.StringIO(fixed_content))

# Fail early with a readable message instead of a bare KeyError further down.
missing_columns = [name for name in ('text', 'pattern') if name not in df.columns]
if missing_columns:
    raise ValueError(
        f"processed_data.csv is missing required column(s) {missing_columns}; "
        f"found {df.columns.tolist()}"
    )

# Rows with a missing text would be turned into the literal string 'nan' by
# the dataset class, so surface them here rather than training on them unseen.
rows_with_gaps = int(df[['text', 'pattern']].isna().any(axis=1).sum())
if rows_with_gaps:
    print(f"⚠️ {rows_with_gaps} row(s) have a missing text or pattern value")

print(f"Loaded {len(df)} examples for BERT training")
print(f"Columns: {df.columns.tolist()}")
print(f"Patterns: {df['pattern'].value_counts()}")

# Prepare data for BERT
X = df['text'].values
y = df['pattern'].values

# Create label mapping (ids follow the order in which labels first appear)
unique_labels = df['pattern'].unique()
label2id = {label: i for i, label in enumerate(unique_labels)}
id2label = {i: label for label, i in label2id.items()}

print(f"Label mapping: {label2id}")

Loaded 300 examples for BERT training
Columns: ['narrative_id', 'text', 'pattern', 'source']
Patterns: pattern
pip_tactics               75
strategic_ambiguity       75
isolation_tactics         75
documentation_building    75
Name: count, dtype: int64
Label mapping: {'pip_tactics': 0, 'strategic_ambiguity': 1, 'isolation_tactics': 2, 'documentation_building': 3}


In [None]:
# Load the tokenizer that matches the DistilBERT checkpoint used for training
TOKENIZER_CHECKPOINT = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizer.from_pretrained(TOKENIZER_CHECKPOINT)

# Create custom dataset class
class WorkplaceDataset(Dataset):
    """Torch dataset that tokenizes one narrative per item on demand.

    Args:
        texts: Indexable collection of narratives; each item is passed
            through ``str()`` before tokenization.
        labels: Indexable collection of integer class ids, aligned with
            ``texts`` position by position.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            padding=..., max_length=..., return_tensors='pt')`` whose result
            exposes ``'input_ids'`` and ``'attention_mask'`` tensors.
        max_length: Token limit forwarded to the tokenizer.
        padding: Padding strategy forwarded to the tokenizer. The default
            ``'max_length'`` pads every item to ``max_length`` so items can be
            stacked without a padding collator; pass ``False`` to leave items
            unpadded when the batches are padded elsewhere.

    Raises:
        ValueError: If ``texts`` and ``labels`` differ in length.
    """

    def __init__(self, texts, labels, tokenizer, max_length=512, padding='max_length'):
        # A mismatch would otherwise only surface as an IndexError in the
        # middle of an epoch, far away from the cause.
        if len(texts) != len(labels):
            raise ValueError(
                f"texts and labels must have the same length "
                f"(got {len(texts)} and {len(labels)})"
            )
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.padding = padding

    def __len__(self):
        """Return the number of narratives."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the ``input_ids``, ``attention_mask`` and ``labels`` tensors for item ``idx``."""
        text = str(self.texts[idx])
        label = self.labels[idx]

        # Tokenize
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding=self.padding,
            max_length=self.max_length,
            return_tensors='pt'
        )

        # return_tensors='pt' adds a leading batch axis; flatten() removes it
        # so each item is a 1-D tensor.
        return {
            'input_ids': encoding['input_ids'].flatten(),
            'attention_mask': encoding['attention_mask'].flatten(),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Translate every string label into its integer id
y_numeric = list(map(label2id.__getitem__, y))

# Hold out 20% of the examples, keeping the class proportions equal in both parts
split_parts = train_test_split(
    X,
    y_numeric,
    test_size=0.2,
    random_state=42,
    stratify=y_numeric,
)
X_train, X_test, y_train, y_test = split_parts

print(f"Training samples: {len(X_train)}")
print(f"Test samples: {len(X_test)}")

# Wrap both partitions in the tokenizing dataset class
train_dataset, test_dataset = (
    WorkplaceDataset(texts, labels, tokenizer)
    for texts, labels in ((X_train, y_train), (X_test, y_test))
)

print("✅ Datasets created successfully!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

Training samples: 240
Test samples: 60
✅ Datasets created successfully!


In [None]:
# The classification head is newly (randomly) initialised when the checkpoint
# is loaded, so seed torch first to make a Restart & Run All reproducible.
torch.manual_seed(42)

# Load pre-trained DistilBERT model
model = DistilBertForSequenceClassification.from_pretrained(
    'distilbert-base-uncased',
    num_labels=len(label2id),
    id2label=id2label,
    label2id=label2id
)

NUM_TRAIN_EPOCHS = 3
BATCH_SIZE = 8

# Warm up for about 10% of the optimizer steps. The previous fixed value of
# 100 was longer than the whole run (240 samples / 8 per batch * 3 epochs =
# 90 steps), so the learning rate never reached its configured value.
steps_per_epoch = (len(train_dataset) + BATCH_SIZE - 1) // BATCH_SIZE
total_train_steps = steps_per_epoch * NUM_TRAIN_EPOCHS
warmup_steps = max(1, total_train_steps // 10)

# Define training arguments (corrected parameter names)
training_args = TrainingArguments(
    output_dir='./bert_results',
    num_train_epochs=NUM_TRAIN_EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    warmup_steps=warmup_steps,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",  # Changed from evaluation_strategy
    save_strategy="epoch",
    load_best_model_at_end=True,
    # Select the checkpoint on validation loss, not accuracy. In the recorded
    # run accuracy was 1.0 after every epoch while the loss fell from 1.19 to
    # 0.02; a tie on accuracy keeps the earliest checkpoint, so the barely
    # trained epoch-1 weights were reloaded and every later prediction came
    # out with ~0.25 confidence.
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    dataloader_pin_memory=False,
    report_to="none", # Disable Weights & Biases logging
)

# Define metrics
def compute_metrics(eval_pred):
    """Compute accuracy and macro-averaged precision/recall/F1 for an evaluation pass.

    Args:
        eval_pred: Pair ``(predictions, labels)`` where ``predictions`` holds
            one row of class scores per example and ``labels`` the true
            class ids.

    Returns:
        dict with the keys ``'accuracy'``, ``'f1'``, ``'precision'`` and
        ``'recall'``.
    """
    predictions, labels = eval_pred
    # Highest-scoring class per example
    predictions = np.argmax(predictions, axis=1)

    # zero_division=0 scores a class that is never predicted as 0 without
    # emitting an UndefinedMetricWarning on every evaluation, which is likely
    # during the first epoch of fine-tuning.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='macro', zero_division=0
    )
    accuracy = accuracy_score(labels, predictions)

    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Print a short recap of the configuration, one line per fact
setup_summary = (
    "✅ Model and training setup complete!",
    f"Model loaded: DistilBERT with {len(label2id)} classes",
    "Training epochs: 3",
    "Batch size: 8 (CPU optimized)",
)
print(*setup_summary, sep="\n")

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model and training setup complete!
Model loaded: DistilBERT with 4 classes
Training epochs: 3
Batch size: 8 (CPU optimized)


In [None]:
# Assemble the Trainer from the model, arguments, datasets and metric function
# prepared in the previous cells
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**trainer_kwargs)

print(
    "🚀 Starting BERT fine-tuning...",
    "This will take 10-15 minutes on CPU...",
    sep="\n",
)

# Run the fine-tuning loop
trainer.train()

print("✅ Training completed!")

🚀 Starting BERT fine-tuning...
This will take 10-15 minutes on CPU...


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,1.2996,1.187703,1.0,1.0,1.0,1.0
2,0.4414,0.261833,1.0,1.0,1.0,1.0
3,0.0342,0.018442,1.0,1.0,1.0,1.0


✅ Training completed!


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Final evaluation and results (RUN AFTER TRAINING COMPLETES)
print("=== BERT FINE-TUNING RESULTS ===")

# Score the held-out set and pick the highest-scoring class per example
predictions = trainer.predict(test_dataset)
y_pred = predictions.predictions.argmax(axis=1)

# Map the integer ids back to readable label names
y_test_labels = list(map(id2label.__getitem__, y_test))
y_pred_labels = list(map(id2label.__getitem__, y_pred))

# Per-class breakdown
print("Classification Report:")
print(classification_report(y_test_labels, y_pred_labels))

# Headline numbers, checked against the 0.75 macro-F1 target
test_f1 = predictions.metrics['test_f1']
test_accuracy = predictions.metrics['test_accuracy']
if test_f1 > 0.75:
    target_status = '✅ EXCEEDED'
else:
    target_status = '⚠️ BELOW TARGET'

print(f"\nBERT F1 Score: {test_f1:.3f}")
print(f"BERT Accuracy: {test_accuracy:.3f}")
print(f"Target Achievement: {target_status}")

=== BERT FINE-TUNING RESULTS ===


Classification Report:
                        precision    recall  f1-score   support

documentation_building       1.00      1.00      1.00        15
     isolation_tactics       1.00      1.00      1.00        15
           pip_tactics       1.00      1.00      1.00        15
   strategic_ambiguity       1.00      1.00      1.00        15

              accuracy                           1.00        60
             macro avg       1.00      1.00      1.00        60
          weighted avg       1.00      1.00      1.00        60


BERT F1 Score: 1.000
BERT Accuracy: 1.000
Target Achievement: ✅ EXCEEDED


In [None]:
# Classify one unseen workplace narrative, keeping inputs and model on the same device
test_narrative = """
After I questioned the new remote work policy in a team meeting, my manager suddenly started requiring me to submit daily written reports about my activities. These reports were never required before and no one else has to do them. Every email I send now gets forwarded to HR with additional commentary about my communication style. Small issues like joining a meeting two minutes late are now documented in writing when they never were before.
"""

# Find out where the model weights live, then tokenize the narrative
device = next(model.parameters()).device
inputs = tokenizer(
    test_narrative,
    return_tensors="pt",
    truncation=True,
    padding=True,
    max_length=512,
)

# Every input tensor has to sit on the model's device
inputs = {name: tensor.to(device) for name, tensor in inputs.items()}

# Forward pass without gradient tracking; softmax turns logits into probabilities
model.eval()
with torch.no_grad():
    outputs = model(**inputs)
    predictions = outputs.logits.softmax(dim=-1)
    predicted_class = predictions.argmax(dim=-1).item()
    confidence = predictions.max().item()

print("=== REAL USER TEST ===")
print(f"Input: {test_narrative[:100]}...")
print(f"Predicted Pattern: {id2label[predicted_class]}")
print(f"Confidence: {confidence:.3f}")
print(f"All Probabilities:")
for class_id, class_prob in enumerate(predictions[0]):
    print(f"  {id2label[class_id]}: {class_prob:.3f}")

=== REAL USER TEST ===
Input: 
After I questioned the new remote work policy in a team meeting, my manager suddenly started requir...
Predicted Pattern: strategic_ambiguity
Confidence: 0.254
All Probabilities:
  pip_tactics: 0.244
  strategic_ambiguity: 0.254
  isolation_tactics: 0.254
  documentation_building: 0.248


In [None]:
# Test multiple workplace scenarios with confidence analysis.
# Each scenario pairs a short narrative with the label a human would expect;
# the last one is deliberately vague and has no matching model class.
test_scenarios = [
    {
        "text": "Placed on formal improvement plan after questioning budget decisions. Goals are vague and timeline unrealistic.",
        "expected": "pip_tactics"
    },
    {
        "text": "Manager gives different instructions in meetings versus private conversations. Won't clarify priorities in writing.",
        "expected": "strategic_ambiguity"
    },
    {
        "text": "Removed from team meetings without explanation. Colleagues avoid sharing project information with me.",
        "expected": "isolation_tactics"
    },
    {
        "text": "Every conversation now requires written follow-up. Minor issues become formal policy violations.",
        "expected": "documentation_building"
    },
    {
        "text": "Work has been stressful lately. Manager seems busy. Not sure what's happening with the project.",
        "expected": "unclear/ambiguous"
    }
]

# Confidence bands used to label each prediction. With four classes the top
# probability can never drop below 0.25.
LOW_CONFIDENCE_THRESHOLD = 0.6
HIGH_CONFIDENCE_THRESHOLD = 0.9

print("=== CONFIDENCE THRESHOLD ANALYSIS ===")
device = next(model.parameters()).device

# Switch to evaluation mode here instead of relying on an earlier cell having
# done it; otherwise dropout can stay active and make the confidences noisy.
model.eval()

for i, scenario in enumerate(test_scenarios):
    # Tokenize and predict
    inputs = tokenizer(scenario["text"], return_tensors="pt", truncation=True, padding=True, max_length=512)
    inputs = {key: value.to(device) for key, value in inputs.items()}

    with torch.no_grad():
        outputs = model(**inputs)
        predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
        predicted_class = torch.argmax(predictions, dim=-1).item()
        confidence = torch.max(predictions).item()

    print(f"\n--- Scenario {i+1} ---")
    print(f"Text: {scenario['text'][:80]}...")
    print(f"Expected: {scenario['expected']}")
    print(f"Predicted: {id2label[predicted_class]}")
    print(f"Confidence: {confidence:.3f}")

    # Flag low confidence predictions
    if confidence < LOW_CONFIDENCE_THRESHOLD:
        print("⚠️ LOW CONFIDENCE - Human review recommended")
    elif confidence > HIGH_CONFIDENCE_THRESHOLD:
        print("✅ HIGH CONFIDENCE - Reliable prediction")
    else:
        print("🔄 MEDIUM CONFIDENCE - Consider additional context")

=== CONFIDENCE THRESHOLD ANALYSIS ===

--- Scenario 1 ---
Text: Placed on formal improvement plan after questioning budget decisions. Goals are ...
Expected: pip_tactics
Predicted: pip_tactics
Confidence: 0.266
⚠️ LOW CONFIDENCE - Human review recommended

--- Scenario 2 ---
Text: Manager gives different instructions in meetings versus private conversations. W...
Expected: strategic_ambiguity
Predicted: strategic_ambiguity
Confidence: 0.268
⚠️ LOW CONFIDENCE - Human review recommended

--- Scenario 3 ---
Text: Removed from team meetings without explanation. Colleagues avoid sharing project...
Expected: isolation_tactics
Predicted: isolation_tactics
Confidence: 0.263
⚠️ LOW CONFIDENCE - Human review recommended

--- Scenario 4 ---
Text: Every conversation now requires written follow-up. Minor issues become formal po...
Expected: documentation_building
Predicted: documentation_building
Confidence: 0.278
⚠️ LOW CONFIDENCE - Human review recommended

--- Scenario 5 ---
Text: Work has been 