In [None]:
import json
from datetime import datetime, timedelta, timezone

from main import BatchDriftDetector, DriftDetectionConfig, DriftMethod

print("=" * 80)
print("BATCH DRIFT DETECTOR - Example Usage")
print("=" * 80)

# Every sample corpus below is built from 5 distinct sentences repeated
# REPEAT_FACTOR times, i.e. 5 * 250 = 1,250 texts per corpus.
REPEAT_FACTOR = 250

# Sample reference data (finance-themed sentences)
reference_texts = [
    "The stock market showed positive gains today.",
    "Technology companies announced new products.",
    "Economic indicators suggest steady growth.",
    "Investors are optimistic about future returns.",
    "Financial analysts predict market stability.",
] * REPEAT_FACTOR  # 1,250 texts

# Current batch - similar domain (no drift expected)
batch1_texts = [
    "Market indices closed higher on Friday.",
    "Tech firms revealed innovative solutions.",
    "Financial metrics indicate stability.",
    "Shareholders expect positive outcomes.",
    "Economic forecasts remain bullish.",
] * REPEAT_FACTOR  # 1,250 texts

# Current batch - different domain (drift expected)
batch2_texts = [
    "The weather forecast predicts heavy rain.",
    "Sports teams competed in the championship.",
    "Cooking recipes became popular online.",
    "Travel destinations attract many tourists.",
    "Entertainment news dominated headlines.",
] * REPEAT_FACTOR  # 1,250 texts

# Step 1: describe which drift checks to run and how.
print("\n1. Creating configuration...")
enabled_methods = [DriftMethod.DOMAIN_CLASSIFIER, DriftMethod.EMBEDDING]
config = DriftDetectionConfig(
    methods=enabled_methods,
    embedding_model="all-MiniLM-L6-v2",
    embedding_drift_method="mmd",
    report_dir="drift_reports",
    verbose=True,
)

# Echo back the method identifiers the configuration actually holds.
configured_names = [m.value for m in config.methods]
print(f"Configured methods: {configured_names}")

# Step 2: build the detector from the configuration.
print("\n2. Initializing detector...")
detector = BatchDriftDetector(config)

# Step 3: fit the detector on the reference corpus.
print("\n3. Fitting on reference data...")
detector.fit(reference_texts=reference_texts)

# Detect drift on batch 1 (no drift expected)
print("\n" + "=" * 80)
print("BATCH 1: Testing on NO DRIFT batch")
print("=" * 80)

# datetime.utcnow() is deprecated (Python 3.12+). This produces the same
# naive-UTC value without the warning; tzinfo is stripped so the detector
# receives exactly the kind of datetime it received before.
time_start = datetime.now(timezone.utc).replace(tzinfo=None)
time_end = time_start + timedelta(hours=1)

result1 = detector.detect(
    batch_id='batch_001',
    # The "no drift" scenario must use the similar-domain batch; passing
    # batch2_texts here would test the drifted corpus under the wrong label.
    texts=batch1_texts,
    time_start=time_start,
    time_end=time_end
)


 




BATCH DRIFT DETECTOR - Example Usage

1. Creating configuration...
Configured methods: ['domain_classifier', 'embedding']

2. Initializing detector...

3. Fitting on reference data...
Caching reference texts for domain classifier...
✓ Fitting complete!
Fitting embedding detector on reference data...
Loading embedding model: all-MiniLM-L6-v2...
Pre-computing embeddings for reference data...


Batches: 100%|██████████| 40/40 [00:00<00:00, 140.83it/s]


✓ Cached embeddings shape: (1250, 384)
✓ Fitting complete!

BATCH 1: Testing on NO DRIFT batch
Training domain classifier...



datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).



✓ Report saved to drift_reports/domain_classifier_batch_001_20251208_010711.html
Generating embeddings for current data...


Batches: 100%|██████████| 40/40 [00:00<00:00, 139.87it/s]


Reference embeddings shape: (1250, 384)
Current embeddings shape: (1250, 384)
Calculating embedding drift using MMD (Maximum Mean Discrepancy) (bootstrap: True)...
✓ Report saved to drift_reports/embedding_batch_001_20251208_010711.html


In [2]:
# Dump each drift method's verdict followed by its full details as JSON.
for detector_name, outcome in result1.method_results.items():
    print("\nMethod: {}".format(detector_name))
    print("Drift Detected: {}".format(outcome.drift_detected))
    details_json = json.dumps(outcome.details, indent=4)
    print("Details: {}".format(details_json))


Method: domain_classifier
Drift Detected: True
Details: {
    "text_column_name": "text",
    "domain_classifier_roc_auc": 1.0,
    "random_classifier_95_percentile": 0.5257903440931235,
    "content_drift": true,
    "current": {
        "characteristic_examples": [
            "Financial metrics indicate stability.",
            "Financial metrics indicate stability.",
            "Financial metrics indicate stability.",
            "Shareholders expect positive outcomes.",
            "Economic forecasts remain bullish.",
            "Market indices closed higher on Friday.",
            "Economic forecasts remain bullish.",
            "Tech firms revealed innovative solutions.",
            "Financial metrics indicate stability.",
            "Tech firms revealed innovative solutions."
        ],
        "characteristic_words": [
            "bullish",
            "forecasts",
            "remain",
            "closed",
            "higher",
            "indices",
            "fr

In [None]:

def print_batch_summary(result):
    """Print a human-readable summary of one batch detection result.

    Shows the batch-level fields (batch id, drift flag, severity, timestamp),
    the aggregation statistics, and one entry per drift method.

    Args:
        result: Object returned by ``detector.detect``. This function reads
            its ``batch_id``, ``drift_detected``, ``drift_severity``,
            ``timestamp``, ``aggregation_details`` and ``method_results``
            attributes.
    """
    aggregation = result.aggregation_details

    print(f"\nResults:")
    print(f"  Batch ID: {result.batch_id}")
    print(f"  Drift Detected: {result.drift_detected}")
    print(f"  Overall Severity: {result.drift_severity}")
    print(f"  Timestamp: {result.timestamp}")
    print(f"\n  Aggregation Details:")
    print(f"    - Total Methods: {aggregation['total_methods']}")
    print(f"    - Methods Detecting Drift: {aggregation['methods_detected_drift']}")
    print(f"    - Detection Rate: {aggregation['detection_rate']:.1%}")
    print(f"\n  Individual Method Results:")
    for method_name, method_result in result.method_results.items():
        print(f"    [{method_name}]")
        print(f"      Drift: {method_result.drift_detected}")
        print(f"      Score: {method_result.drift_score:.4f}")
        print(f"      Severity: {method_result.severity}")
        # Not every method is guaranteed to record a report path.
        print(f"      Report: {method_result.details.get('report_path', 'N/A')}")


print_batch_summary(result1)

# Detect drift on batch 2 (drift expected)
print("\n" + "=" * 80)
print("BATCH 2: Testing on WITH DRIFT batch")
print("=" * 80)

# datetime.utcnow() is deprecated (Python 3.12+). This produces the same
# naive-UTC value without the warning; tzinfo is stripped so the detector
# receives exactly the kind of datetime it received before.
time_start = datetime.now(timezone.utc).replace(tzinfo=None)
time_end = time_start + timedelta(hours=1)

result2 = detector.detect(
    batch_id='batch_002',
    texts=batch2_texts,
    time_start=time_start,
    time_end=time_end
)

print_batch_summary(result2)

# Convert both results to plain dictionaries and show their top-level keys.
banner = "=" * 80

print("\n" + banner)
print("EXPORTING RESULTS")
print(banner)

result1_dict, result2_dict = result1.to_dict(), result2.to_dict()

batch1_keys = list(result1_dict.keys())
batch2_keys = list(result2_dict.keys())
print(f"\n  Batch 1 JSON keys: {batch1_keys}")
print(f"  Batch 2 JSON keys: {batch2_keys}")

print("\n" + banner)
print("Example complete! Check drift_reports/ for HTML reports.")
print(banner)






