In [2]:
!pip install sentence-transformers boto3 numpy

Collecting sentence-transformers
  Using cached sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Using cached sentence_transformers-4.1.0-py3-none-any.whl (345 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-4.1.0


In [3]:
# COMPLETE ENHANCED TECHTRANSLATOR IMPLEMENTATION
import boto3
import json
from sentence_transformers import SentenceTransformer
import time

# Banner: title line followed by a 60-character rule.
print("🚀 ENHANCED TECHTRANSLATOR IMPLEMENTATION", "=" * 60, sep="\n")

# Names of the AWS resources this notebook writes to.
# Edit these two constants if your deployment uses different names.
BUCKET_NAME = "tech-translator-s3-knowledge-base"
TABLE_NAME = "tech-translator-dynamodb-vector-storage"

# Create the S3 client, the DynamoDB resource and the sentence-embedding
# model that the steps below rely on.
print("📦 Initializing AWS clients and model...")
s3 = boto3.client(service_name='s3')
dynamodb = boto3.resource(service_name='dynamodb')
model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')
print("✅ Initialization complete")

# Enhanced concepts with FLAN-T5 optimization
#
# Knowledge-base entries, one dict per concept. Every entry has the same shape:
#   concept_id            - slug; used later in this script as the S3 object
#                           name and as the "concept_id" attribute in DynamoDB
#   title                 - human-readable concept name
#   content               - dict with "definition", "technical_details",
#                           "insurance_context" and "limitations" texts
#   audience_explanations - one explanation per audience
#                           ("underwriter", "actuary", "executive")
#   examples              - list of {"context", "explanation"} scenarios
#   related_concepts      - slugs of related concepts
#   action_guidance       - one guidance text per audience (same keys as
#                           audience_explanations)
# NOTE(review): several related_concepts slugs (e.g. "combined-ratio",
# "machine-learning") have no entry in this list - confirm they are defined
# elsewhere or are placeholders for future entries.
enhanced_concepts = [
    # Concept 1: R-squared
    {
        "concept_id": "r-squared",
        "title": "R-squared",
        "content": {
            "definition": "R-squared (R²) measures how much of the premium variation your pricing model explains. Values range from 0 (explains nothing) to 1 (perfect prediction). In insurance, R-squared of 0.60-0.80 is typical for good pricing models.",
            "technical_details": "Calculated as 1 minus (residual sum of squares / total sum of squares). Higher R-squared means rating factors capture more risk variation. Adjusted R-squared penalizes adding weak variables to prevent overfitting.",
            "insurance_context": "R-squared shows pricing model quality. High R-squared (above 0.7) indicates strong risk factor selection. Low R-squared (below 0.5) suggests missing important rating variables or poor model specification.",
            "limitations": "R-squared always increases when adding variables, even irrelevant ones. Doesn't indicate causation. Very high R-squared (above 0.9) may signal overfitting. Focus on out-of-sample validation alongside R-squared."
        },
        "audience_explanations": {
            "underwriter": "R-squared tells you how well your pricing captures risk. R-squared of 0.75 means your model explains 75% of why premiums differ across policies. The missing 25% could be risk factors competitors are using. If R-squared drops below 0.6, review your rating plan for missing variables like credit score or claims frequency.",
            "actuary": "When building GLMs, R-squared helps validate model performance versus complexity trade-offs. Compare training R-squared (0.78) to validation R-squared (0.72) - small gaps indicate good generalization. Use adjusted R-squared when comparing models with different variable counts. Benchmark against prior models and industry standards.",
            "executive": "R-squared measures how well your pricing model works. R-squared of 0.8 means you're capturing 80% of what drives premium differences - strong competitive positioning. The remaining 20% represents opportunity if you can identify better predictive factors than competitors. Track R-squared trends quarterly to monitor model degradation."
        },
        "examples": [
            {
                "context": "Auto Insurance Pricing Model",
                "explanation": "Your personal auto model achieves R-squared of 0.72 using age, territory, vehicle symbol, and prior claims. This means these four factors explain 72% of premium variation across your book. Adding credit score might boost R-squared to 0.76, capturing additional risk differentiation."
            },
            {
                "context": "Commercial Property Model Performance", 
                "explanation": "Your commercial property GLM shows R-squared of 0.68 for new business but only 0.52 for renewals. This gap suggests risk factors change over policy lifetime, requiring different rating approaches for new versus renewal business."
            },
            {
                "context": "Model Validation Process",
                "explanation": "Model A: R-squared 0.75 with 12 variables. Model B: R-squared 0.73 with 8 variables. Model B may be better - similar predictive power with less complexity, reducing overfitting risk and implementation costs."
            }
        ],
        "related_concepts": ["predictive-model", "loss-ratio", "statistical-significance"],
        "action_guidance": {
            "underwriter": "If R-squared below 0.5: Request actuarial review of rating plan. If above 0.8: Validate against holdout data for overfitting. Monitor R-squared trends monthly to catch model degradation early.",
            "actuary": "Target R-squared 0.65-0.8 for most lines. Use cross-validation to verify stability. Document R-squared benchmarks by line of business. Include R-squared in quarterly model monitoring reports.",
            "executive": "R-squared trends indicate competitive positioning. Declining R-squared suggests competitors improving faster. Invest in analytics if R-squared consistently below industry benchmarks."
        }
    },
    # Concept 2: Loss Ratio
    {
        "concept_id": "loss-ratio",
        "title": "Loss Ratio",
        "content": {
            "definition": "Loss ratio equals incurred losses divided by earned premiums, expressed as percentage. Shows how much of premium dollar goes to claims. Combined with expense ratio to measure total profitability.",
            "technical_details": "Formula: (Incurred Losses + Loss Adjustment Expenses) / Earned Premiums × 100%. Includes both paid claims and reserves. Calendar year versus accident year calculations provide different insights. IBNR reserves significantly impact long-tail lines.",
            "insurance_context": "Loss ratio is primary profitability metric. Target varies by line: personal auto 65-75%, commercial property 50-60%, workers comp 60-70%. Combined ratio (loss ratio + expense ratio) below 100% indicates underwriting profit.",
            "limitations": "Loss ratios volatile for small books or low-frequency lines. Don't reflect investment income. Reserves estimates affect current ratios. Catastrophe losses create temporary spikes. Time lag between premium collection and claim payments."
        },
        "audience_explanations": {
            "underwriter": "Loss ratio shows claim dollars per premium dollar. 75% loss ratio means $75 claims for every $100 premium. With 25% expenses, combined ratio is 100% - breakeven. Watch for trending: 65% to 75% over three quarters signals deterioration requiring rate action or underwriting tightening.",
            "actuary": "Monitor loss ratios by accident year for trend analysis. Calendar year ratios affected by reserve development. Use ultimate loss ratios for pricing - factor in expected development. Compare actual to expected ratios to validate pricing assumptions and identify systematic biases.",
            "executive": "Loss ratio directly impacts profitability. 5-point deterioration on $100M book costs $5M annually. Combined ratio above 105% unsustainable long-term. Track loss ratio trends versus competitors - consistent underperformance indicates strategic pricing or underwriting issues."
        },
        "examples": [
            {
                "context": "Personal Auto Performance",
                "explanation": "Q3 loss ratio: 78% versus 72% target. With 27% expense ratio, combined ratio is 105% - losing 5 cents per premium dollar. Need 8% rate increase or underwriting improvements to restore target 97% combined ratio."
            },
            {
                "context": "Commercial Lines Analysis",
                "explanation": "Workers comp loss ratios: Year 1: 65%, Year 2: 71%, Year 3: 68%. Volatility normal for long-tail coverage. Focus on ultimate loss ratios: 67% average suggests adequate pricing despite annual fluctuations."
            },
            {
                "context": "Catastrophe Impact",
                "explanation": "Homeowners loss ratio spiked to 95% in Q2 due to hail storms versus 58% normal. Exclude CAT losses to see underlying 62% loss ratio - within target range. CAT reinsurance recovered 15 points of loss ratio impact."
            }
        ],
        "related_concepts": ["combined-ratio", "expense-ratio", "underwriting-profit"],
        "action_guidance": {
            "underwriter": "Loss ratio above target: Tighten guidelines, request rate increase. Below target by 10+ points: Review for adequate pricing. Monitor monthly, act on quarterly trends.",
            "actuary": "Establish loss ratio targets by line and vintage. Build early warning system for 5+ point deterioration. Include CAT-adjusted ratios in executive reporting.",
            "executive": "Loss ratio trends predict earnings. Budget assumes combined ratios. Monitor competitor ratios - persistent gaps indicate strategic issues requiring attention."
        }
    },
    # Concept 3: Predictive Model
    {
        "concept_id": "predictive-model", 
        "title": "Predictive Model",
        "content": {
            "definition": "Statistical algorithm using historical data to predict future outcomes. In insurance, predicts claim likelihood, loss costs, or customer behavior. Common types: GLM, random forest, gradient boosting, neural networks.",
            "technical_details": "Models learn patterns from training data to make predictions on new data. Evaluated using accuracy, precision, recall, AUC, lift curves. Require feature engineering, validation, and ongoing monitoring. Production models need A/B testing and performance tracking.",
            "insurance_context": "Predictive models optimize pricing, underwriting, claims handling, and marketing. Enable granular risk segmentation and personalized products. Regulatory requirements vary by state for rate filings and protected class usage.",
            "limitations": "Models only capture historical patterns. Performance degrades over time requiring retraining. Black-box models lack explainability. Biased training data creates unfair outcomes. Overfitting reduces real-world performance."
        },
        "audience_explanations": {
            "underwriter": "Predictive models score applications for risk. Score of 850 means characteristics similar to policies with 2.5x normal claim frequency. Use scores with judgment - models complement, don't replace, underwriting expertise. High scores trigger manual review, low scores enable straight-through processing.",
            "actuary": "Focus on model validation: training accuracy 78%, holdout 74% shows good generalization. Monitor model drift - performance declining indicates retraining needed. Document assumptions, limitations, and validation results for regulatory filings. Balance predictive power with interpretability requirements.",
            "executive": "Predictive models provide competitive advantage through better risk selection. Customer retention model improved renewal rates 6%, worth $3M annually. Investment in advanced analytics pays off: 5-point combined ratio improvement from enhanced pricing models generates $25M profit on $500M book."
        },
        "examples": [
            {
                "context": "Claims Fraud Detection",
                "explanation": "Fraud model analyzes 50+ variables to score claims 0-1000. Scores above 800 trigger special investigation unit review. Model identifies 15% of fraudulent claims using 5% of investigation resources, improving ROI 3x."
            },
            {
                "context": "Customer Lifetime Value",
                "explanation": "CLV model predicts 5-year customer value using demographics, policy features, and behavior. Guides acquisition spending: high CLV prospects get premium marketing, low CLV prospects get basic digital campaigns."
            },
            {
                "context": "Dynamic Pricing Model",
                "explanation": "Real-time pricing model updates rates based on current market conditions, competitor analysis, and inventory levels. Enables 5% rate optimization versus static annual rates, improving competitiveness and margins."
            }
        ],
        "related_concepts": ["r-squared", "machine-learning", "risk-assessment"],
        "action_guidance": {
            "underwriter": "Trust model scores but investigate outliers. Document override reasons for audit trail. Request model retraining if override rates exceed 15%.",
            "actuary": "Validate models quarterly. Track performance metrics. Plan retraining when accuracy drops 5+ points. Document model governance for regulatory compliance.", 
            "executive": "Invest in model infrastructure for competitive advantage. Track business impact: retention rates, loss ratios, market share. ROI targets: 3x for pricing models, 5x for fraud detection."
        }
    }
]

# Report how many concept entries were defined above.
print(f"📚 Loaded {len(enhanced_concepts)} enhanced concepts")

# STEP 1: Upload Enhanced Knowledge Base to S3
# Every concept is serialized to pretty-printed JSON and written to
# "concepts/<concept_id>.json" in BUCKET_NAME. The first failure aborts the
# remaining uploads and flips upload_success to False.
print("\n" + "=" * 50, "🔄 STEP 1: Uploading Enhanced Knowledge Base to S3", "=" * 50, sep="\n")

upload_success = True
try:
    for concept in enhanced_concepts:
        concept_id = concept["concept_id"]
        file_content = json.dumps(concept, indent=2)
        key = "concepts/{}.json".format(concept_id)

        put_request = {
            "Bucket": BUCKET_NAME,
            "Key": key,
            "Body": file_content,
            "ContentType": 'application/json',
        }
        s3.put_object(**put_request)
        print("  ✅ Uploaded {}.json".format(concept_id))

    print("\n✅ STEP 1 COMPLETE: Enhanced knowledge base uploaded to {}".format(BUCKET_NAME))

except Exception as e:
    # Report the problem and remember it for the final summary.
    print("❌ ERROR in Step 1: {}".format(str(e)))
    upload_success = False

# STEP 2: Clear Old Embeddings from DynamoDB
print("\n" + "="*50)
print("🔄 STEP 2: Clearing Old Embeddings from DynamoDB")
print("="*50)


def scan_all_items(table, **scan_kwargs):
    """Return every item produced by a DynamoDB scan, following pagination.

    A single ``scan`` call returns at most one page of results and reports
    that more data exists through ``LastEvaluatedKey``. This helper keeps
    scanning with ``ExclusiveStartKey`` until no such key is returned, so the
    caller sees the whole table rather than only the first page.

    Args:
        table: Object exposing a boto3-style ``scan(**kwargs)`` method that
            returns a dict with an ``Items`` list.
        **scan_kwargs: Extra keyword arguments forwarded to every ``scan`` call
            (for example ``ProjectionExpression``).

    Returns:
        list: Items from all pages, in the order the pages were returned.
    """
    request = dict(scan_kwargs)
    collected = []
    while True:
        page = table.scan(**request)
        collected.extend(page.get('Items', []))
        last_key = page.get('LastEvaluatedKey')
        if not last_key:
            return collected
        request['ExclusiveStartKey'] = last_key


clear_success = True
try:
    table = dynamodb.Table(TABLE_NAME)

    # Scan for all existing items. Only the two key attributes are requested:
    # they are all delete_item needs, and leaving out the embedding payload
    # keeps each page small.
    print("  🔍 Scanning for existing embeddings...")
    old_items = scan_all_items(table, ProjectionExpression="concept_id, vector_id")

    if old_items:
        print(f"  🗑️ Found {len(old_items)} old embeddings to delete...")

        # Delete old items one by one so a single failure does not stop the
        # rest of the cleanup (best effort).
        deleted_count = 0
        for item in old_items:
            try:
                table.delete_item(
                    Key={
                        'concept_id': item['concept_id'],
                        'vector_id': item['vector_id']
                    }
                )
                deleted_count += 1
                if deleted_count % 5 == 0:  # Progress indicator
                    print(f"    Deleted {deleted_count}/{len(old_items)} items...")
            except Exception as delete_error:
                print(f"    ⚠️ Error deleting {item.get('vector_id', 'unknown')}: {str(delete_error)}")

        print(f"  ✅ Deleted {deleted_count} old embeddings")

        # Individual delete failures used to be hidden behind a "cleared"
        # message; surface them in the status flag instead.
        if deleted_count < len(old_items):
            clear_success = False
    else:
        print("  ℹ️ No old embeddings found to delete")

    if clear_success:
        print(f"\n✅ STEP 2 COMPLETE: DynamoDB table cleared")
    else:
        print(f"\n⚠️ STEP 2 COMPLETE: {len(old_items) - deleted_count} old embeddings could not be deleted")

except Exception as e:
    print(f"❌ ERROR in Step 2: {str(e)}")
    print("⚠️ Continuing anyway - this is not critical for the upgrade")
    clear_success = False

# STEP 3: Generate Enhanced Embeddings
print("\n" + "="*50)
print("🔄 STEP 3: Generating Enhanced Embeddings Optimized for FLAN-T5")
print("="*50)


def build_chunks(concept):
    """Split one concept entry into the text chunks that get embedded.

    Chunks are produced in this order: definition, insurance context,
    technical details, limitations, one chunk per audience explanation, one
    per example, and one per action-guidance entry.

    Args:
        concept: Concept dict with the keys "concept_id", "title", "content"
            (holding "definition", "insurance_context", "technical_details",
            "limitations"), "audience_explanations", "examples" and
            "action_guidance".

    Returns:
        list[dict]: Chunks with "concept_id", "vector_id", "title", "text" and
        "type"; audience and action chunks also carry "audience", example
        chunks also carry "context".

    Raises:
        KeyError: If a required key is missing from ``concept``.
    """
    concept_id = concept["concept_id"]
    title = concept["title"]
    content = concept["content"]

    def make_chunk(suffix, text, chunk_type, **extra):
        # vector_id is always "<concept_id>-<suffix>".
        chunk = {
            "concept_id": concept_id,
            "vector_id": f"{concept_id}-{suffix}",
            "title": title,
            "text": text,
            "type": chunk_type,
        }
        chunk.update(extra)
        return chunk

    chunks = [
        # 1. Definition (concise and clear)
        make_chunk("definition", content["definition"], "definition"),
        # 2. Insurance context (industry-specific)
        make_chunk("context", content["insurance_context"], "context"),
        # 3. Technical details (for actuaries)
        make_chunk("technical", content["technical_details"], "technical"),
        # 4. Limitations (important context)
        make_chunk("limitations", content["limitations"], "limitations"),
    ]

    # 5. Audience-specific explanations (most important for personalization)
    for audience, explanation in concept["audience_explanations"].items():
        chunks.append(make_chunk(audience, explanation, "audience", audience=audience))

    # 6. Practical examples (real-world applications)
    for i, example in enumerate(concept["examples"]):
        combined_text = f"{example['context']}: {example['explanation']}"
        chunks.append(make_chunk(f"example-{i}", combined_text, "example", context=example["context"]))

    # 7. Action guidance (very practical for professionals)
    for audience, guidance in concept["action_guidance"].items():
        chunks.append(
            make_chunk(f"action-{audience}", f"Action guidance: {guidance}", "action", audience=audience)
        )

    return chunks


embedding_success = True
total_chunks = 0
failed_chunks = 0  # chunks whose embedding or DynamoDB write raised

try:
    table = dynamodb.Table(TABLE_NAME)

    for concept_num, concept in enumerate(enhanced_concepts, 1):
        title = concept["title"]

        print(f"\n📖 Processing Concept {concept_num}/{len(enhanced_concepts)}: {title}")
        print("-" * 40)

        # Generate optimized chunks for FLAN-T5
        chunks = build_chunks(concept)
        print(f"  📝 Generated {len(chunks)} chunks for {title}")

        # Generate embeddings and store each chunk
        concept_chunks = 0
        for chunk_num, chunk in enumerate(chunks, 1):
            try:
                # Generate embedding using sentence transformer
                embedding = model.encode(chunk["text"])

                # The stored item is the chunk itself plus its embedding,
                # serialized as a JSON string.
                item = dict(chunk)
                item["embedding"] = json.dumps(embedding.tolist())

                # Store in DynamoDB
                table.put_item(Item=item)
                concept_chunks += 1
                total_chunks += 1

                # Progress indicator
                if chunk_num % 3 == 0 or chunk_num == len(chunks):
                    print(f"    Processed {chunk_num}/{len(chunks)} chunks...")

            except Exception as chunk_error:
                # Keep going with the remaining chunks, but remember the
                # failure so the step is not reported as a full success.
                failed_chunks += 1
                print(f"    ❌ Error storing {chunk['vector_id']}: {str(chunk_error)}")

        print(f"  ✅ Stored {concept_chunks} chunks for {title}")

    if failed_chunks:
        embedding_success = False
        print(f"\n⚠️ STEP 3 COMPLETE: Stored {total_chunks} enhanced embeddings, {failed_chunks} chunks failed")
    else:
        print(f"\n✅ STEP 3 COMPLETE: Generated and stored {total_chunks} enhanced embeddings")

except Exception as e:
    print(f"❌ ERROR in Step 3: {str(e)}")
    embedding_success = False

# STEP 4: Validation and Testing
print("\n" + "="*50)
print("🔄 STEP 4: Validating Enhanced System")
print("="*50)


def detect_concept(query):
    """Map a free-text query to a concept id using keyword matching.

    Args:
        query: The user's question.

    Returns:
        str: 'r-squared', 'loss-ratio', 'predictive-model', or 'unknown' when
        no keyword matches. Checks run in that order; the first match wins.
    """
    query_lower = query.lower()
    if any(term in query_lower for term in ('r-squared', 'r squared', 'r2')):
        return 'r-squared'
    if 'loss ratio' in query_lower:
        return 'loss-ratio'
    # 'model' also covers 'predictive model' and the plural 'models'.
    if 'model' in query_lower:
        return 'predictive-model'
    return 'unknown'


def detect_audience(query):
    """Map a free-text query to the audience it mentions.

    Args:
        query: The user's question.

    Returns:
        str: 'underwriter', 'actuary', 'executive', or 'general' when no
        audience keyword is present. Checks run in that order.
    """
    query_lower = query.lower()
    if 'underwriter' in query_lower:
        return 'underwriter'
    # The plural "actuaries" does not contain the substring "actuary", so both
    # forms have to be listed explicitly.
    if any(term in query_lower for term in ('actuary', 'actuaries')):
        return 'actuary'
    if 'executive' in query_lower:
        return 'executive'
    return 'general'


validation_success = True
try:
    # Bind the table handle here so validation does not depend on an earlier
    # step's try block having reached its own assignment.
    table = dynamodb.Table(TABLE_NAME)

    # Test 1: Check embeddings are stored correctly
    print("🧪 Test 1: Checking DynamoDB embeddings...")
    response = table.scan(Limit=10)
    sample_items = response.get('Items', [])

    if sample_items:
        print(f"  ✅ Found {len(sample_items)} sample embeddings in database")

        # Show breakdown by type
        type_counts = {}
        for item in sample_items:
            item_type = item.get('type', 'unknown')
            type_counts[item_type] = type_counts.get(item_type, 0) + 1

        print("  📊 Embedding types found:")
        for embed_type, count in type_counts.items():
            print(f"    - {embed_type}: {count} items")
    else:
        print("  ❌ No embeddings found in database!")
        validation_success = False

    # Test 2: Test concept extraction logic
    print("\n🧪 Test 2: Testing concept extraction...")
    test_queries = [
        ("What is R-squared for an underwriter?", "r-squared", "underwriter"),
        ("Explain loss ratio to an executive", "loss-ratio", "executive"),
        ("How do predictive models help actuaries?", "predictive-model", "actuary")
    ]

    extraction_success = 0
    for query, expected_concept, expected_audience in test_queries:
        detected_concept = detect_concept(query)
        detected_audience = detect_audience(query)

        concept_correct = detected_concept == expected_concept
        audience_correct = detected_audience == expected_audience

        if concept_correct and audience_correct:
            extraction_success += 1
            print(f"  ✅ '{query}' → {detected_concept}, {detected_audience}")
        else:
            print(f"  ❌ '{query}' → Expected: {expected_concept}, {expected_audience} | Got: {detected_concept}, {detected_audience}")

    print(f"  📊 Concept extraction: {extraction_success}/{len(test_queries)} tests passed")
    # A failed extraction check is a validation failure, not just a log line.
    if extraction_success < len(test_queries):
        validation_success = False

    # Test 3: Test retrieval functionality
    print("\n🧪 Test 3: Testing enhanced retrieval...")
    test_concept = "r-squared"
    test_audience = "underwriter"

    try:
        # Query for concept. No Limit is applied: each concept has more than
        # ten chunks, and a Limit of 10 would drop the ones that come last in
        # key order (presumably vector_id is the sort key, in which case
        # "<concept>-underwriter" is among them - TODO confirm).
        response = table.query(
            KeyConditionExpression="concept_id = :concept_id",
            ExpressionAttributeValues={":concept_id": test_concept}
        )

        items = response.get('Items', [])
        audience_specific = [item for item in items if item.get('audience') == test_audience]
        action_items = [item for item in items if item.get('type') == 'action']

        print(f"  ✅ Retrieved {len(items)} total items for '{test_concept}'")
        print(f"  ✅ Found {len(audience_specific)} audience-specific items for '{test_audience}'")
        print(f"  ✅ Found {len(action_items)} action guidance items")

        if audience_specific:
            sample_text = audience_specific[0]['text'][:100]
            print(f"  💬 Sample audience content: '{sample_text}...'")
        else:
            # Retrieval that returns nothing for the audience is a failure.
            print(f"  ❌ No audience-specific items found for '{test_audience}'")
            validation_success = False

    except Exception as retrieval_error:
        print(f"  ❌ Retrieval test failed: {str(retrieval_error)}")
        validation_success = False

    if validation_success:
        print(f"\n✅ STEP 4 COMPLETE: Enhanced system validation successful")
    else:
        print(f"\n⚠️ STEP 4 COMPLETE: Some validation issues found")

except Exception as e:
    print(f"❌ ERROR in Step 4: {str(e)}")
    validation_success = False

# FINAL SUMMARY
# Prints the overall status, a per-step result list and the closing banner.
# The DynamoDB cleanup flag is shown but deliberately left out of the overall
# status, matching its "non-critical" label.
summary_rule = "=" * 60
print("\n" + summary_rule)
print("🎉 ENHANCED TECHTRANSLATOR IMPLEMENTATION SUMMARY")
print(summary_rule)

# Overall status
all_success = upload_success and embedding_success and validation_success

status_emoji, status_text = (
    ("🎉", "COMPLETE & SUCCESSFUL") if all_success else ("⚠️", "COMPLETED WITH ISSUES")
)

print(f"{status_emoji} STATUS: {status_text}")
print()

# Detailed results: (succeeded?, marker shown on failure, label)
print("📊 IMPLEMENTATION RESULTS:")
step_results = [
    (upload_success, '❌', "S3 Knowledge Base Upload"),
    (clear_success, '⚠️', "DynamoDB Cleanup (non-critical)"),
    (embedding_success, '❌', "Enhanced Embeddings Generation"),
    (validation_success, '❌', "System Validation"),
]
for step_ok, failure_mark, step_label in step_results:
    print(f"  {'✅' if step_ok else failure_mark} {step_label}")

print("\n📈 ENHANCEMENTS IMPLEMENTED:")
enhancement_lines = [
    f"{len(enhanced_concepts)} concepts with rich, role-specific content",
    f"{total_chunks} optimized embeddings for FLAN-T5",
    "Action guidance for practical application",
    "Enhanced examples with real numbers and scenarios",
    "Audience-specific explanations (underwriter/actuary/executive)",
]
for enhancement in enhancement_lines:
    print("  ✅ " + enhancement)


print("\n🎯 EXPECTED IMPROVEMENTS:")
improvement_lines = [
    "More relevant, role-specific responses",
    "Better follow-up conversation handling",
    "Professional language with practical examples",
    "Actionable insights for each audience type",
]
for improvement in improvement_lines:
    print("  📈 " + improvement)

# Closing note depends on the overall status.
if all_success:
    closing_lines = (
        "\n✨ Your enhanced TechTranslator is ready to provide",
        "   professional-grade explanations! ✨",
    )
else:
    closing_lines = (
        "\n⚠️ Some issues occurred, but the core functionality",
        "   should still work. Check the errors above.",
    )
for closing_line in closing_lines:
    print(closing_line)

print("\n" + summary_rule)
print("🏁 IMPLEMENTATION COMPLETE")
print(summary_rule)

2025-05-25 17:27:22.261058: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


🚀 ENHANCED TECHTRANSLATOR IMPLEMENTATION
📦 Initializing AWS clients and model...
✅ Initialization complete
📚 Loaded 3 enhanced concepts

🔄 STEP 1: Uploading Enhanced Knowledge Base to S3
  ✅ Uploaded r-squared.json
  ✅ Uploaded loss-ratio.json
  ✅ Uploaded predictive-model.json

✅ STEP 1 COMPLETE: Enhanced knowledge base uploaded to tech-translator-s3-knowledge-base

🔄 STEP 2: Clearing Old Embeddings from DynamoDB
  🔍 Scanning for existing embeddings...
  🗑️ Found 24 old embeddings to delete...
    Deleted 5/24 items...
    Deleted 10/24 items...
    Deleted 15/24 items...
    Deleted 20/24 items...
  ✅ Deleted 24 old embeddings

✅ STEP 2 COMPLETE: DynamoDB table cleared

🔄 STEP 3: Generating Enhanced Embeddings Optimized for FLAN-T5

📖 Processing Concept 1/3: R-squared
----------------------------------------
  📝 Generated 13 chunks for R-squared
    Processed 3/13 chunks...
    Processed 6/13 chunks...
    Processed 9/13 chunks...
    Processed 12/13 chunks...
    Processed 13/13 chu