# Atomus TAM Research - Debugging Tools

This notebook provides interactive tools for testing and debugging the Atomus TAM Research system.

## Features:
- Individual API testing
- Error diagnosis
- Data exploration
- Configuration validation
- Performance monitoring

In [None]:
# Setup and imports
# Every later cell in this notebook relies on the names bound here.
import sys
import os
from pathlib import Path
import json
import pandas as pd
from datetime import datetime
import traceback

# Add src to path
# NOTE: '../src' is a relative entry, so the project imports below resolve
# only while the working directory is a sibling of the src/ directory.
sys.path.append('../src')

# Import our modules
from api_integrations import (
    create_hubspot_client,
    create_openai_client, 
    create_highergov_client
)
from utils import (
    get_logger,
    get_performance_tracker,
    log_system_info
)
from scoring_engine import AtomScoringEngine
from data_processing import AtomDataProcessor

print("✅ All imports successful")
# Printed so a wrong working directory (see the path note above) is easy to spot.
print(f"📁 Working directory: {os.getcwd()}")

## 🔧 System Information

In [None]:
# Initialize logger and get system info
# `logger` and `tracker` are kept as notebook globals; the performance
# summary cell further down reads `tracker`.
logger = get_logger()
tracker = get_performance_tracker()

print("🖥️ SYSTEM INFORMATION:")
log_system_info()


def _mask_secret(secret, visible_suffix=4, min_length=15):
    """Return a display-safe rendering of ``secret``.

    Only the last ``visible_suffix`` characters are revealed, and only when
    the secret is at least ``min_length`` characters long so that the visible
    part stays a small fraction of the value. Anything shorter is fully
    masked. Notebook output is frequently saved and shared, so as little of
    the credential as possible is echoed.
    """
    if len(secret) < min_length:
        return "[MASKED]"
    return f"...{secret[-visible_suffix:]}"


# Check environment variables
print("\n🔑 ENVIRONMENT VARIABLES:")
env_vars = {
    'HUBSPOT_API_KEY': os.getenv('HUBSPOT_API_KEY', 'Not set'),
    'OPENAI_API_KEY': os.getenv('OPENAI_API_KEY', 'Not set'),
    'HIGHERGOV_API_KEY': os.getenv('HIGHERGOV_API_KEY', 'Not set')
}

for key, value in env_vars.items():
    if value == 'Not set':
        print(f"❌ {key}: {value}")
    elif not value.strip():
        # Exported but blank: unusable as a credential, so do not report it as OK.
        print(f"❌ {key}: Set but empty")
    else:
        # Show only the last 4 characters for security
        print(f"✅ {key}: {_mask_secret(value)}")

## 🧪 Individual API Testing

Test each API separately to identify any connection issues.

In [None]:
# Test HubSpot API
print("🔵 TESTING HUBSPOT API...", "=" * 50, sep="\n")

try:
    # The client is kept as a notebook global; later cells reuse it.
    hubspot_client = create_hubspot_client()
    print("✅ HubSpot client created")

    # Connectivity probe
    status = hubspot_client.test_connection()
    print(f"📊 Connection Status: {status}")

    # Exercise the company search call with a fixed name filter
    print("\n🔍 Testing company search...")
    search_results = hubspot_client.search_companies({'name': 'Test'})
    print(f"📋 Found {len(search_results)} companies matching 'Test'")

    # Let the client report its own statistics
    print("\n📈 HubSpot Stats:")
    hubspot_client.log_stats_summary()

except Exception as err:
    # Report instead of raising so the remaining cells can still be run.
    print(f"❌ HubSpot API Error: {err!s}")
    print(f"🔍 Traceback: {traceback.format_exc()}")

In [None]:
# Test OpenAI API
print("🤖 TESTING OPENAI API...", "=" * 50, sep="\n")

try:
    # The client is kept as a notebook global; later cells reuse it.
    openai_client = create_openai_client()
    print("✅ OpenAI client created")

    # Connectivity probe
    status = openai_client.test_connection()
    print(f"📊 Connection Status: {status}")

    # One research request; the result is indexed by 'content' and 'metadata' below
    print("\n🔍 Testing basic research...")
    test_research = openai_client.conduct_research(
        company_name="Firestorm",
        research_type="basic_research",
        research_category="company_overview",
    )

    print(f"📝 Research completed: {len(test_research['content'])} characters")
    print(f"🎯 Tokens used: {test_research['metadata']['tokens_used']}")
    print(f"📄 Preview: {test_research['content'][:200]}...")

    # Let the client report its own statistics
    print("\n📈 OpenAI Stats:")
    openai_client.log_stats_summary()

except Exception as err:
    # Report instead of raising so the remaining cells can still be run.
    print(f"❌ OpenAI API Error: {err!s}")
    print(f"🔍 Traceback: {traceback.format_exc()}")

In [None]:
# Test HigherGov API
print("🛡️ TESTING HIGHERGOV API...", "=" * 50, sep="\n")

try:
    # The client is kept as a notebook global; later cells reuse it.
    highergov_client = create_highergov_client()
    print("✅ HigherGov client created")

    # Connectivity probe
    status = highergov_client.test_connection()
    print(f"📊 Connection Status: {status}")

    # Run the defense-contractor analysis for one known company name
    print("\n🔍 Testing defense contractor analysis...")
    defense_analysis = highergov_client.analyze_defense_contractor_status("Firestorm")

    print(f"📊 Defense Score: {defense_analysis['defense_contractor_score']}")
    print(f"📋 Contract Count: {defense_analysis['contract_analysis']['defense_contracts']}")
    print(f"🏢 Identifiers Found: {len(defense_analysis['identifiers'])}")

    # Let the client report its own statistics
    print("\n📈 HigherGov Stats:")
    highergov_client.log_stats_summary()

except Exception as err:
    # Report instead of raising so the remaining cells can still be run.
    print(f"❌ HigherGov API Error: {err!s}")
    print(f"🔍 Traceback: {traceback.format_exc()}")

## 📊 Configuration Testing

Validate that all configuration files are properly loaded.

In [None]:
# Test Scoring Engine Configuration
print("⚙️ TESTING SCORING ENGINE CONFIGURATION...", "=" * 60, sep="\n")

try:
    # The engine is kept as a notebook global; the scoring cells below reuse it.
    scoring_engine = AtomScoringEngine()
    print("✅ Scoring engine initialized")

    # The summary is indexed by 'weights', 'keywords' and 'tier_thresholds' below
    config = scoring_engine.get_config_summary()
    print("\n📋 Configuration Summary:")
    print(f"   📏 Scoring weights loaded: {len(config['weights'])} categories")
    print(f"   🔤 Keyword categories: {len(config['keywords'])}")
    print(f"   🎯 Tier thresholds: {config['tier_thresholds']}")

    # Per-category keyword counts
    print("\n🔤 Keyword Categories:")
    for keyword_group, terms in config['keywords'].items():
        print(f"   {keyword_group}: {len(terms)} keywords")

    # Per-category weights
    print("\n⚖️ Scoring Weights:")
    for weight_name, weight_value in config['weights'].items():
        print(f"   {weight_name}: {weight_value}")

except Exception as err:
    # Report instead of raising so the remaining cells can still be run.
    print(f"❌ Scoring Engine Error: {err!s}")
    print(f"🔍 Traceback: {traceback.format_exc()}")

In [None]:
# Test Data Processor
print("📋 TESTING DATA PROCESSOR...")
print("=" * 40)

try:
    # The processor is kept as a notebook global; the data exploration cell reuses it.
    data_processor = AtomDataProcessor()
    print("✅ Data processor initialized")

    # Load test data
    test_data = data_processor.load_prospect_database()
    print(f"📊 Loaded {len(test_data)} test companies")

    if test_data:
        # Show sample data
        print(f"\n📄 Sample company data:")
        sample = test_data[0]
        for key, value in sample.items():
            print(f"   {key}: {value}")

        # Test validation. This lives inside the guard because `sample` only
        # exists when at least one company was loaded; running it
        # unconditionally raised NameError on a fresh kernel (or silently
        # validated a stale `sample` left over from an earlier run).
        validation_result = data_processor.validate_company_data(sample)
        print(f"\n✅ Validation result: {validation_result['is_valid']}")
        if not validation_result['is_valid']:
            print(f"❌ Validation errors: {validation_result['errors']}")
    else:
        print("\n⚠️ No companies loaded - skipping sample display and validation")

except Exception as e:
    # Report instead of raising so the remaining cells can still be run.
    print(f"❌ Data Processor Error: {str(e)}")
    print(f"🔍 Traceback: {traceback.format_exc()}")

## 🧮 Interactive Scoring Testing

Test the scoring engine with individual companies.

In [None]:
# Interactive Company Scoring
print("🧮 INTERACTIVE SCORING TESTING...", "=" * 45, sep="\n")

# Hand-written company record fed to the scoring engine
test_company = dict(
    name='Firestorm',
    description='Firestorm is a defense technology company specializing in hypersonic propulsion systems and nuclear-powered unmanned aircraft systems for military applications.',
    website='https://firestorm-defense.com',
    industry='Defense Manufacturing',
    size='50-100 employees',
)

print(f"🏢 Testing company: {test_company['name']}")
print(f"📝 Description: {test_company['description']}")

try:
    # `scoring_engine` comes from the configuration-testing cell above; if that
    # cell did not run, the NameError is reported by the handler below.
    scoring_result = scoring_engine.calculate_company_score(test_company)

    print("\n📊 SCORING RESULTS:")
    print(f"   🎯 Total Score: {scoring_result['total_score']:.1f}")
    print(f"   🏆 Tier: {scoring_result['tier_classification']}")

    print("\n📈 Component Scores:")
    for part_name, part_score in scoring_result['component_scores'].items():
        print(f"   {part_name}: {part_score:.1f}")

    # Categories with no hits are skipped
    print("\n🔤 Keywords Found:")
    for keyword_group, hits in scoring_result['keywords_found'].items():
        if not hits:
            continue
        print(f"   {keyword_group}: {hits}")

    print("\n⚖️ Weighted Calculation:")
    for part_name, breakdown in scoring_result['calculation_details'].items():
        print(f"   {part_name}: {breakdown['raw_score']:.1f} × {breakdown['weight']} = {breakdown['weighted_score']:.1f}")

except Exception as err:
    # Report instead of raising so the remaining cells can still be run.
    print(f"❌ Scoring Error: {err!s}")
    print(f"🔍 Traceback: {traceback.format_exc()}")

## 🔄 End-to-End Workflow Testing

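Test the complete workflow with a single company. **Note:** the final step writes to HubSpot — it updates the matching company record or creates a new one — so run this cell only against an account where that is acceptable.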

In [None]:
# Complete workflow test
def test_single_company_workflow(company_name):
    """Test complete workflow for one company"""
    
    print(f"🚀 COMPLETE WORKFLOW TEST: {company_name}")
    print("=" * 60)
    
    workflow_results = {
        'company_name': company_name,
        'timestamp': datetime.now().isoformat(),
        'steps': {}
    }
    
    # Step 1: HigherGov Analysis
    print(f"\n🛡️ Step 1: Defense Contractor Analysis...")
    try:
        defense_data = highergov_client.analyze_defense_contractor_status(company_name)
        workflow_results['steps']['defense_analysis'] = {
            'status': 'success',
            'score': defense_data['defense_contractor_score'],
            'contracts': defense_data['contract_analysis']['defense_contracts']
        }
        print(f"   ✅ Defense Score: {defense_data['defense_contractor_score']}")
        print(f"   📋 Contracts: {defense_data['contract_analysis']['defense_contracts']}")
    except Exception as e:
        workflow_results['steps']['defense_analysis'] = {'status': 'failed', 'error': str(e)}
        print(f"   ❌ Error: {str(e)}")
    
    # Step 2: OpenAI Research
    print(f"\n🤖 Step 2: AI Research...")
    try:
        research_data = openai_client.conduct_research(
            company_name=company_name,
            research_type="basic_research",
            research_category="company_overview"
        )
        workflow_results['steps']['ai_research'] = {
            'status': 'success',
            'tokens': research_data['metadata']['tokens_used'],
            'content_length': len(research_data['content'])
        }
        print(f"   ✅ Research completed: {research_data['metadata']['tokens_used']} tokens")
        print(f"   📄 Content preview: {research_data['content'][:150]}...")
    except Exception as e:
        workflow_results['steps']['ai_research'] = {'status': 'failed', 'error': str(e)}
        print(f"   ❌ Error: {str(e)}")
    
    # Step 3: Scoring
    print(f"\n🧮 Step 3: Scoring Calculation...")
    try:
        # Create company object for scoring
        company_data = {
            'name': company_name,
            'description': research_data['content'] if 'research_data' in locals() else f"Defense company: {company_name}"
        }
        
        scoring_result = scoring_engine.calculate_company_score(company_data)
        workflow_results['steps']['scoring'] = {
            'status': 'success',
            'total_score': scoring_result['total_score'],
            'tier': scoring_result['tier_classification']
        }
        print(f"   ✅ Score: {scoring_result['total_score']:.1f}")
        print(f"   🏆 Tier: {scoring_result['tier_classification']}")
    except Exception as e:
        workflow_results['steps']['scoring'] = {'status': 'failed', 'error': str(e)}
        print(f"   ❌ Error: {str(e)}")
    
    # Step 4: HubSpot Sync
    print(f"\n📊 Step 4: HubSpot Sync...")
    try:
        # Check if company exists
        existing_companies = hubspot_client.search_companies({'name': company_name})
        
        hubspot_data = {
            'name': company_name,
            'atomus_score': scoring_result['total_score'] if 'scoring_result' in locals() else 0,
            'tier_classification': scoring_result['tier_classification'] if 'scoring_result' in locals() else 'unknown',
            'last_research_date': datetime.now().strftime('%Y-%m-%d')
        }
        
        if existing_companies:
            # Update existing
            company_id = existing_companies[0]['id']
            hubspot_client.update_company(company_id, hubspot_data)
            workflow_results['steps']['hubspot_sync'] = {
                'status': 'updated',
                'company_id': company_id
            }
            print(f"   ✅ Updated existing company: {company_id}")
        else:
            # Create new
            new_company = hubspot_client.create_company(hubspot_data)
            workflow_results['steps']['hubspot_sync'] = {
                'status': 'created',
                'company_id': new_company['id']
            }
            print(f"   ✅ Created new company: {new_company['id']}")
            
    except Exception as e:
        workflow_results['steps']['hubspot_sync'] = {'status': 'failed', 'error': str(e)}
        print(f"   ❌ Error: {str(e)}")
    
    # Summary
    print(f"\n📋 WORKFLOW SUMMARY:")
    successful_steps = sum(1 for step in workflow_results['steps'].values() 
                          if step['status'] in ['success', 'created', 'updated'])
    total_steps = len(workflow_results['steps'])
    print(f"   ✅ Successful steps: {successful_steps}/{total_steps}")
    
    for step_name, step_data in workflow_results['steps'].items():
        status_emoji = "✅" if step_data['status'] in ['success', 'created', 'updated'] else "❌"
        print(f"   {status_emoji} {step_name}: {step_data['status']}")
    
    return workflow_results

# Run the test
test_result = test_single_company_workflow("Firestorm")

## 📈 Performance Monitoring

Monitor API performance and usage statistics.

In [None]:
# Performance Summary
print("📈 PERFORMANCE SUMMARY")
print("=" * 30)

# Show performance tracking results
# `tracker` is the performance tracker created in the system information cell.
print("⏱️ Timing Results:")
timing_results = tracker.get_timing_summary()
if timing_results:
    for operation, times in timing_results.items():
        # Guard against an operation with no recorded samples.
        avg_time = sum(times) / len(times) if times else 0
        print(f"   {operation}: {avg_time:.2f}s average ({len(times)} calls)")
else:
    print("   No timing data recorded")

# API Usage Statistics
print("\n📊 API Usage Statistics:")
# Each client is reported under its own try so that one failing or missing
# client (for example, its test cell never ran) does not hide the statistics
# of the others. The lambdas defer the global lookup until inside the try, so
# a missing client surfaces as a caught NameError.
api_stat_reporters = [
    ("\n🔵 HubSpot:", lambda: hubspot_client.log_stats_summary()),
    ("\n🤖 OpenAI:", lambda: openai_client.log_stats_summary()),
    ("\n🛡️ HigherGov:", lambda: highergov_client.log_stats_summary()),
]
for heading, report_stats in api_stat_reporters:
    print(heading)
    try:
        report_stats()
    except Exception as e:
        print(f"❌ Error getting API stats: {str(e)}")

## 🔍 Data Exploration

Explore the test dataset and configuration.

In [None]:
# Load and explore test data
print("🔍 TEST DATA EXPLORATION")
print("=" * 30)

try:
    # Load prospect database
    # `data_processor` comes from the data processor test cell above; if that
    # cell did not run, the NameError is reported by the handler below.
    test_companies = data_processor.load_prospect_database()
    df = pd.DataFrame(test_companies)

    print(f"📊 Dataset Overview:")
    print(f"   Companies: {len(df)}")
    print(f"   Columns: {list(df.columns)}")

    # Show the data
    print(f"\n📋 Company List:")
    display(df)

    # Show data types and statistics
    print(f"\n📈 Data Info:")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() appended a stray "None" line to the output.
    df.info()

except Exception as e:
    print(f"❌ Error loading data: {str(e)}")

In [None]:
# Configuration file exploration
print("⚙️ CONFIGURATION EXPLORATION")
print("=" * 35)

try:
    # Imported inside the try so a missing PyYAML install is reported by this
    # cell's handler instead of breaking the notebook's import cell.
    import yaml

    # Load scoring config
    # Explicit UTF-8 avoids depending on the platform default encoding.
    # safe_load returns None for an empty document; fall back to {} so the
    # .get() calls below report "nothing configured" instead of raising.
    with open('../config/scoring_config.yaml', 'r', encoding='utf-8') as f:
        scoring_config = yaml.safe_load(f) or {}

    print("📏 Scoring Configuration:")
    print(f"   Weights: {scoring_config.get('weights', {})}")
    print(f"   Tier Thresholds: {scoring_config.get('tier_thresholds', {})}")

    # Load research prompts
    with open('../config/research_prompts.yaml', 'r', encoding='utf-8') as f:
        research_config = yaml.safe_load(f) or {}

    print(f"\n🤖 Research Prompts:")
    for category, prompts in research_config.items():
        # Only mapping-valued top-level entries are counted; others are skipped.
        if isinstance(prompts, dict):
            print(f"   {category}: {len(prompts)} prompts")

except Exception as e:
    print(f"❌ Error loading config: {str(e)}")

## 🛠️ Custom Testing Area

Use this section for your own testing and debugging.

In [None]:
# Custom testing area - modify as needed
print("🛠️ CUSTOM TESTING AREA", "=" * 25, sep="\n")

# Example: Test specific company
# company_to_test = "Overland AI"
# Add your custom testing code here

# Closing hints for whoever adapts this scratch cell
print(
    "✅ Ready for custom testing",
    "💡 Modify this cell to test specific functionality",
    sep="\n",
)