# Atomus TAM Research - MVP Demo

This notebook demonstrates the complete Atomus TAM Research workflow for identifying and scoring defense contractors.

## Workflow Overview:
1. **HigherGov Analysis** - Verify defense contractor status
2. **OpenAI Research** - Conduct AI-powered company research
3. **Scoring Engine** - Calculate weighted scores and tier classification
4. **HubSpot Sync** - Update CRM with results

## Test Dataset:
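The prospect dataset contains 13 defense contractor companies. The notebook first runs a 3-company sample through the complete pipeline; extend `sample_companies` to process all 13 once the sample run looks good.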

In [None]:
# Setup and imports
import sys
import os
from pathlib import Path
import json
import pandas as pd
from datetime import datetime
import time

# Add src to path.
# The entry is resolved to an absolute path so the project imports keep
# working if the working directory changes later, and it is only appended
# when missing so re-running this cell does not pile up duplicate entries.
src_dir = str(Path('../src').resolve())
if src_dir not in sys.path:
    sys.path.append(src_dir)

# Import our modules (project-local; they live in the src directory added above)
from api_integrations import (
    create_hubspot_client,
    create_openai_client,
    create_highergov_client
)
from utils import get_logger, get_performance_tracker
from scoring_engine import AtomScoringEngine
from data_processing import AtomDataProcessor

print("🚀 Atomus TAM Research MVP Demo")
print("=" * 40)
print("✅ All modules imported successfully")
print(f"📁 Working directory: {os.getcwd()}")

## 🔧 System Initialization

In [None]:
# Bring up logging, timing, API clients and processing engines.
# These names are notebook globals that the later cells rely on.
logger = get_logger()
tracker = get_performance_tracker()

print("🔧 Initializing system components...")

# One client per external service
hubspot_client = create_hubspot_client()
openai_client = create_openai_client()
highergov_client = create_highergov_client()

# Engines for scoring and for loading the prospect data
scoring_engine = AtomScoringEngine()
data_processor = AtomDataProcessor()

print("✅ All components initialized")

# Probe every API once and keep the raw responses keyed by service name
print("\n🔗 Testing API connections...")
apis_status = {}
apis_status['HubSpot'] = hubspot_client.test_connection()
apis_status['OpenAI'] = openai_client.test_connection()
apis_status['HigherGov'] = highergov_client.test_connection()

# Report each service and track whether all of them answered 'connected'
all_connected = True
for service, response in apis_status.items():
    state = response.get('status', 'unknown')
    if state == 'connected':
        marker = "✅"
    else:
        marker = "❌"
        all_connected = False
    print(f"   {marker} {service}: {state}")

if all_connected:
    print("\n🟢 System Status: Ready")
else:
    print("\n🔴 System Status: API Issues Detected")

## 📋 Load Test Dataset

In [None]:
# Read the prospect database (expected to hold the 13 test companies)
# into a DataFrame for display and lookup.
print("📋 Loading test dataset...")

test_companies_data = data_processor.load_prospect_database()
df_companies = pd.DataFrame(test_companies_data)

row_count = len(df_companies)
column_labels = list(df_companies.columns)
print(f"✅ Loaded {row_count} companies")
print(f"📊 Columns: {column_labels}")

# Show the full table in the notebook output
print("\n🏢 Test Companies:")
display(df_companies)

# The 'name' column drives the per-company workflow in the following cells
company_names = df_companies['name'].tolist()
print(f"\n🎯 Companies to process: {company_names}")

## 🚀 MVP Workflow Execution

Process each company through the complete workflow.

In [None]:
# Main workflow function and its private per-step helpers.
# All of them use the notebook globals created in the initialization cell:
# tracker, highergov_client, openai_client, scoring_engine, hubspot_client.

# Step statuses that count as a successful step in the final tally
_SUCCESSFUL_STEP_STATUSES = ('success', 'created', 'updated')


def _failed_step(error):
    """Print the error line and return the record stored for a failed step."""
    message = str(error)
    print(f"   ❌ Error: {message}")
    return {'status': 'failed', 'error': message}


def _defense_score_from(defense_step):
    """Return the defense score of a successful step-1 record, otherwise 0."""
    if defense_step['status'] == 'success':
        return defense_step.get('defense_score', 0)
    return 0


def _run_defense_analysis(company_name):
    """Step 1: query HigherGov for defense-contractor status; return the step record."""
    print("🛡️ Step 1: Defense Contractor Analysis...")
    try:
        # NOTE(review): if the call below raises, the timer started here is
        # never ended — confirm the tracker tolerates unbalanced start/end pairs.
        tracker.start_timing("highergov_analysis")
        defense_data = highergov_client.analyze_defense_contractor_status(company_name)
        tracker.end_timing("highergov_analysis", f"Analyzed {company_name}")

        step = {
            'status': 'success',
            'defense_score': defense_data['defense_contractor_score'],
            'contract_count': defense_data['contract_analysis']['defense_contracts'],
            'total_contracts': defense_data['contract_analysis']['total_contracts'],
            'identifiers_found': len(defense_data['identifiers'])
        }

        print(f"   ✅ Defense Score: {step['defense_score']}/100")
        print(f"   📋 Defense Contracts: {step['contract_count']}")
        print(f"   🏢 Total Contracts: {step['total_contracts']}")
        return step

    except Exception as e:
        # Best-effort demo: a failed step is recorded and the workflow continues
        return _failed_step(e)


def _run_ai_research(company_name):
    """Step 2: run the OpenAI research; return (step record, research data or None)."""
    print("\n🤖 Step 2: AI-Powered Research...")
    try:
        tracker.start_timing("openai_research")
        research_data = openai_client.conduct_research(
            company_name=company_name,
            research_type="basic_research",
            research_category="defense_contractor_analysis"
        )
        tracker.end_timing("openai_research", f"Researched {company_name}")

        tokens_used = research_data['metadata']['tokens_used']
        content = research_data['content']
        # The stored summary is capped at 300 characters, with an ellipsis when truncated
        summary = content[:300] + "..." if len(content) > 300 else content

        step = {
            'status': 'success',
            'tokens_used': tokens_used,
            'content_length': len(content),
            'research_summary': summary
        }

        print(f"   ✅ Research completed: {tokens_used} tokens")
        print(f"   📄 Content length: {len(content)} characters")
        print(f"   📝 Preview: {content[:150]}...")
        return step, research_data

    except Exception as e:
        # Research data is discarded on any failure so later steps use their fallbacks
        return _failed_step(e), None


def _run_scoring(company_name, company_data, research_data, defense_score):
    """Step 3: score the company; return (step record, scoring result or None)."""
    print("\n🧮 Step 3: Scoring Calculation...")
    try:
        tracker.start_timing("scoring_calculation")

        # Base scoring input; falls back to a generic description without research
        scoring_input = {
            'name': company_name,
            'description': research_data['content'] if research_data else f"Defense contractor: {company_name}",
            'defense_contractor_score': defense_score
        }

        # Dataset fields are merged last, so they override the base keys above
        if company_data:
            scoring_input.update(company_data)

        scoring_result = scoring_engine.calculate_company_score(scoring_input)
        tracker.end_timing("scoring_calculation", f"Scored {company_name}")

        step = {
            'status': 'success',
            'total_score': scoring_result['total_score'],
            'tier_classification': scoring_result['tier_classification'],
            'component_scores': scoring_result['component_scores'],
            'keywords_found': scoring_result['keywords_found']
        }

        print(f"   ✅ Total Score: {step['total_score']:.1f}/100")
        print(f"   🏆 Tier: {step['tier_classification']}")
        print(f"   📊 Component Scores: {step['component_scores']}")

        # Only categories with at least one keyword are listed
        keywords_summary = [
            f"{category}: {len(keywords)}"
            for category, keywords in step['keywords_found'].items()
            if keywords
        ]
        if keywords_summary:
            print(f"   🔤 Keywords: {', '.join(keywords_summary)}")

        return step, scoring_result

    except Exception as e:
        return _failed_step(e), None


def _sync_to_hubspot(company_name, scoring_result, defense_score, research_step):
    """Step 4: create or update the HubSpot company record; return the step record."""
    print("\n📊 Step 4: HubSpot CRM Sync...")
    try:
        tracker.start_timing("hubspot_sync")

        if research_step['status'] == 'success':
            research_summary = research_step.get('research_summary', 'Research failed')
        else:
            research_summary = 'Research failed'

        # NOTE(review): when scoring failed this still writes score 0 / 'unscored',
        # which may overwrite an existing record's values — confirm this is intended.
        hubspot_data = {
            'name': company_name,
            'atomus_score': scoring_result['total_score'] if scoring_result else 0,
            'defense_contract_score': defense_score,
            'tier_classification': scoring_result['tier_classification'] if scoring_result else 'unscored',
            'last_research_date': datetime.now().strftime('%Y-%m-%d'),
            'research_summary': research_summary
        }

        # Update the first search hit if there is one, otherwise create a record
        existing_companies = hubspot_client.search_companies({'name': company_name})

        if existing_companies:
            company_id = existing_companies[0]['id']
            hubspot_client.update_company(company_id, hubspot_data)

            step = {
                'status': 'updated',
                'company_id': company_id,
                'action': 'updated existing record'
            }
            print(f"   ✅ Updated existing HubSpot record: {company_id}")

        else:
            new_company = hubspot_client.create_company(hubspot_data)

            step = {
                'status': 'created',
                'company_id': new_company['id'],
                'action': 'created new record'
            }
            print(f"   ✅ Created new HubSpot record: {new_company['id']}")

        tracker.end_timing("hubspot_sync", f"Synced {company_name}")
        return step

    except Exception as e:
        return _failed_step(e)


def process_company_workflow(company_name, company_data=None):
    """
    Process a single company through the complete Atomus workflow.

    Runs the four steps in order (HigherGov defense analysis, OpenAI research,
    scoring, HubSpot sync). A failing step is recorded with status 'failed'
    and the remaining steps still run with fallback inputs.

    Args:
        company_name: Company name passed to every API client.
        company_data: Optional dict of extra fields merged into the scoring
            input (its keys override the computed ones).

    Returns:
        dict with 'company_name', 'timestamp' (ISO string), 'workflow_steps'
        (one record per step under 'defense_analysis', 'ai_research',
        'scoring', 'hubspot_sync') and 'final_results' ('success_rate',
        'overall_status', 'atomus_score', 'tier').
    """
    print(f"\n🏢 Processing: {company_name}")
    print("=" * 50)

    tracker.start_timing(f"company_workflow_{company_name}")

    results = {
        'company_name': company_name,
        'timestamp': datetime.now().isoformat(),
        'workflow_steps': {},
        'final_results': {}
    }
    steps = results['workflow_steps']

    # Step 1: HigherGov Defense Contractor Analysis
    steps['defense_analysis'] = _run_defense_analysis(company_name)
    # Derived once and shared by the scoring and HubSpot steps
    defense_score = _defense_score_from(steps['defense_analysis'])

    # Step 2: OpenAI Research
    steps['ai_research'], research_data = _run_ai_research(company_name)

    # Step 3: Scoring Engine
    steps['scoring'], scoring_result = _run_scoring(
        company_name, company_data, research_data, defense_score
    )

    # Step 4: HubSpot Sync
    steps['hubspot_sync'] = _sync_to_hubspot(
        company_name, scoring_result, defense_score, steps['ai_research']
    )

    # Calculate final results
    successful_steps = sum(
        1 for step in steps.values() if step['status'] in _SUCCESSFUL_STEP_STATUSES
    )
    total_steps = len(steps)

    if successful_steps == total_steps:
        overall_status = 'success'
    elif successful_steps > 0:
        overall_status = 'partial_success'
    else:
        overall_status = 'failed'

    results['final_results'] = {
        'success_rate': f"{successful_steps}/{total_steps}",
        'overall_status': overall_status,
        'atomus_score': scoring_result['total_score'] if scoring_result else 0,
        'tier': scoring_result['tier_classification'] if scoring_result else 'unscored'
    }

    tracker.end_timing(f"company_workflow_{company_name}", "Completed workflow")

    # Print summary
    final = results['final_results']
    status_emoji = {'success': "✅", 'partial_success': "⚠️"}.get(overall_status, "❌")
    print("\n📋 WORKFLOW SUMMARY:")
    print(f"   {status_emoji} Overall Status: {final['overall_status']}")
    print(f"   📈 Success Rate: {final['success_rate']}")
    print(f"   🎯 Final Score: {final['atomus_score']:.1f}")
    print(f"   🏆 Tier: {final['tier']}")

    return results

print("✅ Workflow function defined")

## 🎯 Process Sample Companies

Let's start with 3 companies to test the workflow.

In [None]:
# Process a subset of companies first (3 companies for initial testing).
# The slice takes the first three names in dataset order; the original note
# lists them as Firestorm, Firehawk, Overland AI — TODO confirm against the data.
sample_companies = company_names[:3]

print(f"🎯 Processing {len(sample_companies)} sample companies...")
print(f"Companies: {sample_companies}")
print("\n" + "=" * 80)

# One result dict per company, in processing order (read by the later cells)
workflow_results = []
start_time = time.time()

for i, company_name in enumerate(sample_companies, 1):
    print(f"\n🚀 [{i}/{len(sample_companies)}] Starting workflow for {company_name}...")

    # Get company data from our dataset if available (first matching row)
    company_row = df_companies[df_companies['name'] == company_name]
    company_data = company_row.iloc[0].to_dict() if not company_row.empty else None

    try:
        result = process_company_workflow(company_name, company_data)
        workflow_results.append(result)

    except Exception as e:
        # Keep going with the next company; record a minimal failed entry so
        # the analysis cell still sees one row per company
        print(f"❌ Workflow failed for {company_name}: {str(e)}")
        workflow_results.append({
            'company_name': company_name,
            'timestamp': datetime.now().isoformat(),
            'error': str(e),
            'final_results': {'overall_status': 'failed'}
        })

    # Add delay between companies to respect API limits (skipped after the last one)
    if i < len(sample_companies):
        print("⏳ Waiting 2 seconds before next company...")
        time.sleep(2)

total_time = time.time() - start_time
print(f"\n⏱️ Total processing time: {total_time:.1f} seconds")
# Guard the average: an empty dataset would otherwise raise ZeroDivisionError
if sample_companies:
    print(f"📊 Average time per company: {total_time/len(sample_companies):.1f} seconds")
else:
    print("📊 Average time per company: n/a (no companies processed)")

## 📊 Results Analysis

In [None]:
# Summarize the per-company workflow results collected in `workflow_results`
print("📊 RESULTS ANALYSIS")
print("=" * 30)


def _step_status(entry, step_name):
    """Return the recorded status of one workflow step, or 'unknown' if absent."""
    return entry.get('workflow_steps', {}).get(step_name, {}).get('status', 'unknown')


if not workflow_results:
    print("❌ No results to analyze")
else:
    # One flat row per company: final outcome first, then per-step status
    results_summary = []
    for entry in workflow_results:
        outcome = entry.get('final_results', {})
        results_summary.append({
            'Company': entry['company_name'],
            'Status': outcome.get('overall_status', 'unknown'),
            'Score': outcome.get('atomus_score', 0),
            'Tier': outcome.get('tier', 'unknown'),
            'Success_Rate': outcome.get('success_rate', '0/4'),
            'Defense_Analysis': _step_status(entry, 'defense_analysis'),
            'AI_Research': _step_status(entry, 'ai_research'),
            'Scoring': _step_status(entry, 'scoring'),
            'HubSpot_Sync': _step_status(entry, 'hubspot_sync'),
        })

    # Table view; `df_results` is also picked up by the save cell
    df_results = pd.DataFrame(results_summary)
    print("🏆 Workflow Results Summary:")
    display(df_results)

    # Outcome counts across all processed companies
    overall_statuses = [
        entry.get('final_results', {}).get('overall_status')
        for entry in workflow_results
    ]
    company_total = len(workflow_results)

    print("\n📈 Overall Statistics:")
    print(f"   ✅ Fully Successful: {overall_statuses.count('success')}/{company_total}")
    print(f"   ⚠️ Partial Success: {overall_statuses.count('partial_success')}/{company_total}")
    print(f"   ❌ Failed: {overall_statuses.count('failed')}/{company_total}")

    # Score statistics use only entries that carry a numeric score
    scores = []
    for entry in workflow_results:
        score = entry.get('final_results', {}).get('atomus_score')
        if isinstance(score, (int, float)):
            scores.append(score)

    if scores:
        print("\n🎯 Score Statistics:")
        print(f"   Average Score: {sum(scores)/len(scores):.1f}")
        print(f"   Highest Score: {max(scores):.1f}")
        print(f"   Lowest Score: {min(scores):.1f}")

    # Tier histogram, in first-seen order
    tier_counts = {}
    for entry in workflow_results:
        tier_label = entry.get('final_results', {}).get('tier', 'unknown')
        tier_counts.setdefault(tier_label, 0)
        tier_counts[tier_label] += 1

    print("\n🏆 Tier Distribution:")
    for tier_label, company_count in tier_counts.items():
        print(f"   {tier_label}: {company_count} companies")

## 📈 Performance Metrics

In [None]:
# Performance analysis: per-client API usage plus tracker timings
print("📈 PERFORMANCE METRICS")
print("=" * 25)

# API Usage Statistics (each client logs its own summary)
print("🔵 HubSpot API Usage:")
hubspot_client.log_stats_summary()

print("\n🤖 OpenAI API Usage:")
openai_client.log_stats_summary()

print("\n🛡️ HigherGov API Usage:")
highergov_client.log_stats_summary()

# Timing analysis
# `timing_results` is treated here as a mapping of operation name -> sequence
# of durations in seconds; the save cell reuses it.
print("\n⏱️ Timing Analysis:")
timing_results = tracker.get_timing_summary()
if timing_results:
    for operation, times in timing_results.items():
        if times:
            # Loop-local names on purpose: rebinding the notebook-global
            # `total_time` here would overwrite the wall-clock processing time
            # that the save cell writes out as 'total_processing_time'.
            op_total = sum(times)
            op_avg = op_total / len(times)
            print(f"   {operation}: {op_avg:.2f}s avg, {op_total:.2f}s total ({len(times)} calls)")
else:
    print("   No timing data available")

## 💾 Save Results

In [None]:
# Save workflow results: detailed JSON, summary CSV and performance JSON
print("💾 SAVING RESULTS")
print("=" * 20)

# Target directory (relative to the notebook's working directory)
results_dir = Path("../data/research_results")

try:
    # Create results directory
    results_dir.mkdir(parents=True, exist_ok=True)

    # Save detailed results; one timestamp is shared by all files of this run
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    detailed_results_file = results_dir / f"mvp_demo_detailed_{timestamp}.json"

    with open(detailed_results_file, 'w', encoding='utf-8') as f:
        json.dump(workflow_results, f, indent=2, ensure_ascii=False)

    print(f"✅ Detailed results saved: {detailed_results_file}")

    # Save summary CSV (only if the analysis cell produced `df_results`)
    if 'df_results' in globals():
        summary_csv_file = results_dir / f"mvp_demo_summary_{timestamp}.csv"
        df_results.to_csv(summary_csv_file, index=False)
        print(f"✅ Summary CSV saved: {summary_csv_file}")

    # Save performance metrics.
    # Reuse the timing summary from the performance cell when it has been run;
    # otherwise ask the tracker directly instead of failing with a NameError.
    if 'timing_results' in globals():
        timing_data = timing_results
    else:
        timing_data = tracker.get_timing_summary()

    performance_file = results_dir / f"mvp_demo_performance_{timestamp}.json"
    performance_data = {
        'timing_results': timing_data,
        'api_stats': {
            'hubspot': hubspot_client.get_stats_summary(),
            'openai': openai_client.get_stats_summary(),
            'highergov': highergov_client.get_stats_summary()
        },
        'processed_companies': len(workflow_results),
        # NOTE(review): `total_time` is a notebook global; any cell that rebinds
        # it after the processing cell changes the value saved here.
        'total_processing_time': total_time if 'total_time' in globals() else 0
    }

    with open(performance_file, 'w', encoding='utf-8') as f:
        json.dump(performance_data, f, indent=2, ensure_ascii=False)

    print(f"✅ Performance data saved: {performance_file}")

except Exception as e:
    print(f"❌ Error saving results: {str(e)}")
else:
    # Reported only when every save above succeeded
    print(f"\n📁 All results saved to: {results_dir}")

## 🎯 Next Steps

Based on the results above, you can:

1. **Process More Companies**: Change `sample_companies` to include more companies from the dataset
2. **Debug Issues**: Use the debugging tools notebook to investigate any failures
3. **Adjust Configuration**: Modify scoring weights or research prompts based on results
4. **Scale Up**: Process all 13 companies once you're satisfied with the results

## 🔧 Configuration Adjustments

Use this section to test configuration changes:

In [None]:
# Scratch area for inspecting and experimenting with the scoring configuration
print("🔧 CONFIGURATION TESTING")
print("=" * 30)

# Show the scoring engine's current weights and tier thresholds
current_config = scoring_engine.get_config_summary()

scoring_weights = current_config['weights']
print(f"📏 Current scoring weights: {scoring_weights}")

tier_thresholds = current_config['tier_thresholds']
print(f"🎯 Current tier thresholds: {tier_thresholds}")

# Add experimental configuration changes below this line
print("\n💡 Use this section to test configuration changes")

## ✅ MVP Demo Complete

If every cell above ran without errors, the MVP demo has successfully demonstrated:

- ✅ Complete API integration workflow
- ✅ Defense contractor analysis via HigherGov
- ✅ AI-powered research via OpenAI
- ✅ Weighted scoring and tier classification
- ✅ HubSpot CRM synchronization
- ✅ Performance monitoring and error handling
- ✅ Results export and analysis

**Ready for production scaling!** 🚀