In [1]:
# =============================================================================
# CELL 1: Setup and Directory Structure
# =============================================================================

import sys
from pathlib import Path

# Resolve the project root: when the kernel runs from a `notebooks/` folder the
# project lives one level up, otherwise the working directory is the root.
project_root = Path.cwd()
if project_root.name == 'notebooks':
    project_root = project_root.parent

# Put the project root first on sys.path so project-local imports
# (`config`, `src...`) used by later cells resolve.
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Configuration helper
def create_config(overrides=None):
    """Build a PipelineConfig whose folders all live under the notebook's project root.

    Args:
        overrides: Passed through unchanged to ``PipelineConfig``.

    Returns:
        The configured ``PipelineConfig`` instance, after its
        ``_create_directories()`` hook has been called.
    """
    # Imported lazily: `config` is only importable once the project root has
    # been put on sys.path by the setup code above.
    from config import PipelineConfig

    cfg = PipelineConfig(overrides)
    cfg.project_root = project_root
    cfg.documents_root = project_root / "documents"

    # Every document folder is a direct child of documents_root.
    for attribute, subfolder in (
        ("literature_folder", "literature"),
        ("your_work_folder", "your_work"),
        ("biblio_folder", "biblio"),
        ("current_drafts_folder", "current_drafts"),
    ):
        setattr(cfg, attribute, cfg.documents_root / subfolder)

    cfg.cache_file = project_root / "physics_knowledge_base.pkl"
    cfg._create_directories()
    return cfg

# Create configuration and directories
config = create_config()

print("📁 YOUR PHYSICS PIPELINE DIRECTORY STRUCTURE")
print("=" * 60)
print(f"Project Root: {project_root}")
print()

# Folder name (relative to documents_root) -> what the user should put there.
folders = {
    "biblio": "Put your Zotero .bib files here",
    "literature": "Downloaded papers go here (auto-created)",
    "your_work": "Your own published papers",
    "current_drafts": "Your current drafts"
}

for folder_name, description in folders.items():
    folder_path = config.documents_root / folder_name
    # is_dir() rather than exists(): a stray regular file with this name must
    # not be treated as a usable folder (iterdir() would raise on it).
    folder_ready = folder_path.is_dir()
    exists = "✅" if folder_ready else "❌"
    # "Current files" should mean documents: skip subdirectories and hidden
    # entries (e.g. macOS .DS_Store) that would otherwise inflate the count.
    file_count = (
        sum(
            1
            for entry in folder_path.iterdir()
            if entry.is_file() and not entry.name.startswith(".")
        )
        if folder_ready
        else 0
    )

    print(f"{exists} {folder_name}/ - {description}")
    print(f"   📍 Location: {folder_path}")
    print(f"   📊 Current files: {file_count}")
    print()

print("🎯 TO GET STARTED:")
print("1. Export .bib files from Zotero → Save to documents/biblio/")
print("2. Copy your papers → Save to documents/your_work/")
print("3. Run the commands below!")

📁 YOUR PHYSICS PIPELINE DIRECTORY STRUCTURE
Project Root: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis

✅ biblio/ - Put your Zotero .bib files here
   📍 Location: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/biblio
   📊 Current files: 1

✅ literature/ - Downloaded papers go here (auto-created)
   📍 Location: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature
   📊 Current files: 1

✅ your_work/ - Your own published papers
   📍 Location: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/your_work
   📊 Current files: 10

✅ current_drafts/ - Your current drafts
   📍 Location: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/current_drafts
   📊 Current files: 2

🎯 TO GET STARTED:
1. Export .bib files from Zotero → Save to documents/biblio/
2. Copy your papers → Save to documents/your_work/
3. Run t

In [2]:
# =============================================================================
# CELL 2: Check for Existing .bib Files
# =============================================================================

# Check what .bib files you already have.
biblio_folder = config.biblio_folder
# Sorted so that "the first .bib file" is the same file on every run;
# Path.glob() yields entries in arbitrary, filesystem-dependent order.
bib_files = sorted(biblio_folder.glob("*.bib"))

print("📚 CHECKING FOR EXISTING .BIB FILES")
print("=" * 40)

if bib_files:
    print(f"✅ Found {len(bib_files)} .bib file(s):")
    for bib_file in bib_files:
        file_size = bib_file.stat().st_size / 1024  # KB
        print(f"   📄 {bib_file.name} ({file_size:.1f} KB)")

    # Quick preview: first non-blank line among the first 10 lines of the first file.
    first_bib = bib_files[0]
    try:
        # Decode explicitly as UTF-8 instead of the platform default encoding;
        # undecodable bytes are replaced so a preview can never abort on them.
        content = first_bib.read_text(encoding="utf-8", errors="replace")
        lines = content.split('\n')[:10]
        print(f"\n📖 Preview of {first_bib.name}:")
        for line in lines:
            if line.strip():
                print(f"   {line[:60]}...")
                break
    except OSError as e:
        # The preview is best-effort: report the I/O problem and carry on.
        print(f"   Could not preview file: {e}")

    print(f"\n🚀 Ready to download literature!")

else:
    print("📭 No .bib files found")
    print("\n💡 TO ADD .BIB FILES:")
    print("1. Open Zotero")
    print("2. Select your collection")
    print("3. Right-click → Export Collection")
    print("4. Choose 'BibTeX' format")
    print(f"5. Save to: {biblio_folder}")

📚 CHECKING FOR EXISTING .BIB FILES
📭 No .bib files found

💡 TO ADD .BIB FILES:
1. Open Zotero
2. Select your collection
3. Right-click → Export Collection
4. Choose 'BibTeX' format
5. Save to: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/biblio


In [4]:
# =============================================================================
# CELL 3: Download Literature from .bib Files (Enhanced with Debug Mode)
# =============================================================================

# Only run this if you have .bib files.
# Sorted so the file picked below is deterministic (glob order is arbitrary)
# and does not change from one run or filesystem to the next.
bib_files = sorted(config.biblio_folder.glob("*.bib"))

if bib_files:
    print("🚀 DOWNLOADING LITERATURE FROM ARXIV")
    print("=" * 45)

    # Project-local import; needs the project root on sys.path (setup cell).
    from src.downloaders import LiteratureDownloader

    # Use the first .bib file (or modify to use a specific one)
    bib_file = bib_files[0]
    print(f"📖 Using: {bib_file.name}")

    # Initialize enhanced downloader
    downloader = LiteratureDownloader(
        output_directory=config.literature_folder,
        delay_between_downloads=config.download_delay,
        arxiv_config=config.get_arxiv_config()
    )

    print("⏳ Starting download... (this may take several minutes)")
    print("💡 The system will:")
    print("   1. Parse your .bib file")
    print("   2. Search for papers on arXiv")
    print("   3. Download PDFs and TEX files")
    print("   4. Generate a detailed report")
    print()

    # Download papers with enhanced debugging.
    # `results` is indexed below by the keys 'successful' and 'failed'.
    results = downloader.download_from_bibtex(
        bib_file,
        generate_report=True,
        debug_mode=True  # Enhanced: Shows detailed debug output
    )

    print(f"\n✅ DOWNLOAD COMPLETE!")
    print(f"   📥 Successfully downloaded: {len(results['successful'])}")
    print(f"   ❌ Not found: {len(results['failed'])}")

    # Show at most three examples of each outcome to keep the output short.
    if results['successful']:
        print(f"\n📄 Sample downloads:")
        for i, result in enumerate(results['successful'][:3], start=1):
            paper = result.paper_metadata
            print(f"   {i}. {paper.title[:50]}...")
            print(f"      arXiv: {result.search_result.arxiv_id}")

    # Enhanced: Show failed papers with suggestions
    if results['failed']:
        print(f"\n❌ Failed downloads ({len(results['failed'])}):")
        for i, result in enumerate(results['failed'][:3], start=1):
            paper = result.paper_metadata
            print(f"   {i}. {paper.title[:50]}...")
            print(f"      Reason: {result.search_result.error_message}")

        print(f"\n💡 To debug specific failures, run:")
        print(f"   downloader.analyze_failed_paper('Paper Title Here')")

else:
    print("⏭️ SKIPPING DOWNLOAD - No .bib files found")
    print("Add .bib files to documents/biblio/ first!")

🚀 DOWNLOADING LITERATURE FROM ARXIV
📖 Using: DynamicsConditionalEnsembles_small.bib
2025-06-02 22:02:43 - physics_pipeline.src.downloaders.bibtex_parser - INFO - BibTeX parser initialized
2025-06-02 22:02:43 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - ArXiv searcher initialized
📁 Literature downloader initialized
   Output directory: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature
   Delay between downloads: 1.2s
   ArXiv config: {'delay': 1.2, 'max_retries': 3, 'timeout': 30, 'title_threshold': 0.6, 'abstract_threshold': 0.5, 'high_confidence_threshold': 0.9, 'google_api_key': '***HIDDEN***', 'google_search_engine_id': '6070be8a5a9764d74'}
2025-06-02 22:02:43 - physics_pipeline.src.downloaders.literature_downloader - INFO - Literature downloader initialized with output: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature
⏳ Starting download... (this may take several minutes)


    📝 Original title: Tight Bounds on the Convergence of Noisy Random Circuits to the Uniform Distribution
    🧹 Cleaned title: Tight Bounds on the Convergence of Noisy Random Circuits to the Uniform Distribution
    🔍 Search query: ti:"Tight Bounds on the Convergence of Noisy Random Circuits to the Uniform Distribution"
    📡 ArXiv API call: ti:"Tight Bounds on the Convergence of Noisy Random Circuits to the Uniform Distribution"
    📊 ArXiv returned 1 entries
    📊 Found 1 results from arXiv API
    📄 Result 1: 2112.00716 - Tight bounds on the convergence of noisy random circuits to ...
        📊 Title similarity: 1.000
        ✅ HIGH CONFIDENCE MATCH!
✅ Found via title search: 2112.00716 (confidence: 1.000)
📥 DOWNLOADING: 2112.00716
   📄 Title on arXiv: Tight bounds on the convergence of noisy random circuits to the uniform
  distribution
   🎯 Confidence: 1.000
   🔍 Method: title_high_confidence
2025-06-02 22:02:44 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - Downloadi

    📊 Found 0 results from arXiv API
❌ Abstract search failed: No matching papers found by abstract
🌐 Strategy 4: Google Custom Search API fallback...

🔍 GOOGLE FALLBACK ACTIVATED for: Quantum T-Designs: T-Wise Independence in the Quan...
2025-06-02 22:02:55 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - 🔍 Google Custom Search API activated for: Quantum T-Designs: T-Wise Independence in the Quan...
    📝 Search query: Quantum T-Designs: T-Wise Independence in the Quantum World
    📡 Making Google API request...
📊 Google returned 5 results
    📊 Google returned 5 results
    📄 Result 1: Quantum t-designs: t-wise independence in the quantum world...
        🔗 URL: https://arxiv.org/abs/quant-ph/0701126
        📋 Extracted arXiv ID: quant-ph/0701126
        📄 ArXiv title: Quantum t-designs: t-wise independence in the quantum world...
        📊 Abstract similarity: 0.965
        ✅ GOOGLE SEARCH MATCH FOUND!
✅ Found via Google API search: quant-ph/0701126 (confidence: 0.600)
📥 D

    📊 ArXiv returned 0 entries
    📊 Found 0 results from arXiv API
❌ Abstract search failed: No matching papers found by abstract
🌐 Strategy 4: Google Custom Search API fallback...

🔍 GOOGLE FALLBACK ACTIVATED for: Conditional Entropy and Data Processing: An Axioma...
2025-06-02 22:03:05 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - 🔍 Google Custom Search API activated for: Conditional Entropy and Data Processing: An Axioma...
    📝 Search query: Conditional Entropy and Data Processing: An Axiomatic Approach Based on Core-Concavity
    📡 Making Google API request...
📊 Google returned 5 results
    📊 Google returned 5 results
    📄 Result 1: An Extension of the Adversarial Threat Model in Quantitative...
        🔗 URL: http://www.arxiv.org/pdf/2409.04108
        📋 Extracted arXiv ID: 2409.04108
        📄 ArXiv title: An Extension of the Adversarial Threat Model in Quantitative...
        📊 Abstract similarity: 0.162
        ❌ Abstract validation failed
    📄 Result 2: A Va

    📄 Found on arXiv: Nonlinear Systems Identification Using Deep Dynamic Neural N...
    📊 Abstract similarity: 0.094
❌ Failed to validate existing arXiv ID: Failed to validate arXiv ID  disordered systems and neural networks
    📝 Original title: Postselection-Free Entanglement Dynamics via Spacetime Duality
    🧹 Cleaned title: Postselection Free Entanglement Dynamics via Spacetime Duality
    🔍 Search query: ti:"Postselection Free Entanglement Dynamics via Spacetime Duality"
    📡 ArXiv API call: ti:"Postselection Free Entanglement Dynamics via Spacetime Duality"
    📊 ArXiv returned 1 entries
    📊 Found 1 results from arXiv API
    📄 Result 1: 2010.15840 - Postselection-free entanglement dynamics via spacetime duali...
        📊 Title similarity: 1.000
        ✅ HIGH CONFIDENCE MATCH!
✅ Found via title search: 2010.15840 (confidence: 1.000)
📥 DOWNLOADING: 2010.15840
   📄 Title on arXiv: Postselection-free entanglement dynamics via spacetime duality
   🎯 Confidence: 1.000
   🔍 Met

2025-06-02 22:03:25 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - PDF downloaded: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/2208.10542.pdf
2025-06-02 22:03:25 - physics_pipeline.src.utils.file_utils - INFO - Extracted main tex file: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/2208.10542.tex
2025-06-02 22:03:25 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - TEX downloaded and extracted: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/2208.10542.tex
   ✅ Download completed!
      📄 PDF: Downloaded
      📝 TEX: Downloaded
✅ SUCCESS: Downloaded 2208.10542 in 3.1s
   📄 PDF: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/2208.10542.pdf
   📝 TEX: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/2208.10542.tex
📊 P

    📝 Original title: A Classical Analog for Electronic Degrees of Freedom in Nonadiabatic Collision Processes
    🧹 Cleaned title: A Classical Analog for Electronic Degrees of Freedom in Nonadiabatic Collision Processes
    🔍 Search query: ti:"A Classical Analog for Electronic Degrees of Freedom in Nonadiabatic Collision Processes"
    📡 ArXiv API call: ti:"A Classical Analog for Electronic Degrees of Freedom in Nonadiabatic Collision Processes"
    📊 ArXiv returned 0 entries
    📊 Found 0 results from arXiv API
❌ Title search failed: No matching papers found by title
📝 Strategy 3: Searching by abstract content...
    Abstract preview: It is shown how a formally exact classical analog can be defined for a finite dimensional (in Hilber...
    📝 Abstract snippet: It is shown how a formally exact classical analog can be def
    🔍 Search query: all:"It is shown how a formally exact classical analog can be def"
    📡 ArXiv API call: all:"It is shown how a formally exact classical analog ca

    📝 Original title: Effective Theory for the Measurement-Induced Phase Transition of Dirac Fermions
    🧹 Cleaned title: Effective Theory for the Measurement Induced Phase Transition of Dirac Fermions
    🔍 Search query: ti:"Effective Theory for the Measurement Induced Phase Transition of Dirac Fermions"
    📡 ArXiv API call: ti:"Effective Theory for the Measurement Induced Phase Transition of Dirac Fermions"
    📊 ArXiv returned 1 entries
    📊 Found 1 results from arXiv API
    📄 Result 1: 2102.08381 - Effective Theory for the Measurement-Induced Phase Transitio...
        📊 Title similarity: 1.000
        ✅ HIGH CONFIDENCE MATCH!
✅ Found via title search: 2102.08381 (confidence: 1.000)
📥 DOWNLOADING: 2102.08381
   📄 Title on arXiv: Effective Theory for the Measurement-Induced Phase Transition of Dirac
  Fermions
   🎯 Confidence: 1.000
   🔍 Method: title_high_confidence
2025-06-02 22:03:47 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - Downloading paper: 2102.08381
2025

    📊 Found 1 results from arXiv API
    📄 Result 1: 1611.05163 - Einstein-Podolsky-Rosen Steering and Quantum Steering Ellips...
        📊 Title similarity: 1.000
        ✅ HIGH CONFIDENCE MATCH!
✅ Found via title search: 1611.05163 (confidence: 1.000)
📥 DOWNLOADING: 1611.05163
   📄 Title on arXiv: Einstein-Podolsky-Rosen Steering and Quantum Steering Ellipsoids:
  Optimal Two-Qubit States and Projective Measurements
   🎯 Confidence: 1.000
   🔍 Method: title_high_confidence
2025-06-02 22:03:58 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - Downloading paper: 1611.05163
2025-06-02 22:03:58 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - PDF downloaded: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/1611.05163.pdf
2025-06-02 22:03:58 - physics_pipeline.src.utils.file_utils - INFO - Extracted main tex file: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/1611.05163.

2025-06-02 22:04:08 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - TEX downloaded and extracted: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/1106.4292.tex
   ✅ Download completed!
      📄 PDF: Downloaded
      📝 TEX: Downloaded
✅ SUCCESS: Downloaded 1106.4292 in 3.1s
   📄 PDF: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/1106.4292.pdf
   📝 TEX: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/1106.4292.tex
📊 Progress: 78.8% (26/33)
--------------------------------------------------

📄 PAPER 27/33
📝 Title: Steering Is an Essential Feature of Non-Locality in Quantum Theory
⏱️  Starting at: 22:04:09
🔍 Searching for paper...
   👥 Authors: Ramanathan, Ravishankar, Goyeneche, Dardo, Muhammad, Sadiq and 7 more
   📅 Year: 2015
   📖 Journal: Nature Communications
2025-06-02 22:04:09 - physics_pipeline.src.downloaders.arxiv_search

    📊 Found 1 results from arXiv API
    📄 Result 1: 1807.07082 - Entanglement Transitions from Holographic Random Tensor Netw...
        📊 Title similarity: 1.000
        ✅ HIGH CONFIDENCE MATCH!
✅ Found via title search: 1807.07082 (confidence: 1.000)
📥 DOWNLOADING: 1807.07082
   📄 Title on arXiv: Entanglement Transitions from Holographic Random Tensor Networks
   🎯 Confidence: 1.000
   🔍 Method: title_high_confidence
2025-06-02 22:04:20 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - Downloading paper: 1807.07082
2025-06-02 22:04:21 - physics_pipeline.src.downloaders.arxiv_searcher - INFO - PDF downloaded: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/1807.07082.pdf
2025-06-02 22:04:21 - physics_pipeline.src.utils.file_utils - INFO - Extracted main tex file: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis/documents/literature/1807.07082.tex
2025-06-02 22:04:21 - physics_pipeline.src.downloade

    📝 Original title: Universal Probability Distribution for the Wave Function of a Quantum System Entangled with Its Environment
    🧹 Cleaned title: Universal Probability Distribution for the Wave Function of a Quantum System Entangled with Its Environment
    🔍 Search query: ti:"Universal Probability Distribution for the Wave Function of a Quantum System Entangled with Its Environment"
    📡 ArXiv API call: ti:"Universal Probability Distribution for the Wave Function of a Quantum System Entangled with Its Environment"
    📊 ArXiv returned 1 entries
    📊 Found 1 results from arXiv API
    📄 Result 1: 1104.5482 - Universal Probability Distribution for the Wave Function of ...
        📊 Title similarity: 1.000
        ✅ HIGH CONFIDENCE MATCH!
✅ Found via title search: 1104.5482 (confidence: 1.000)
📥 DOWNLOADING: 1104.5482
   📄 Title on arXiv: Universal Probability Distribution for the Wave Function of a Quantum
  System Entangled with Its Environment
   🎯 Confidence: 1.000
   🔍 Method

In [None]:

# Ask the downloader to analyze one specific title by hand.
# NOTE(review): relies on `downloader`, which only exists after the download
# cell has run with at least one .bib file present — run that cell first.
downloader.analyze_failed_paper('Quantum T-Designs: T-Wise Independence in the Quantum World')


In [None]:
# =============================================================================
# CELL 4: Debug Specific Failed Papers
# =============================================================================

# Diagnose download failures: analyze the first failed entry recorded by the
# download cell, or fall back to a known problematic example when there is none.
print("🔬 DEBUG SPECIFIC FAILED PAPERS")
print("=" * 40)

try:
    # At cell level globals() and locals() are the same namespace.
    if 'results' not in globals() or not results['failed']:
        print("✅ No failed papers to debug (or no download results yet)")
        print("Run the download cell first, or manually debug a specific paper:")

        # Example: Debug the paper from your report
        example_paper = "Classicality, the Ensemble Interpretation, and Decoherence: Resolving the Hyperion Dispute"
        print("\n🧪 Example: Debugging known failing paper")
        print(f"Title: {example_paper}")

        # The arXiv ID this example is expected to resolve to.
        expected_id = "quant-ph/0605249"
        print(f"Expected arXiv ID: {expected_id}")

        if 'downloader' not in globals():
            print("Run the download cell first to initialize the downloader")
        else:
            downloader.check_arxiv_directly(expected_id)
            downloader.analyze_failed_paper(example_paper)

    else:
        print(f"Found {len(results['failed'])} failed papers")

        # Only the first failure is analyzed automatically.
        failed_paper = results['failed'][0]
        paper_title = failed_paper.paper_metadata.title

        print(f"\n🧪 Debugging: {paper_title[:60]}...")
        downloader.analyze_failed_paper(paper_title)

        # If you want to debug a specific paper from your report, uncomment and modify:
        # specific_paper = "Classicality, the Ensemble Interpretation, and Decoherence: Resolving the Hyperion Dispute"
        # downloader.analyze_failed_paper(specific_paper)

except Exception as e:
    print(f"Debug error: {e}")
    print("Make sure you've run the download cell first")

In [None]:
# =============================================================================
# CELL 5: Manual Paper Testing
# =============================================================================

# Section banner for the manual testing cell (title line, then a rule).
print("🧪 MANUAL PAPER TESTING", "=" * 30, sep="\n")

# Test individual papers manually
def test_paper(title, expected_arxiv_id=None):
    """Quick test function for individual papers"""
    if 'downloader' not in locals():
        print("❌ Downloader not initialized. Run the download cell first.")
        return
    
    print(f"\n📝 Testing: {title}")
    if expected_arxiv_id:
        print(f"🎯 Expected ID: {expected_arxiv_id}")
        
        # Check if ID exists directly
        exists = downloader.check_arxiv_directly(expected_arxiv_id)
        if exists:
            print(f"✅ Paper exists on arXiv")
        else:
            print(f"❌ Paper not found with expected ID")
    
    # Test the full search process
    result = downloader.test_single_paper(title, debug_mode=True)
    
    if result.search_result.found:
        print(f"✅ SUCCESS: Found {result.search_result.arxiv_id}")
    else:
        print(f"❌ FAILED: {result.search_result.error_message}")
    
    return result

# (title, expected arXiv ID or None) pairs taken from the failed-download report.
failing_papers = [
    ("Classicality, the Ensemble Interpretation, and Decoherence: Resolving the Hyperion Dispute", "quant-ph/0605249"),
    ("Quantum T-Designs: T-Wise Independence in the Quantum World", None),
    ("Dynamical Purification Phase Transition Induced by Quantum Measurements", None)
]

print("Testing papers from your failed download report...")

# One failing paper must not stop the rest: report the error and move on.
for title, expected_id in failing_papers:
    try:
        test_paper(title, expected_id)
    except Exception as e:
        print(f"Error testing {title[:30]}...: {e}")
    else:
        print("-" * 50)



In [None]:
# =============================================================================
# CELL 6: Build Knowledge Base (Keep your existing cell)
# =============================================================================

# Placeholder cell: it only prints a section banner and builds nothing itself.
print("🧠 BUILDING PHYSICS KNOWLEDGE BASE", "=" * 40, sep="\n")

# Continue with knowledge base building as before...
# (Add your existing knowledge base building code here)

In [None]:
# =============================================================================
# CELL 7: Build Knowledge Base (with PyTorch compatibility fix)
# =============================================================================

# PYTORCH COMPATIBILITY FIX - Add this first!
import torch


def get_default_device():
    """Fallback for ``torch.get_default_device`` on PyTorch builds that lack it.

    Returns:
        torch.device: the device on which a newly created tensor is allocated.
    """
    # The real API reports where new tensors are allocated, not "the best
    # available accelerator". An empty tensor lands on exactly that device, so
    # its `.device` is the faithful answer (CPU unless the default was changed).
    return torch.empty(0).device


# Install the fallback only when the running PyTorch does not provide the API.
if not hasattr(torch, 'get_default_device'):
    torch.get_default_device = get_default_device
    print("✅ Applied PyTorch compatibility fix")

# Build the knowledge base from the three document folders and cache it on disk.
print("\n🏗️ BUILDING KNOWLEDGE BASE")
print("=" * 35)

# Imported here rather than at the top of the notebook so that it runs after
# the PyTorch compatibility shim earlier in this cell.
from src.core import KnowledgeBase

# Chunking and embedding settings all come from the pipeline config.
kb = KnowledgeBase(
    embedding_model=config.embedding_model,
    chunk_size=config.chunk_size,
    chunk_overlap=config.chunk_overlap,
)

print(
    "⏳ Building knowledge base from all documents...",
    "💡 This processes:",
    "   📚 Downloaded literature",
    "   📝 Your own papers",
    "   ✏️  Current drafts",
    "   🧠 Creates semantic embeddings",
    "",
    sep="\n",
)

stats = kb.build_from_directories(
    literature_folder=config.literature_folder,
    your_work_folder=config.your_work_folder,
    current_drafts_folder=config.current_drafts_folder,
)

# Headline numbers; a missing key is reported as 0.
print(
    "✅ KNOWLEDGE BASE BUILT!",
    f"   📚 Total documents: {stats.get('total_documents', 0)}",
    f"   ✅ Successfully processed: {stats.get('successful_documents', 0)}",
    f"   🧩 Total text chunks: {stats.get('total_chunks', 0)}",
    f"   📊 Total words: {stats.get('total_words', 0):,}",
    sep="\n",
)

# Per-source counts: an entry is either a dict carrying a 'successful' count
# or a bare value that is printed as-is.
source_breakdown = stats.get('source_breakdown', {})
if source_breakdown:
    print("\n📁 Source breakdown:")
    for source_type, info in source_breakdown.items():
        if isinstance(info, dict):
            count = info.get('successful', 0)
        else:
            count = info
        print(f"   {source_type}: {count} documents")

# Persist the result to the configured cache file.
kb.save_to_file(config.cache_file)
print(f"\n💾 Knowledge base saved to: {config.cache_file}")

# Expose under the name the summary cell looks for.
knowledge_base = kb

In [None]:
# =============================================================================
# CELL 8: Test Literature-Aware Chat (requires an API key)
# =============================================================================

print("\n🤖 TESTING LITERATURE-AWARE CHAT")
print("=" * 40)

# Everything below runs at cell level, so `assistant` and `chat` become
# notebook globals simply by being assigned/defined here.
try:
    # A ValueError from this call is treated as "API key missing" by the
    # handler at the bottom of the cell.
    config.validate_api_keys()

    from src.chat import LiteratureAssistant

    # Create literature assistant on top of the knowledge base built earlier (`kb`).
    assistant = LiteratureAssistant(
        knowledge_base=kb,
        anthropic_api_key=config.anthropic_api_key,
        chat_config=config.get_chat_config()
    )

    print("✅ Literature assistant created!")
    print(f"📊 Knowledge base: {kb.get_statistics().get('total_documents', 0)} documents")

    def chat(question, show_sources=True):
        """Ask the literature assistant one question and print its answer.

        Args:
            question: Text passed to ``assistant.ask``.
            show_sources: When true, also print ``response.sources_used``
                if the response lists any.

        Errors raised while asking are printed rather than propagated, so a
        failed request does not abort the notebook cell.
        """
        print(f"\n🧑‍🔬 Question: {question}")
        print("🤖 Assistant: ", end="", flush=True)

        try:
            response = assistant.ask(question)
            print(response.content)

            if show_sources and response.sources_used:
                print(f"\n📚 Sources: {', '.join(response.sources_used)}")

        except Exception as e:
            print(f"Error: {e}")

        print("\n" + "-" * 50)

    print("\n💬 READY FOR CHAT!")
    print("💡 Try these commands:")
    print("   chat('What papers do I have in my knowledge base?')")
    print("   chat('What are the main research topics?')")
    print("   chat('Explain quantum entanglement')")

except ValueError as e:
    print("⚠️  Cannot test chat - API key not set")
    # Surface the actual message: any ValueError raised above ends up here,
    # and without it a different root cause would be indistinguishable.
    print(f"   Details: {e}")
    print("💡 Set your ANTHROPIC_API_KEY environment variable to test chat")

# =============================================================================
# CELL 9: Summary and Next Steps
# =============================================================================

print("\n🎉 PHYSICS PIPELINE SETUP COMPLETE!")
print("=" * 45)

# Final statistics
bib_count = len(list(config.biblio_folder.glob("*.bib")))
# The downloader stores a paper as <id>.pdf and/or <id>.tex, so counting raw
# directory entries would count most papers twice (and include any other
# file in the folder). Count distinct stems of .pdf/.tex files instead.
lit_count = (
    len({
        entry.stem
        for entry in config.literature_folder.iterdir()
        if entry.is_file() and entry.suffix.lower() in {".pdf", ".tex"}
    })
    if config.literature_folder.exists()
    else 0
)
work_count = len(list(config.your_work_folder.iterdir())) if config.your_work_folder.exists() else 0

# `knowledge_base` and `chat` are only defined when the corresponding cells
# ran successfully, so their presence in globals() is the readiness signal.
print(f"📊 FINAL STATUS:")
print(f"   📚 .bib files: {bib_count}")
print(f"   📄 Downloaded papers: {lit_count}")
print(f"   📝 Your papers: {work_count}")
print(f"   🧠 Knowledge base: {'✅ Built' if 'knowledge_base' in globals() else '❌ Not built'}")
print(f"   🤖 Chat ready: {'✅ Yes' if 'chat' in globals() else '❌ Need API key'}")

print(f"\n🚀 WHAT YOU CAN DO NOW:")
if 'chat' in globals():
    print("   • chat('your question here') - Ask the AI assistant")
    print("   • assistant.synthesize_literature('topic') - Get research synthesis")
    print("   • assistant.help_with_writing('task') - Get writing help")
else:
    print("   • Add .bib files and run cells above")
    print("   • Set ANTHROPIC_API_KEY for chat functionality")

print(f"\n📁 KEY LOCATIONS:")
print(f"   📚 Add .bib files: {config.biblio_folder}")
print(f"   📝 Add your papers: {config.your_work_folder}")
print(f"   📄 Downloaded papers: {config.literature_folder}")
print(f"   💾 Knowledge base cache: {config.cache_file}")

print("\n🎯 Your physics literature synthesis pipeline is ready! 🔬📚")

In [None]:
# Test 1: Overview of your collection.
# Requires `chat`, which the chat-setup cell defines only when the API key
# validation there succeeds — run that cell first.
chat("What papers do I have in my knowledge base?")

In [None]:
# Test 2: Ask a domain-specific question.
# Requires `chat` from the chat-setup cell (defined only when the API key check passes).
chat("Do you know what measurement-induced entanglement phase transitions are?")