In [1]:
# Quick test in a new notebook cell
import numpy as np
import torch
from sentence_transformers import SentenceTransformer

# Emit the import confirmation and library versions in a single write
print(
    "✅ All imports successful!",
    f"NumPy version: {np.__version__}",
    f"PyTorch version: {torch.__version__}",
    sep="\n",
)

✅ All imports successful!
NumPy version: 1.26.4
PyTorch version: 2.2.2


In [2]:
# Physics Literature Synthesis Pipeline - Fixed Testing Script
# Run these cells to test the modular architecture

# =============================================================================
# CELL 1: Setup and Path Configuration (ESSENTIAL!)
# =============================================================================

import os
import sys
from pathlib import Path

# Notebooks are launched from <project>/notebooks, so climb one level in that case
project_root = Path.cwd()
if project_root.name == 'notebooks':
    project_root = project_root.parent
print(f"✅ Project root: {project_root}")

# Put the project root at the front of sys.path so `config` and `src` resolve
if sys.path.count(str(project_root)) == 0:
    sys.path.insert(0, str(project_root))
    print("✅ Added project root to Python path")

print("✅ Python path configured correctly")


# =============================================================================
# CELL 2: Test Configuration Module (FIXED PATHS)
# =============================================================================

print("🧪 Testing Configuration Module (with path fixes)...")

try:
    from pathlib import Path
    from config import PipelineConfig, default_config

    # Resolve the project root (one level above the notebooks folder)
    project_root = Path.cwd()
    if project_root.name == 'notebooks':
        project_root = project_root.parent

    def create_config(overrides=None):
        """Build a PipelineConfig whose paths are anchored at ``project_root``.

        Args:
            overrides: Optional value forwarded unchanged to ``PipelineConfig``.

        Returns:
            The config instance after its path attributes were reassigned and
            ``_create_directories()`` was called on it.
        """
        config = PipelineConfig(overrides)

        # Re-anchor every path at the project root instead of the cwd
        config.project_root = project_root
        config.documents_root = project_root / "documents"
        for attribute, leaf in (
            ("biblio_folder", "biblio"),
            ("literature_folder", "literature"),
            ("your_work_folder", "your_work"),
            ("current_drafts_folder", "current_drafts"),
        ):
            # Read documents_root back from the config, as each sub-folder hangs off it
            setattr(config, attribute, config.documents_root / leaf)
        config.cache_file = project_root / "physics_knowledge_base.pkl"
        config.reports_folder = project_root / "reports"

        # Make sure the re-anchored directories exist
        config._create_directories()

        return config

    def _print_section(header, source, fields, transform=None):
        """Print ``header`` and then one indented ``label: value`` line per field.

        Values are fetched one at a time so earlier lines are still printed if a
        later attribute lookup fails.
        """
        print(header)
        for label, attribute in fields:
            value = getattr(source, attribute)
            if transform is not None:
                value = transform(value)
            print(f"   {label}: {value}")

    # Default config first, to show the paths that are off when run from notebooks/
    _print_section(
        "📊 Default config paths (problematic):",
        default_config,
        (
            ("Literature folder", "literature_folder"),
            ("Cache file", "cache_file"),
        ),
    )

    # Corrected config
    corrected_config = create_config()
    _print_section(
        "\n✅ Corrected config paths:",
        corrected_config,
        (
            ("Project root", "project_root"),
            ("Literature folder", "literature_folder"),
            ("Cache file", "cache_file"),
            ("Embedding model", "embedding_model"),
            ("Supported extensions", "supported_extensions"),
        ),
    )

    # Custom config with overrides
    custom_overrides = {
        'chunk_size': 500,
        'default_temperature': 0.5,
        'max_context_chunks': 10,
        'download_delay': 2.0,
    }
    custom_config = create_config(custom_overrides)
    _print_section(
        "\n✅ Custom config created with overrides:",
        custom_config,
        (
            ("Chunk size", "chunk_size"),
            ("Temperature", "default_temperature"),
            ("Max context chunks", "max_context_chunks"),
            ("Download delay", "download_delay"),
        ),
    )

    # Directory creation check
    _print_section(
        "\n✅ Directory verification:",
        corrected_config,
        (
            ("Documents root exists", "documents_root"),
            ("Literature folder exists", "literature_folder"),
            ("Your work folder exists", "your_work_folder"),
            ("Biblio folder exists", "biblio_folder"),
            ("Current drafts folder exists", "current_drafts_folder"),
        ),
        transform=lambda folder: folder.exists(),
    )

    # Configuration accessor methods (all three are fetched before anything is printed)
    arxiv_config = corrected_config.get_arxiv_config()
    embedding_config = corrected_config.get_embedding_config()
    chat_config = corrected_config.get_chat_config()

    print("\n✅ Configuration methods working:")
    print(f"   arXiv delay: {arxiv_config['delay']} seconds")
    print(f"   arXiv title threshold: {arxiv_config['title_threshold']}")
    print(f"   Embedding model: {embedding_config['model_name']}")
    print(f"   Embedding chunk size: {embedding_config['chunk_size']}")
    print(f"   Chat model: {chat_config['model']}")
    print(f"   Chat max tokens: {chat_config['max_tokens']}")

    # API key validation: a ValueError is reported as a warning, not a failure
    try:
        corrected_config.validate_api_keys()
    except ValueError as e:
        print(f"⚠️  API key validation: {e}")
        print("   (This is expected if ANTHROPIC_API_KEY is not set)")
    else:
        print("✅ API key validation passed")

    # String representation
    print("\n📄 Configuration summary:")
    print(corrected_config)

    # Keep the working config around for later cells
    working_config = corrected_config
    print("\n💾 Stored working config as 'working_config' for other cells")

except Exception as e:
    print(f"❌ Configuration test failed: {e}")
    import traceback
    traceback.print_exc()

# =============================================================================
# CELL 3: Test Utilities 
# =============================================================================

print("\n🧪 Testing Utilities...")

try:
    import tempfile

    from src.utils import setup_logging, get_logger, clean_filename, safe_file_read

    # Test logging
    logger = setup_logging(level="INFO", console_output=True)
    test_logger = get_logger("test_module")
    test_logger.info("Test log message from utilities")
    print("✅ Logging system working")

    # Test file utilities
    test_names = [
        "test/file:name*with?chars",
        "normal_filename.pdf",
        "file with spaces and symbols!@#.tex"
    ]

    print("✅ Filename cleaning tests:")
    for name in test_names:
        cleaned = clean_filename(name)
        print(f"   '{name}' → '{cleaned}'")

    # Do all file I/O inside a throwaway directory: nothing is written to the
    # working directory, no existing file can be clobbered, and the scratch
    # file is removed even when safe_file_read raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        scratch = Path(scratch_dir)

        # Test safe file read with a file that is guaranteed not to exist
        content = safe_file_read(scratch / "nonexistent_file_test.txt")
        print(f"✅ Safe file read (empty result): '{content}' (length: {len(content)})")

        # Test with an existing file
        temp_file = scratch / "temp_test.txt"
        temp_file.write_text("This is a test file for reading.", encoding="utf-8")
        content = safe_file_read(temp_file)
        print(f"✅ Safe file read (actual content): '{content[:30]}...'")

except Exception as e:
    print(f"❌ Utilities test failed: {e}")
    import traceback
    traceback.print_exc()

# =============================================================================
# CELL 4: Test Basic Imports (Before Creating Missing Files)
# =============================================================================

print("\n🧪 Testing Basic Imports...")


def list_dir_names(directory):
    """Return the sorted names of the entries inside ``directory``.

    Args:
        directory: ``pathlib.Path`` to inspect.

    Returns:
        A sorted list of entry names, or ``None`` when ``directory`` is
        missing or is not a directory.
    """
    if not directory.is_dir():
        return None
    # iterdir() yields entries in arbitrary order; sort for reproducible output
    return sorted(entry.name for entry in directory.iterdir())


# Test what we can import so far
try:
    print("Testing config imports...")
    from config import PipelineConfig
    print("✅ config.PipelineConfig")

    print("Testing utils imports...")
    from src.utils.logging_config import get_logger
    print("✅ src.utils.logging_config")

    from src.utils.file_utils import clean_filename
    print("✅ src.utils.file_utils")

    # Report which package directories exist under src/ and what they contain
    print()
    for package, label in (
        ("core", "Core"),
        ("downloaders", "Downloader"),
        ("chat", "Chat"),
    ):
        names = list_dir_names(project_root / "src" / package)
        print(f"📁 Checking src/{package} directory: {names is not None}")
        if names is not None:
            print(f"   {label} files: {names}")

except Exception as e:
    print(f"❌ Basic imports test failed: {e}")
    import traceback
    traceback.print_exc()

# =============================================================================
# CELL 5: Integration Test & Summary
# =============================================================================

print("\n🧪 Integration Test Summary...")

def test_available_components():
    """Probe each pipeline component and report which ones import and run.

    Every probe is isolated in its own try/except so one missing module does
    not hide the status of the others.

    Returns:
        A ``(working_components, failed_components)`` tuple of lists of
        human-readable status strings. Each of the five probes contributes
        exactly one entry to one of the two lists.
    """
    def _shorten(error, limit=50):
        """Return ``str(error)``, cut to ``limit`` chars with '...' only if it was cut."""
        text = str(error)
        if len(text) <= limit:
            return text
        return f"{text[:limit]}..."

    working_components = []
    failed_components = []

    # Test config: import it and build one instance with an override
    try:
        from config import PipelineConfig
        PipelineConfig({'chunk_size': 100})
        working_components.append("✅ Configuration System")
    except Exception as e:
        failed_components.append(f"❌ Configuration System: {e}")

    # Test utils: import and call each helper once
    try:
        from src.utils import get_logger, clean_filename
        get_logger("test")
        clean_filename("test/file")
        working_components.append("✅ Utilities")
    except Exception as e:
        failed_components.append(f"❌ Utilities: {e}")

    # Test core (may not exist yet)
    try:
        from src.core import DocumentProcessor
        working_components.append("✅ Core - Document Processing")
    except Exception as e:
        failed_components.append(f"❌ Core - Document Processing: {_shorten(e)}")

    # Test downloaders (may not exist yet)
    try:
        from src.downloaders import BibtexParser
        working_components.append("✅ Downloaders - BibTeX Parser")
    except Exception as e:
        failed_components.append(f"❌ Downloaders - BibTeX Parser: {_shorten(e)}")

    # Test chat (may not exist yet)
    try:
        from src.chat import ChatInterface
        working_components.append("✅ Chat Interface")
    except Exception as e:
        failed_components.append(f"❌ Chat Interface: {_shorten(e)}")

    return working_components, failed_components

working, failed = test_available_components()

print("📊 COMPONENT STATUS:")
print("\n🟢 Working Components:")
for component in working:
    print(f"   {component}")

print("\n🔴 Missing/Failed Components:")
for component in failed:
    print(f"   {component}")

total_components = len(working) + len(failed)
# Guard the ratio so an empty probe list cannot raise ZeroDivisionError
success_rate = len(working) / total_components * 100 if total_components else 0.0

print("\n📈 PROGRESS SUMMARY:")
print(f"   Working: {len(working)}/{total_components} components")
print(f"   Success rate: {success_rate:.1f}%")

# The success banner names configuration and utilities specifically, so require
# those two entries rather than "any two working components".
required_components = ("✅ Configuration System", "✅ Utilities")
if all(name in working for name in required_components):
    print("\n🎉 BASIC INFRASTRUCTURE WORKING!")
    print("✅ Configuration and utilities are functional")
    print("📝 Next: Create the remaining modules (core, downloaders, chat)")
else:
    print("\n⚠️  Need to fix basic infrastructure first")

print("\n🚀 Ready for next phase of testing!")

✅ Project root: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis_project_modular
✅ Added project root to Python path
✅ Python path configured correctly
🧪 Testing Configuration Module (with path fixes)...
📊 Default config paths (problematic):
   Literature folder: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis_project_modular/notebooks/documents/literature
   Cache file: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis_project_modular/notebooks/physics_knowledge_base.pkl

✅ Corrected config paths:
   Project root: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis_project_modular
   Literature folder: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis_project_modular/documents/literature
   Cache file: /Users/fanza/Desktop/Projects/AcademicAssistantExperiments/physics_synthesis_project_modular/physics_knowledge_base.pkl
   Embedding model: all-MiniLM-

In [None]:
# Investigate the OpenMP conflict
import threadpoolctl
# List every native thread-pool library threadpoolctl detects in this process
info = threadpoolctl.threadpool_info()
for item in info:
    print(f"Library: {item.get('filepath', 'unknown')}")
    # user_api tells OpenMP runtimes apart from BLAS libraries
    print(f"  User API: {item.get('user_api', 'unknown')}")
    print(f"  Internal API: {item.get('internal_api', 'unknown')}")
    print(f"  Prefix: {item.get('prefix', 'unknown')}")
    print(f"  Version: {item.get('version', 'unknown')}")
    print(f"  Threads: {item.get('num_threads', 'unknown')}")
    # Not every library reports a threading layer, hence the default
    print(f"  Threading layer: {item.get('threading_layer', 'unknown')}")
    print()

# More than one distinct OpenMP runtime in the same process is the usual
# signature of an OpenMP conflict, so call it out explicitly.
openmp_runtimes = sorted(
    {item.get('filepath', 'unknown') for item in info if item.get('user_api') == 'openmp'}
)
if len(openmp_runtimes) > 1:
    print(f"⚠️  {len(openmp_runtimes)} OpenMP runtimes are loaded in this process:")
    for runtime_path in openmp_runtimes:
        print(f"   {runtime_path}")
else:
    print(f"OpenMP runtimes loaded: {len(openmp_runtimes)}")