In [1]:
# Simple LangChain Analyzer - Interactive Version
# Choose your option and enter your source

# Cell 1: Setup
from config import Config
from utils.file_processor import FileProcessor
from chains.extraction_chain import DocumentExtractionChain
from chains.analysis_chain import DocumentAnalysisChain
from rich.console import Console
from rich.table import Table
from rich.panel import Panel
import json
import os
from pathlib import Path

# Initialize
# Shared objects created once here and reused by the later cells.
fp = FileProcessor()  # used below to list data/input and to read URL/file sources
ec = DocumentExtractionChain()  # used below via ec.extract(content)
ac = DocumentAnalysisChain()  # used below via ac.analyze(structured)
console = Console()  # Rich console that renders the results table and panels

print("🔗 Simple Analyzer Ready!")

# Cell 2: Interactive Menu - Run this cell to choose
# Sets `analysis_type` ("URL", "FILE" or "TEXT") and `source` for Cell 3.
# When nothing usable is entered, `source` is left empty/None so Cell 3
# refuses to run instead of failing later on a bogus source.
print("📋 Choose Analysis Type:")
print("1. 🌐 URL - Analyze a webpage")
print("2. 📄 FILE - Analyze a local file")
print("3. 📝 TEXT - Analyze text directly")
print("")

# Get user choice
choice = input("Enter choice (1, 2, or 3): ").strip()

# Reset up front so a re-run never inherits the previous selection.
analysis_type = None
source = None

if choice == "1":
    analysis_type = "URL"
    source = input("🌐 Enter webpage URL: ").strip()
    if source:
        print(f"✅ Will analyze URL: {source}")
    else:
        print("❌ No URL entered! Please run this cell again.")

elif choice == "2":
    analysis_type = "FILE"
    print("📄 Available files in data/input:")

    # List files in input directory
    input_files = fp.get_files_in_directory("data/input")
    if input_files:
        for i, file in enumerate(input_files, 1):
            print(f"  {i}. {Path(file).name}")
        print("")

        file_choice = input("Enter file number or full path: ").strip()

        # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
        is_number = file_choice.isdecimal()
        if is_number and 1 <= int(file_choice) <= len(input_files):
            # A valid menu number selects the listed file.
            source = input_files[int(file_choice) - 1]
        elif is_number and not Path(file_choice).exists():
            # An out-of-range number is a typo, not a path: report it here
            # rather than letting it surface later as an unreadable file.
            print(f"❌ No file number {file_choice}. "
                  f"Choose 1-{len(input_files)} or enter a path.")
        else:
            source = file_choice
    else:
        print("No files found in data/input directory")
        source = input("📄 Enter full file path: ").strip()

    if source:
        print(f"✅ Will analyze file: {source}")
    else:
        print("❌ No file selected! Please run this cell again.")

elif choice == "3":
    analysis_type = "TEXT"
    print("📝 Enter your text (press Enter on empty line when finished):")
    lines = []
    while True:
        line = input()
        if line == "":
            break
        lines.append(line)
    source = "\n".join(lines)
    if source:
        print(f"✅ Will analyze text: {len(source)} characters")
    else:
        print("❌ No text entered! Please run this cell again.")

else:
    print("❌ Invalid choice! Please run this cell again and choose 1, 2, or 3.")

# Cell 3: Run Analysis - Only run after Cell 2
# Publishes `structured` and `analysis` for the display/save cells.
# Both are cleared first so that a failed or skipped run can never leave
# results from an earlier run paired with a different (or missing) source.
structured = None
analysis = None

if 'analysis_type' in locals() and analysis_type and source:
    print(f"\n🚀 Starting {analysis_type} analysis...")

    try:
        # Read/get content: TEXT is analyzed as typed, anything else is loaded.
        if analysis_type == "TEXT":
            content = source
        else:
            content = fp.read_source(source)

        # Fail with a clear message instead of a confusing len()/LLM error.
        if not content:
            raise ValueError("no content could be read from the source")

        print(f"✅ Content loaded: {len(content)} characters")

        # Extract structured data
        print("🔍 Extracting data...")
        extracted = ec.extract(content)

        # Analyze
        print("📊 Analyzing...")
        analyzed = ac.analyze(extracted)

        # Publish both results together, only once every step succeeded.
        structured = extracted
        analysis = analyzed

        print("✅ Analysis complete!")

    except Exception as e:
        # Notebook boundary: report the failure and leave the results cleared.
        print(f"❌ Error: {e}")
        structured = None
        analysis = None
else:
    print("❌ Please run Cell 2 first to choose your analysis type!")

# Cell 4: Display Results - Only run after Cell 3
# Renders a summary table, insight/recommendation panels, and short
# entity/fact listings for the current `structured` / `analysis` results.
if 'structured' in locals() and structured and 'analysis' in locals() and analysis:
    # Imported here so this cell works without touching the setup cell.
    from rich.markup import escape

    def _plain(value):
        """Escape Rich console markup in strings; pass other values through."""
        # Rich treats "[...]" in Table/Panel strings as markup, so
        # LLM- or user-provided text must be escaped to be shown literally.
        return escape(value) if isinstance(value, str) else value

    # Tolerate a non-string source (e.g. a path object or None).
    source_text = "" if source is None else str(source)
    if len(source_text) > 50:
        source_text = source_text[:50] + "..."

    # Main results table
    table = Table(title="📊 Analysis Results")
    table.add_column("Metric", style="cyan")
    table.add_column("Value", style="magenta")

    table.add_row("Source", _plain(source_text))
    table.add_row("Title", _plain(structured.title))
    table.add_row("Type", _plain(structured.document_type))
    table.add_row("Entities", str(len(structured.entities)))
    table.add_row("Facts", str(len(structured.facts)))
    table.add_row("Topics", _plain(", ".join(structured.topics)))
    table.add_row("Sentiment", f"{analysis.sentiment_score:.2f}")
    table.add_row("Complexity", f"{analysis.complexity_score}/10")

    console.print(table)

    # Key insights
    if analysis.key_insights:
        insights = "\n".join(
            f"• {_plain(str(insight))}" for insight in analysis.key_insights
        )
        console.print(Panel(insights, title="💡 Key Insights", border_style="green"))

    # Recommendations
    if analysis.recommendations:
        recs = "\n".join(
            f"• {_plain(str(rec))}" for rec in analysis.recommendations
        )
        console.print(Panel(recs, title="🎯 Recommendations", border_style="blue"))

    # Show entities if any (plain print: no markup handling involved)
    if structured.entities:
        print(f"\n👥 Found {len(structured.entities)} entities:")
        for entity in structured.entities[:5]:  # Show first 5
            print(f"  • {entity.name} ({entity.type})")

    # Show top facts if any
    if structured.facts:
        print("\n📋 Top facts (importance ranked):")
        sorted_facts = sorted(structured.facts, key=lambda x: x.importance, reverse=True)
        for fact in sorted_facts[:3]:  # Show top 3
            print(f"  • {fact.statement} (Importance: {fact.importance}/10)")

else:
    print("❌ No results to display. Please run Cells 2 and 3 first!")

# Cell 5: Save Results - Only run after Cell 4
def build_results_filename(analysis_type, source):
    """Return a filesystem-safe JSON filename for the analyzed source.

    Args:
        analysis_type: "URL", "FILE", or anything else (treated as text).
        source: The URL, file path, or raw text that was analyzed.

    Returns:
        A name such as "web_example.com_results.json",
        "report_results.json", or "text_analysis_results.json".
    """
    import re
    from urllib.parse import urlparse

    if analysis_type == "URL":
        netloc = urlparse(source).netloc
        # Drop any "user:password@" prefix so credentials never reach disk.
        host = netloc.rpartition("@")[2] or "unknown_host"
        label = f"web_{host}"
    elif analysis_type == "FILE":
        label = Path(source).stem
    else:
        label = "text_analysis"

    # Replace characters that are invalid in filenames on common platforms
    # (e.g. the ":" before a port number is not allowed on Windows).
    label = re.sub(r'[<>:"/\\|?*\x00-\x1f]+', "_", label) or "analysis"
    return f"{label}_results.json"


if 'analysis' in locals() and analysis and 'structured' in locals() and structured:
    save_choice = input("\n💾 Save results? (y/n): ").strip().lower()

    if save_choice in ('y', 'yes'):
        # Create filename
        filename = build_results_filename(analysis_type, source)

        # Save data
        output_dir = Path("data/output")
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / filename

        results_data = {
            "source": source,
            # Guard against a missing type so saving never fails on .lower().
            "type": analysis_type.lower() if analysis_type else "unknown",
            "structured_document": structured.model_dump(),
            "analysis_result": analysis.model_dump()
        }

        with open(output_file, 'w', encoding='utf-8') as f:
            # default=str stringifies any value json cannot encode natively.
            json.dump(results_data, f, indent=2, default=str)

        print(f"💾 Results saved to: {output_file}")
    else:
        print("💾 Results not saved")

    print("\n✨ Analysis Complete!")
else:
    print("❌ No results to save. Please complete the analysis first!")

🔗 Simple Analyzer Ready!
📋 Choose Analysis Type:
1. 🌐 URL - Analyze a webpage
2. 📄 FILE - Analyze a local file
3. 📝 TEXT - Analyze text directly

Enter choice (1, 2, or 3): 2
📄 Available files in data/input:
  1. OptimalProteinIntakeandFrequncy.pdf

Enter file number or full path: 1
✅ Will analyze file: data\input\OptimalProteinIntakeandFrequncy.pdf

🚀 Starting FILE analysis...
✅ Content loaded: 20854 characters
🔍 Extracting data...
🤖 Sending request to Ollama...


  warn_deprecated(




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
            You are an expert document analyzer. Extract structured information from the following text.
            
            Text to analyze:
            See discussions, st ats, and author pr ofiles f or this public ation at : https://www .researchgate.ne t/public ation/288150322
Optimal protein intake to maximize muscle protein synthesis Examinations
of optimal meal protein intake and frequency for athletes
Article    in  Agro Food Industr y Hi T ech · Mar ch 2009
CITATIONS
16READS
24,212
2 author s, including:
Layne Nort on
BioLayne LL C
33 PUBLICA TIONS    1,289  CITATIONS    
SEE PROFILE
All c ontent f ollo wing this p age was uplo aded b y Layne Nort on on 01 Dec ember 2016.
The user has r equest ed enhanc ement of the do wnlo aded file.
54 AgroFood  industry hi-tech -  March/April 2009 -  vol 20 n 2 Sport nutritionProtein “ needS” ver SuS oPtimal Protein intake fo r 
athlete S
dietary protein 


[1m> Finished chain.[0m
📝 Raw LLM Response (first 200 chars): Here is the extracted information:

```
{
  "title": "Optimizing Protein Synthesis for Athletes",
  "summary": "Athletes can benefit from consuming protein levels above the RDA to maximize muscle mass...
✅ Successfully parsed JSON response
📊 Analyzing...
🤖 Sending analysis request to Ollama...


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
            You are a strategic analyst. Analyze the following structured document data and provide insights.
            
            Structured Document Data:
            {
  "title": "Optimizing Protein Synthesis for Athletes",
  "summary": "Athletes can benefit from consuming protein levels above the RDA to maximize muscle mass and strength. Maximization of protein synthesis (MPS) in response to a meal depends on consuming sufficient leucine.",
  "entities": [
    {
      "name": "L.E. Norton",
      "type": "person",
      "context": "researcher",


👥 Found 4 entities:
  • L.E. Norton (person)
  • d.K. Layman (person)
  • W.H.O. (organization)
  • FAO/UNU (organization)

📋 Top facts (importance ranked):
  • The amino acid leucine is responsible for much of the anabolic properties of a meal. (Importance: 9/10)
  • The amount of protein required at a meal to achieve maximum MPS depends on the leucine content of the protein source. (Importance: 9/10)
  • Consuming protein levels well above the RDA can benefit athletes looking to maximize muscle mass and strength. (Importance: 8/10)

💾 Save results? (y/n): n
💾 Results not saved

✨ Analysis Complete!
