# SV-Agent Jupyter Demo

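This notebook demonstrates how to use sv-agent in Jupyter for:
1. Converting GATK-SV WDL workflows to CWL
2. Analyzing workflow structure
3. Using the chat interface programmatically
4. Creating a batch configuration
5. Processing a batch from that configuration
6. Listing the available GATK-SV modules
7. Using a local LLM through Ollama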

## Installation

If you're starting fresh on a new Linux environment, first clone and install sv-agent:

```bash
# Clone the repository with submodules (replace "yourusername" with the actual GitHub account)
git clone --recursive https://github.com/yourusername/sv-agent.git
cd sv-agent

# Run the setup script
./setup.sh

# Or manually:
git submodule update --init --recursive
pip install -e .
pip install -e submodules/awlkit/
```

## Setup

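If sv-agent is already checked out, make sure it and its bundled awlkit submodule are installed in this notebook's Python environment. The cell below assumes the checkout lives at `/workspaces/sv-agent`; adjust the paths if yours is elsewhere: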

In [None]:
# Install sv-agent if not already installed
# pip is invoked as `{sys.executable} -m pip` so that it runs under the same
# interpreter as this kernel and installs into the environment the notebook
# imports from.
import sys
# Editable (-e) installs of the sv-agent checkout and of its awlkit submodule.
# NOTE(review): both paths assume the repository is checked out at
# /workspaces/sv-agent — adjust them if your checkout is elsewhere.
!{sys.executable} -m pip install -e /workspaces/sv-agent
!{sys.executable} -m pip install -e /workspaces/sv-agent/submodules/awlkit/

In [None]:
# Import required libraries
from sv_agent import SVAgent, SVAgentChat
from sv_agent.knowledge import SVKnowledgeBase
import json
from pathlib import Path

## 1. Convert GATK-SV Workflows to CWL

In [None]:
# Create the agent used for the conversion (and by later cells)
agent = SVAgent()

# Modules to convert, and the output directory handed to the converter
modules = ["GatherSampleEvidence", "EvidenceQC"]
output_dir = "cwl_output"

print(f"Converting modules: {modules}")
# validate=False leaves CWL validation off; pass True to validate the output
results = agent.convert_gatksv_to_cwl(output_dir=output_dir, modules=modules, validate=False)

# One summary line per result: a status mark, the module name, and then the
# output path (falling back to the error entry when no path is present)
for entry in results:
    if entry['success']:
        mark = "✓"
    else:
        mark = "✗"
    detail = entry.get('output_path', entry.get('error'))
    print(f"{mark} {entry['module']}: {detail}")

## 2. Analyze Workflow Structure

In [None]:
# Analyze a specific workflow and print a short preview of its structure.
# Relies on `agent` created in the conversion cell above.
workflow_name = "GATKSVPipelineBatch"
analysis = agent.analyze_workflow(workflow_name)

# Maximum number of inputs / outputs / tasks listed in each preview section
PREVIEW_LIMIT = 5

if analysis:
    print(f"Workflow: {analysis['name']}")
    print(f"WDL Path: {analysis['wdl_path']}")

    # The headers report the full counts; only the first few entries are listed
    print(f"\nInputs ({len(analysis['inputs'])}):\n")
    for inp in analysis['inputs'][:PREVIEW_LIMIT]:
        print(f"  - {inp['name']}: {inp['type']}")

    print(f"\nOutputs ({len(analysis['outputs'])}):\n")
    for out in analysis['outputs'][:PREVIEW_LIMIT]:
        print(f"  - {out['name']}: {out['type']}")

    tasks = analysis['tasks']
    # Append the ellipsis only when the task list was actually cut short
    ellipsis = "..." if len(tasks) > PREVIEW_LIMIT else ""
    print(f"\nTasks: {', '.join(tasks[:PREVIEW_LIMIT])}{ellipsis}")
else:
    # Make the empty case visible instead of leaving the cell silent
    print(f"No analysis returned for workflow: {workflow_name}")

## 3. Chat Interface (Programmatic)

In [None]:
# Chat agent built with its default arguments
chat_agent = SVAgentChat()

# Questions put to the agent from code, one call to ask() per question
questions = [
    "What coverage depth do I need for reliable SV detection?",
    "What are the main steps in Module00a?",
    "How does GATK-SV handle complex structural variants?"
]

# Rule printed after every answer to separate the question/answer pairs
separator = "-" * 80

for prompt in questions:
    print(f"\n**Q: {prompt}**")
    answer = chat_agent.ask(prompt)
    print(f"A: {answer}", separator, sep="\n")

## 4. Create Batch Configuration

In [None]:
# Per-sample entries: an id plus the BAM file and its index.
# All paths in this cell are placeholders to be replaced with real files.
sample_entries = [
    {"id": "sample1", "bam": "/path/to/sample1.bam", "bai": "/path/to/sample1.bam.bai"},
    {"id": "sample2", "bam": "/path/to/sample2.bam", "bai": "/path/to/sample2.bam.bai"},
]

# Batch configuration: samples, reference, output location and module list
batch_config = {
    "samples": sample_entries,
    "reference": "/path/to/reference.fa",
    "output_dir": "/path/to/output",
    "modules": ["GatherSampleEvidence", "EvidenceQC"],
}

# Serialize once, then reuse the same text for the file and the echo below
config_path = "batch_config.json"
config_text = json.dumps(batch_config, indent=2)
with open(config_path, 'w') as config_file:
    config_file.write(config_text)

print(f"Batch configuration saved to: {config_path}")
print(config_text)

## 5. Process Batch with Configuration

In [None]:
# Run the batch described by the configuration file written above.
# Any failure is reported as a note rather than a traceback, since the demo
# configuration points at placeholder paths.
try:
    batch_results = agent.process_batch(config_path)
    module_count = len(batch_results['conversions'])
    print(f"Processed {module_count} modules")
    batch_output_dir = batch_results['output_dir']
    print(f"Output directory: {batch_output_dir}")
except Exception as exc:
    print(f"Note: Batch processing requires valid file paths. Error: {exc}")

## 6. List Available Modules

In [None]:
# Knowledge base holding the module catalogue
kb = SVKnowledgeBase()

# Print every module with its description and category, plus its
# dependencies when the entry lists any; a blank line ends each entry
print("Available GATK-SV Modules:\n")
for module_name, details in kb.modules.items():
    print(f"• {module_name}")
    print(f"  {details['description']}")
    print(f"  Category: {details['category']}")
    dependencies = details.get('dependencies')
    if dependencies:
        print(f"  Dependencies: {', '.join(dependencies)}")
    print()

## 7. Using with Ollama (Local LLM)

In [None]:
# Check if Ollama is available and, if so, ask one question through it
import subprocess

# Model name handed to SVAgentChat
OLLAMA_MODEL = "gemma:2b"
# Upper bound (seconds) on how long `ollama list` may take before giving up
OLLAMA_LIST_TIMEOUT_S = 30

# Only the CLI probe sits inside the try block, so a FileNotFoundError raised
# later by the chat code is not misreported as "Ollama is not installed".
try:
    result = subprocess.run(
        ['ollama', 'list'],
        capture_output=True,
        text=True,
        timeout=OLLAMA_LIST_TIMEOUT_S,  # keep the cell from blocking indefinitely
    )
except FileNotFoundError:
    # The `ollama` executable could not be found
    print("Ollama is not installed. Install from: https://ollama.ai")
except subprocess.TimeoutExpired:
    print(f"Ollama did not respond within {OLLAMA_LIST_TIMEOUT_S} seconds")
else:
    if result.returncode == 0:
        print("Ollama is installed. Available models:")
        print(result.stdout)

        # Initialize chat with Ollama
        ollama_chat = SVAgentChat(llm_provider="ollama", ollama_model=OLLAMA_MODEL)
        response = ollama_chat.ask("What is the purpose of Module00a?")
        print(f"\nOllama Response: {response}")
    else:
        print("Ollama is not available")
        # Surface the CLI's own error output so the cause is visible
        error_text = result.stderr.strip()
        if error_text:
            print(error_text)