# Helix-Aegis Interactive Screening Demo

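This notebook lets you interactively screen DNA sequences for biosecurity risks using the fine-tuned Helix-Aegis model. Each pasted sequence is translated into its six protein reading frames (three forward, three reverse-complement), and every frame is screened by the model.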

In [None]:
# Install dependencies if running in a new environment
!pip install -r ../requirements.txt

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
from Bio.Seq import Seq
import ipywidgets as widgets
from IPython.display import display, clear_output
import sys
import os

# Make ../src importable so shared helpers can be imported from it if needed;
# the helpers used by the cells of this notebook are defined inline so it also
# works standalone.
# The membership check keeps the cell idempotent: re-running it does not
# append duplicate entries to sys.path.
SRC_DIR = os.path.abspath('../src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [None]:
# Configuration
# BASE_MODEL_ID: model identifier passed to from_pretrained() for both the
# causal LM and its tokenizer.
# ADAPTER_PATH: local directory of the PEFT adapter; when it does not exist the
# loading cell falls back to the base model alone.
BASE_MODEL_ID = "tumorailab/protein2text-llama3.1-8B-instruct-esm2-650M"
ADAPTER_PATH = "../Llama-HelixAegis-Final" # Path to your fine-tuned adapter

# Category list interpolated verbatim into the screening prompt.
# NOTE(review): presumably this must match the text used during fine-tuning
# character-for-character (including whitespace) - confirm before editing.
BIO_TAXONOMY = """
<BEGIN UNSAFE CONTENT CATEGORIES>
BS1: Regulated Toxins. 
BS2: Pathogens of Pandemic Potential. 
BS3: Virulence Factors. 
BS4: Antimicrobial Resistance. 
<END UNSAFE CONTENT CATEGORIES>
"""

In [None]:
def translate_dna_to_protein(dna_seq):
    """
    Translate a DNA sequence into 6 protein reading frames (3 forward, 3 reverse).

    The input is upper-cased and stripped of ALL whitespace (spaces, tabs and
    both Unix and Windows line endings), so sequences pasted from FASTA files
    or spreadsheets translate cleanly. Each frame is trimmed to a whole number
    of codons before translation, which avoids Biopython's partial-codon
    warning while producing the same protein string.

    Args:
        dna_seq (str): Nucleotide sequence; may contain embedded whitespace.

    Returns:
        dict[str, str]: Maps "Forward_1".."Forward_3" and
        "Reverse_1".."Reverse_3" (in that order) to the translated protein
        string of the corresponding frame. Stop codons are translated by
        Biopython's default settings (rendered as "*").

    Raises:
        Whatever Bio.Seq raises for codons it cannot translate (e.g. characters
        that are not valid nucleotide codes).
    """
    # str.split() with no argument splits on any run of whitespace, so joining
    # the pieces removes "\r", "\t" etc. in addition to "\n" and " ".
    cleaned = "".join(dna_seq.split()).upper()
    forward_strand = Seq(cleaned)
    reverse_strand = forward_strand.reverse_complement()

    frames = {}
    for label, strand in (("Forward", forward_strand), ("Reverse", reverse_strand)):
        for offset in range(3):
            frame = strand[offset:]
            # Drop the trailing 1-2 bases that do not form a complete codon.
            whole_codons_len = len(frame) - len(frame) % 3
            frames[f"{label}_{offset + 1}"] = str(frame[:whole_codons_len].translate())

    return frames

In [None]:
print("Loading model... This may take a minute.")

# 4-bit NF4 quantization with float16 compute, passed to from_pretrained()
# below as the quantization config of the base model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

try:
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

    # Load the fine-tuned adapter when it is available on disk; otherwise fall
    # back to the base model so the notebook can still be exercised end to end.
    if os.path.exists(ADAPTER_PATH):
        model = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
        print("✅ Fine-tuned Helix-Aegis Adapter Loaded!")
    else:
        model = base_model
        print("⚠️ Adapter not found. Using base model only.")

except Exception as e:
    print(f"Error loading model: {e}")
    # Re-raise: without `model` and `tokenizer` every later cell would fail
    # with an unrelated NameError, hiding the real cause of the problem.
    raise

In [None]:
def screen_sequence(protein_seq):
    """
    Ask the model for a biosecurity assessment of one protein sequence.

    Builds the screening prompt (task description, BIO_TAXONOMY and the
    sequence), generates up to 50 new tokens and returns only the text the
    model produced after the prompt.

    Args:
        protein_seq (str): Protein sequence inserted between the
            <BEGIN SEQUENCE> / <END SEQUENCE> markers of the prompt.

    Returns:
        str: The model's reply with special tokens removed and surrounding
        whitespace stripped. The prompt asks for 'safe' or 'unsafe' on the
        first line and the violated categories on the second.
    """
    prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
Task: Screen the following protein sequence for biosecurity risks according to the taxonomy below.

{BIO_TAXONOMY}

<BEGIN SEQUENCE>
{protein_seq}
<END SEQUENCE>

Provide your safety assessment:
- First line: 'safe' or 'unsafe'
- Second line: Violated categories (if unsafe)<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=50,
            # Greedy decoding: a screening verdict should not change between
            # two runs on the same sequence.
            do_sample=False,
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() returns the prompt tokens followed by the new tokens. Slicing
    # off the prompt is more reliable than splitting the decoded text on the
    # word "assistant", which would truncate any reply containing that word.
    generated_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

In [None]:
# Interactive widgets: a text box for the DNA, a trigger button, and a pane
# that collects everything printed while a sequence is being screened.
dna_input = widgets.Textarea(
    description="DNA:",
    placeholder="Paste DNA Sequence here (ATGC...)...",
    value="",
    layout=widgets.Layout(height="100px", width="100%"),
    disabled=False,
)

screen_btn = widgets.Button(
    icon="check",
    tooltip="Click to screen",
    button_style="danger",  # one of 'success', 'info', 'warning', 'danger', ''
    description="Screen Sequence",
)

output_area = widgets.Output()

# Translated frames with fewer residues than this are not sent to the model.
MIN_FRAME_LENGTH = 10


def on_button_clicked(b):
    """
    Button callback: translate the pasted DNA and screen every reading frame.

    Reads the DNA from `dna_input`, translates it into six reading frames and
    passes each frame of at least MIN_FRAME_LENGTH residues to
    `screen_sequence`. Per-frame results and a final verdict are printed into
    `output_area`, which is cleared first.

    A "SAFE" verdict is only printed when at least one frame was actually
    screened; if every frame was too short, the user is told that no verdict
    could be reached instead of being shown a misleading "SAFE".

    Args:
        b: The widget that triggered the callback (unused).
    """
    with output_area:
        clear_output()
        dna = dna_input.value.strip()
        if not dna:
            print("Please enter a DNA sequence.")
            return

        # Count bases only: pasted sequences often contain line breaks/spaces.
        base_count = len("".join(dna.split()))
        print(f"Translating and Screening... (Length: {base_count} bp)")

        try:
            frames = translate_dna_to_protein(dna)
            unsafe_found = False
            screened_count = 0

            for name, seq in frames.items():
                # Simple heuristic to skip very short frames
                if len(seq) < MIN_FRAME_LENGTH:
                    continue

                screened_count += 1
                result = screen_sequence(seq)

                # Any mention of "unsafe" flags the frame; this errs on the
                # side of raising an alert.
                if "unsafe" in result.lower():
                    unsafe_found = True
                    print(f"\n🚨 {name}: UNSAFE")
                    print(f"   Details: {result}")
                else:
                    print(f"✅ {name}: Safe")

            if screened_count == 0:
                # Nothing reached the model, so "SAFE" would be unfounded.
                print(
                    "\n⚠️ NO VERDICT: sequence too short to screen "
                    f"(no reading frame has at least {MIN_FRAME_LENGTH} residues)."
                )
            elif not unsafe_found:
                print("\n✅✅ FINAL VERDICT: SAFE ✅✅")
            else:
                print("\n🚨🚨 FINAL VERDICT: POTENTIAL THREAT DETECTED 🚨🚨")

        except Exception as e:
            # Shown in the output pane so a failure is visible in the UI.
            print(f"Error: {e}")

# Run on_button_clicked every time the button is pressed.
screen_btn.on_click(on_button_clicked)

# Render the input box, the button and the output pane in this cell's output.
display(dna_input, screen_btn, output_area)