In [None]:
# @title 🛠️ Installation & Setup { run: "auto", display-mode: "form" }
# Install the RolyPoly package quietly; %pip targets the running kernel's environment.
%pip install -q rolypoly-bio
# Refresh the apt package index, then install the aria2 and pigz system packages.
!apt-get update -q
!apt-get install -q -y aria2 pigz

# Install bioinformatics tools needed for RNA annotation
import subprocess
import sys

def run_conda_install(packages):
    """Install packages with conda from the bioconda and conda-forge channels.

    Args:
        packages: Iterable of conda package specs, e.g. ``["infernal"]``.

    Raises:
        subprocess.CalledProcessError: If conda exits with a non-zero status.
        FileNotFoundError: If the ``conda`` executable cannot be found.
    """
    # Pass an argument list instead of a shell string: specs such as
    # "pkg>=1.2" would otherwise be parsed by the shell as a redirection,
    # and any shell metacharacter in a spec would be executed.
    cmd = ["conda", "install", "-c", "bioconda", "-c", "conda-forge", "-y", *packages]
    subprocess.run(cmd, check=True)

# Conda packages providing the external programs used for RNA annotation
bioinf_tools = [
    "viennarna",    # provides RNAfold
    "linearfold",   # provides LinearFold
    "trnascan-se",  # tRNA detection
    "aragorn",      # alternative tRNA detection
    "infernal",     # provides cmsearch (ribozyme detection)
]

# Best-effort install: a failure is reported but does not stop the notebook.
try:
    run_conda_install(bioinf_tools)
except Exception as e:
    print(f"‚ö†Ô∏è Some tools may not be available: {e}")
    print("Demo will continue with available tools")
else:
    print("‚úÖ Successfully installed bioinformatics tools")

In [None]:
# @title 📦 Import Required Libraries { display-mode: "form" }
import os
import json
import tempfile
from pathlib import Path
from typing import Dict, List, Optional, Union

import ipywidgets as widgets
from IPython.display import display, HTML, FileLink
import pandas as pd

# Pull in the RolyPoly entry points; report (rather than raise) when the
# package is missing so the rest of the cell still runs.
try:
    from rolypoly.commands.annotation.annotate_RNA import RNAAnnotationConfig, process_RNA_annotations
    from rolypoly.utils.various import ensure_memory
except ImportError as e:
    print(f"‚ùå Error importing RolyPoly: {e}")
    print("Please ensure rolypoly-bio is properly installed")
else:
    print("‚úÖ RolyPoly imported successfully")

# Create output directory.
# parents=True also creates /content itself, which only pre-exists on Colab;
# without it this line raises FileNotFoundError on any other runtime.
output_base = Path("/content/rolypoly_output")
output_base.mkdir(parents=True, exist_ok=True)

print(f"üìÅ Output directory: {output_base}")


In [None]:
# @title 📤 File Upload Interface { display-mode: "form" }
# Upload controls: a single-file FASTA picker and an HTML status line
upload_widget = widgets.FileUpload(
    description='Upload FASTA',
    accept='.fasta,.fa,.fas,.fna',
    multiple=False,
    style=dict(button_color='#4CAF50'),
)

upload_status = widgets.HTML(value="<p>üìÅ Please upload a FASTA file to begin</p>")

# Registry of available inputs: file name -> path of the saved copy
uploaded_files = dict()

def handle_upload(change):
    """Save a newly uploaded FASTA file and report its status in the UI.

    Observer callback for the ``value`` trait of ``upload_widget``. Each
    uploaded file is written into ``output_base``, registered in
    ``uploaded_files`` (file name -> saved path) and summarised in
    ``upload_status`` together with a quick FASTA sanity check on the first
    1000 characters.

    Args:
        change: Trait-change dict. ``change['new']`` holds the uploads, either
            as ``{filename: {'content': ...}}`` (ipywidgets 7) or as a
            sequence of dicts carrying ``'name'`` and ``'content'``
            (ipywidgets 8).
    """
    import html  # stdlib; imported locally because the import cell does not provide it

    new_value = change['new']
    if not new_value:
        return

    # Normalise both FileUpload value layouts to (filename, file_info) pairs.
    if isinstance(new_value, dict):
        uploads = list(new_value.items())
    else:
        uploads = [(file_info['name'], file_info) for file_info in new_value]

    for filename, file_info in uploads:
        # Keep only the final path component so a crafted name cannot
        # escape output_base.
        safe_name = Path(filename).name or 'uploaded.fasta'
        # ipywidgets 8 hands over a memoryview; bytes() leaves bytes unchanged.
        content = bytes(file_info['content'])

        # Save uploaded file
        upload_path = output_base / safe_name
        with open(upload_path, 'wb') as f:
            f.write(content)

        uploaded_files[safe_name] = str(upload_path)

        # Update status (values are escaped because the text is rendered as HTML)
        file_size = len(content) / 1024  # KB
        upload_status.value = f"""<div style='color: green;'>
            ‚úÖ <strong>Uploaded:</strong> {html.escape(safe_name)} ({file_size:.1f} KB)<br>
            üìç <strong>Path:</strong> {html.escape(str(upload_path))}
            </div>"""

        # Basic FASTA validation
        try:
            with open(upload_path, 'r') as f:
                head = f.read(1000)  # Read first 1KB
        except Exception as e:
            upload_status.value += f"<p style='color: red;'>‚ùå Error reading file: {html.escape(str(e))}</p>"
            continue

        if not head.startswith('>'):
            upload_status.value += "<p style='color: orange;'>‚ö†Ô∏è Warning: File doesn't appear to be in FASTA format</p>"
        else:
            # Count header lines only: a '>' inside a description is not a record.
            seq_count = sum(1 for line in head.splitlines() if line.startswith('>'))
            upload_status.value += f"<p style='color: blue;'>üìä Found {seq_count}+ sequences</p>"

# Call handle_upload whenever the widget's `value` trait changes
upload_widget.observe(handle_upload, 'value')

# Render heading, picker and status line stacked vertically
display(
    widgets.VBox(children=[
        widgets.HTML("<h3>üì§ Upload FASTA File</h3>"),
        upload_widget,
        upload_status,
    ])
)


In [None]:
# @title ⚙️ Configuration Parameters { display-mode: "form" }
# ---- Resource settings ------------------------------------------------------
threads_widget = widgets.IntSlider(
    value=2, min=1, max=8, step=1,
    description='Threads:',
    style=dict(description_width='150px'),
)

memory_widget = widgets.Dropdown(
    options=['2gb', '4gb', '6gb', '8gb', '12gb'], value='4gb',
    description='Memory:',
    style=dict(description_width='150px'),
)

# ---- Tool selection ---------------------------------------------------------
# Secondary structure prediction
secondary_tool_widget = widgets.Dropdown(
    options=['LinearFold', 'RNAfold'], value='LinearFold',
    description='Secondary Structure:',
    style=dict(description_width='150px'),
)

# IRES detection
ires_tool_widget = widgets.Dropdown(
    options=['IRESfinder', 'IRESpy'], value='IRESfinder',
    description='IRES Tool:',
    style=dict(description_width='150px'),
)

# tRNA detection
trna_tool_widget = widgets.Dropdown(
    options=['tRNAscan-SE', 'aragorn'], value='tRNAscan-SE',
    description='tRNA Tool:',
    style=dict(description_width='150px'),
)

# ---- Output and pipeline control --------------------------------------------
output_format_widget = widgets.Dropdown(
    options=['tsv', 'csv', 'gff3'], value='tsv',
    description='Output Format:',
    style=dict(description_width='150px'),
)

# Pipeline steps the user may opt out of (nothing is skipped by default)
skip_steps_widget = widgets.SelectMultiple(
    options=[
        'predict_secondary_structure',
        'search_ribozymes',
        'detect_ires',
        'predict_trnas',
        'search_rna_elements',
    ],
    value=[],
    description='Skip Steps:',
    style=dict(description_width='150px'),
    rows=5,
)

# Free-form JSON overrides
advanced_params_widget = widgets.Textarea(
    value='{}',
    placeholder='Enter JSON parameters to override defaults, e.g.: {"RNAfold": {"temperature": 37}}',
    description='Advanced Params:',
    style=dict(description_width='150px'),
    layout=widgets.Layout(width='100%', height='100px'),
)

# ---- Layout: related controls are paired side by side -----------------------
config_widgets = widgets.VBox(children=[
    widgets.HTML("<h3>‚öôÔ∏è Configuration Parameters</h3>"),
    widgets.HBox(children=[threads_widget, memory_widget]),
    widgets.HBox(children=[secondary_tool_widget, ires_tool_widget]),
    widgets.HBox(children=[trna_tool_widget, output_format_widget]),
    widgets.HTML("<h4>Skip Steps (optional):</h4>"),
    skip_steps_widget,
    widgets.HTML("<h4>Advanced Parameters (JSON format):</h4>"),
    advanced_params_widget,
])

display(config_widgets)


In [None]:
# @title 🚀 Run RNA Annotation Analysis { display-mode: "form" }
# Two separate output areas: one for run progress/errors, one for the result summary
progress_output, results_output = widgets.Output(), widgets.Output()

def run_rna_annotation(button):
    """Run the RNA annotation pipeline on the most recently provided FASTA file.

    Click handler for ``run_button``. Reads the current widget values, builds
    an ``RNAAnnotationConfig``, calls ``process_RNA_annotations`` and finally
    ``display_results``. Progress and errors are printed into
    ``progress_output``; the function returns ``None`` and reports pipeline
    errors instead of raising them.

    Args:
        button: The clicked button widget (unused).
    """
    with progress_output:
        progress_output.clear_output()

        # Validate input
        if not uploaded_files:
            print("‚ùå Please upload a FASTA file first")
            return

        # Use the newest input rather than the first one ever registered, so a
        # later upload (or freshly created sample data) is what gets analysed.
        # Iterating in reverse makes the latest registration win mtime ties.
        candidates = [Path(p) for p in reversed(list(uploaded_files.values()))]
        candidates = [p for p in candidates if p.is_file()]
        if not candidates:
            print("‚ùå Uploaded file no longer exists on disk; please upload it again")
            return
        input_file = str(max(candidates, key=lambda p: p.stat().st_mtime))
        print(f"üìÑ Processing: {Path(input_file).name}")

        # Parse advanced parameters
        raw_params = advanced_params_widget.value.strip()
        try:
            override_params = json.loads(raw_params) if raw_params else {}
        except json.JSONDecodeError as e:
            print(f"‚ùå Invalid JSON in advanced parameters: {e}")
            return
        # Valid JSON that is not an object (e.g. a list or null) cannot map
        # tool names to overrides, so reject it here with a clear message.
        if not isinstance(override_params, dict):
            print("‚ùå Advanced parameters must be a JSON object, e.g. {\"RNAfold\": {\"temperature\": 37}}")
            return

        # Setup output directory
        analysis_output = output_base / f"rna_annotation_{Path(input_file).stem}"
        analysis_output.mkdir(parents=True, exist_ok=True)

        print(f"üìÅ Output directory: {analysis_output}")
        print(f"‚öôÔ∏è Configuration:")
        print(f"  - Threads: {threads_widget.value}")
        print(f"  - Memory: {memory_widget.value}")
        print(f"  - Secondary structure tool: {secondary_tool_widget.value}")
        print(f"  - IRES tool: {ires_tool_widget.value}")
        print(f"  - tRNA tool: {trna_tool_widget.value}")
        print(f"  - Output format: {output_format_widget.value}")
        if skip_steps_widget.value:
            print(f"  - Skipping steps: {list(skip_steps_widget.value)}")

        try:
            # Create configuration object
            config = RNAAnnotationConfig(
                input=Path(input_file),
                output_dir=analysis_output,
                threads=threads_widget.value,
                log_file=analysis_output / "rna_annotation.log",
                log_level="INFO",
                memory=memory_widget.value,
                override_params=override_params,
                skip_steps=list(skip_steps_widget.value),
                secondary_structure_tool=secondary_tool_widget.value,
                ires_tool=ires_tool_widget.value,
                trna_tool=trna_tool_widget.value,
                output_format=output_format_widget.value,
                cm_db="Rfam",
                motif_db="RolyPoly"
            )

            print("\nüöÄ Starting RNA annotation pipeline...")

            # Run the annotation
            process_RNA_annotations(config)

            print("\n‚úÖ RNA annotation completed successfully!")

            # Display results
            display_results(analysis_output)

        except Exception as e:
            # Deliberately broad: any pipeline failure is shown in the
            # progress area instead of killing the widget callback.
            print(f"\n‚ùå Error during annotation: {str(e)}")
            import traceback
            print("\nDetailed error:")
            traceback.print_exc()

def display_results(output_dir):
    """Summarise the files produced by an annotation run inside ``results_output``.

    Lists every regular file directly in ``output_dir`` with its size,
    previews ``.tsv``/``.csv`` tables smaller than 1000 KB (first five rows)
    and prints ``rna_annotation.log``, truncated to its last 2000 characters
    when it is longer.

    Args:
        output_dir: Directory holding the results; a ``str`` or ``Path``.
    """
    output_dir = Path(output_dir)  # accept plain strings as well as Path objects

    with results_output:
        results_output.clear_output()

        print("\nüìä Analysis Results:")
        print("=" * 50)

        # Only regular files are reported, so the total must not count
        # sub-directories that are never listed below.
        output_files = sorted(p for p in output_dir.glob("*") if p.is_file())

        if output_files:
            print(f"\nüìÅ Output files ({len(output_files)} total):")
            for file_path in output_files:
                size_kb = file_path.stat().st_size / 1024
                print(f"  üìÑ {file_path.name} ({size_kb:.1f} KB)")

                # Preview small result tables only
                if file_path.suffix in ['.tsv', '.csv'] and size_kb < 1000:
                    try:
                        df = pd.read_csv(file_path, sep='\t' if file_path.suffix == '.tsv' else ',')
                        if len(df) > 0:
                            print(f"    üìã Preview of {file_path.name}:")
                            print(f"    Rows: {len(df)}, Columns: {len(df.columns)}")
                            # head() already yields every row when there are five or fewer
                            display(df.head())
                            print("\n")
                    except Exception as e:
                        # Best effort: an unreadable table must not hide the other files
                        print(f"    ‚ö†Ô∏è Could not preview: {e}")
        else:
            print("No output files found. Check the log for errors.")

        # Show log file if exists
        log_file = output_dir / "rna_annotation.log"
        if log_file.exists():
            print(f"\nüìã Log file: {log_file.name}")
            # errors='replace' keeps undecodable bytes in the log from aborting the report
            with open(log_file, 'r', errors='replace') as f:
                log_content = f.read()
            if len(log_content) > 2000:  # Show only the tail of a large log
                print("Last 2KB of log:")
                print(log_content[-2000:])
            else:
                print(log_content)

# Button that starts the pipeline; clicking it calls run_rna_annotation
run_button = widgets.Button(
    description='üöÄ Run RNA Annotation',
    button_style='success',
    layout=widgets.Layout(height='40px', width='200px'),
)
run_button.on_click(run_rna_annotation)

# Heading, button, then the progress and results areas stacked vertically
display(
    widgets.VBox(children=[
        widgets.HTML("<h3>üöÄ Execute Analysis</h3>"),
        run_button,
        progress_output,
        results_output,
    ])
)


In [None]:
# @title 🧪 Sample Data { display-mode: "form" }
def create_sample_data(button):
    """Write the bundled demo FASTA file and register it as an available input.

    Click handler for ``sample_button``. The file is written into
    ``output_base``, its path is stored in ``uploaded_files`` under the file
    name, ``upload_status`` is updated and a short summary is printed.

    Args:
        button: The clicked button widget (unused).
    """
    # Demo records (simplified for demo), keyed by the file name to create
    sample_sequences = {
        "sample_viral_rna.fasta": """>NC_001617.1 Human rhinovirus 14, complete genome
TTAAAACAGCCTGTGGGTTGATCCCCCACCCTTGTGCCACGCATTGTAGACTTGTACACATGGTGCGTTTGCC
ACCTGATCCGCGTAAACAGGCTAACGGCGAGGAGGAATACTATGGCATCGGCGTGATAGCGCTGGATAGGGT
GTTAACCCGCCCGGAAGGGGCAGGGGCCCGGGGAAAACCAGGTAGCCGCCTTAATGACCGCGCCAGTGTTACC
AATGACAGCTGAGGTGCTGATAGGTGTGGTGGGGATGCTAATGGGAGGTACCTCTACGTGGGTAGGATCCTA
CTAATGTAACCCCCCTGATTTGGCCCCAGGATACTAGTATGTTAGCGCTAATCGGATGAGGATGGTGTGGTTG
CGCATACCCAACGCTGACCCACTTGAGGTCAACCCCTGATGAACCCTCCAGGCCAATCTAGGATACTTGAGGG
>NC_003977.2 Hepatitis C virus subtype 1a, complete genome
GCCAGCCCCCTGATGGGGGCGACACTCCACCATAGATCACTCCCCTGTGAGGAACTACTGTCTTCACGCAGA
AAGCGTCTAGCCATGGCGTTAGTATGAGTGTCGTGCAGCCTCCAGGACCCCCCCTCCCGGGAGAGCCATAGTG
GTCTGCGGAACCGGTGAGTACACCGGAATTGCCAGGACGACCGGGTCCTTTCTTGGATAAACCCGCTCAATGC
CTGGAGATTTGGGCGTGCCCCCGCAAGACTGCTAGCCGAGTAGTGTTGGGTCGCGAAAGGCCTTGTGGTACTG
CCTGATAGGGTGCTTGCGAGTGCCCCGGGAGGTCTCGTAGACCGTGCACCATGAGCACGAATCCTAAACCTCA
AAGAAAAACCAAACGTAACACCAACCGTCGCCCACAGGACGTCAAGTTCCCGGGCGGTGGTCAGATCGTTGGT
>NC_045512.2 SARS-CoV-2, complete genome (partial)
ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACG
AACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACTCACGCAGTATAATTAATAACTAAT
TACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAG
CCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTT
CAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGG
AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGGCTTAGTAGAAGA

"""
    }

    # Write every sample to disk and register its path under its file name
    for sample_name in sample_sequences:
        sample_path = output_base / sample_name
        sample_path.write_text(sample_sequences[sample_name])
        uploaded_files[sample_name] = str(sample_path)

    # Reflect the new input in the upload status line
    upload_status.value = """<div style='color: blue;'>
    ‚úÖ <strong>Sample data created:</strong> sample_viral_rna.fasta<br>
    üìä Contains 3 viral RNA sequences for testing<br>
    üß™ Ready for RNA annotation analysis!
    </div>"""

    # Plain-text summary, one print per line
    summary_lines = (
        "‚úÖ Sample viral RNA data created successfully!",
        "üìÑ Created: sample_viral_rna.fasta",
        "üß¨ Contains sequences from:",
        "  - Human rhinovirus 14",
        "  - Hepatitis C virus",
        "  - SARS-CoV-2 (partial)",
        "\nYou can now run the RNA annotation analysis!",
    )
    for summary_line in summary_lines:
        print(summary_line)

# Button that generates the demo FASTA file; clicking it calls create_sample_data
sample_button = widgets.Button(
    description='üß™ Create Sample Data',
    button_style='info',
    layout=widgets.Layout(height='40px', width='200px'),
)
sample_button.on_click(create_sample_data)

# Heading, short instruction, then the button stacked vertically
display(
    widgets.VBox(children=[
        widgets.HTML("<h3>üß™ Try with Sample Data</h3>"),
        widgets.HTML("<p>Click the button below to create sample viral RNA sequences for testing:</p>"),
        sample_button,
    ])
)
