# NonBDNAFinder - Quick Demo

This notebook demonstrates the basic functionality of NonBDNAFinder for detecting Non-B DNA structures.

## Cell 1: Upload FASTA File

Upload a FASTA file containing your DNA sequence for analysis. If the file contains more than one record, only the first sequence is used.

In [None]:
# Import the NonBDNAFinder scanner and required libraries
import sys
import os

# Add the repository root to the Python path for local execution
# This allows importing modules when running the notebook locally.
# `__file__` is only defined when this code runs from a file on disk;
# interactive notebook kernels typically do not define it, so the current
# working directory is used as the repository root in that case.
# NOTE: `__file__` must be referenced as a name here -- quoting it would
# resolve a literal file called "__file__" relative to the working directory.
repo_root = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()
if repo_root not in sys.path:
    # Prepend so local modules take precedence over installed ones
    sys.path.insert(0, repo_root)

from Utilities.nonbscanner import analyze_sequence
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from Bio import SeqIO
from Utilities.utilities import validate_sequence
import io

# Notebook-wide state: filled in by the upload callback once a file has been
# parsed and validated, and read by the analysis cell afterwards.
sample_sequence = sequence_name = None

# Single-file upload button restricted to common FASTA/text extensions
file_uploader = widgets.FileUpload(
    description='Upload FASTA:',
    button_style='primary',
    accept='.fasta,.fa,.fna,.txt',
    multiple=False,
)

# Area where the upload callback prints its feedback
output = widgets.Output()

def _first_uploaded_file(upload_value):
    """Return ``(file_name, raw_bytes)`` for the first file in a FileUpload value.

    Two layouts of ``FileUpload.value`` are accepted:

    * a dict mapping file name -> entry dict (ipywidgets 7.x layout), and
    * a sequence of entry dicts that carry their own ``'name'`` key and may
      hold ``'content'`` as a memoryview (ipywidgets 8.x layout).
    """
    if isinstance(upload_value, dict):
        file_name, entry = next(iter(upload_value.items()))
    else:
        entry = upload_value[0]
        file_name = entry['name']
    # bytes() leaves bytes content unchanged and materializes a memoryview,
    # so the caller can always call .decode() on the result.
    return file_name, bytes(entry['content'])


def on_file_upload(change):
    """Handle file upload and parse FASTA content.

    Registered as an observer on ``file_uploader``'s ``value`` trait. Reads the
    first uploaded file, parses it as FASTA, validates the first record with
    ``validate_sequence`` and, on success, stores the upper-cased sequence and
    its name in the globals ``sample_sequence`` / ``sequence_name``. On any
    failure both globals are left as ``None`` and a message is printed into
    the ``output`` widget.

    Args:
        change: Trait change notification passed by ``observe``. It is not
            inspected; the current value is read from ``file_uploader``.
    """
    global sample_sequence, sequence_name

    with output:
        output.clear_output()

        if not file_uploader.value:
            print("No file uploaded yet.")
            return

        # Drop any previously accepted sequence up front: every failure path
        # below must leave the globals unset, and only a fully validated
        # upload re-populates them.
        sample_sequence = None
        sequence_name = None

        # Broad catch is deliberate: this is the UI boundary of a widget
        # callback, and any failure should be shown to the user, not raised.
        try:
            # Get uploaded file content (works for both value layouts)
            file_name, file_content = _first_uploaded_file(file_uploader.value)

            # Parse FASTA file
            fasta_string = file_content.decode('utf-8')
            records = list(SeqIO.parse(io.StringIO(fasta_string), "fasta"))

            if not records:
                print("Error: No valid FASTA sequences found in the file.")
                print("Please upload a valid FASTA file.")
                return

            # Use the first sequence; fall back to the file name if the
            # record has no identifier
            first_record = records[0]
            temp_sequence = str(first_record.seq).upper()
            temp_name = first_record.id or file_name

            # Validate the DNA sequence
            is_valid, error_msg = validate_sequence(temp_sequence)
            if not is_valid:
                print(f"Error: Invalid DNA sequence - {error_msg}")
                print("\nPlease ensure your sequence contains only valid DNA characters.")
                return

            # Sequence is valid, store it
            sample_sequence = temp_sequence
            sequence_name = temp_name

            # Display success message
            print("FASTA file uploaded successfully!")
            print(f"\nFile: {file_name}")
            print(f"Sequence name: {sequence_name}")
            print(f"Sequence length: {len(sample_sequence):,} bp")

            if len(records) > 1:
                print(f"\nNote: File contains {len(records)} sequences. Using the first one.")

            # Show preview of sequence
            preview_length = min(80, len(sample_sequence))
            print(f"\nSequence preview (first {preview_length} bp):")
            print(sample_sequence[:preview_length])
            if len(sample_sequence) > preview_length:
                print("...")

            print("\nSequence validated! Ready for analysis.")
            print("Run the next cell to analyze this sequence.")

        except Exception as e:
            print(f"Error parsing FASTA file: {str(e)}")
            print("\nPlease ensure your file is in valid FASTA format:")
            print(">sequence_name")
            print("ATCGATCGATCG...")
            sample_sequence = None
            sequence_name = None

# Run on_file_upload whenever the uploader's `value` trait changes
file_uploader.observe(on_file_upload, names='value')

# Print the banner, then render the upload button followed by its feedback area
banner_lines = [
    "NonBDNAFinder - File Upload",
    "=" * 50,
    "Please upload a FASTA file containing your DNA sequence:",
    "",
]
print("\n".join(banner_lines))
for shown_widget in (file_uploader, output):
    display(shown_widget)

## Cell 2: Analyze the Sequence for Non-B DNA Motifs

In [None]:
# Run the NonBDNAFinder analysis
# This detects 11 different classes of Non-B DNA structures

# Start from an empty result list. Without this, skipping the analysis (no
# sequence uploaded) would leave `motifs` undefined for the results cell, or
# would leave the results of an earlier run in place to be shown and
# exported again.
motifs = []

# Check if a sequence has been uploaded
if sample_sequence is None or sequence_name is None:
    print("Error: No sequence uploaded!")
    print("\nPlease run Cell 1 and upload a FASTA file first.")
else:
    print("Running NonBDNAFinder analysis...")
    motifs = analyze_sequence(sample_sequence, sequence_name)

    # Display summary of results
    print("\nAnalysis complete!")
    print(f"Total motifs detected: {len(motifs)}")

    # Count motifs by class
    if motifs:
        motif_counts = pd.DataFrame(motifs)['Class'].value_counts()
        print("\nMotifs detected by class:")
        for motif_class, count in motif_counts.items():
            print(f"  - {motif_class}: {count}")
    else:
        print("No motifs detected in this sequence.")

## Cell 3: Display and Export Results

In [None]:
# Display detailed results in a formatted table, print summary statistics and
# export the full result set to CSV.

# `motifs` is only created by the analysis cell. Check for it explicitly so
# that running this cell first prints guidance instead of raising NameError.
analysis_available = 'motifs' in globals()

if not analysis_available:
    print("No analysis results available.")
    print("Please run Cell 2 to analyze a sequence first.")
elif motifs:
    # Convert to DataFrame for better visualization
    results_df = pd.DataFrame(motifs)

    # Select key columns to display (only those actually present in the results)
    display_columns = ['Class', 'Subclass', 'Start', 'End', 'Length', 'Score', 'Strand']
    available_columns = [col for col in display_columns if col in results_df.columns]

    print("Detailed Results:")
    print("=" * 80)
    # Work on a copy so the score formatting below does not alter results_df,
    # which is exported unformatted further down
    display_df = results_df[available_columns].copy()

    # Format the Score column if it exists
    if 'Score' in display_df.columns:
        display_df['Score'] = display_df['Score'].apply(
            lambda x: f"{x:.3f}" if pd.notnull(x) else "N/A"
        )

    # Display the results
    print(display_df.to_string(index=False))
    print("=" * 80)

    # Summary statistics
    print("\nSummary Statistics:")
    print(f"  Total motifs found: {len(results_df)}")
    print(f"  Unique motif classes: {results_df['Class'].nunique()}")
    if 'Length' in results_df.columns:
        lengths = results_df['Length']
        longest_class = results_df.loc[lengths.idxmax(), 'Class']
        print(f"  Average motif length: {lengths.mean():.1f} bp")
        print(f"  Longest motif: {lengths.max()} bp ({longest_class})")

    # Export results to CSV in the current working directory
    output_file = "nonbdna_results.csv"
    results_df.to_csv(output_file, index=False)
    print(f"\nResults exported to: {output_file}")
else:
    print("No motifs were detected in the analyzed sequence.")
    print("Try analyzing a different sequence or a longer sequence with more complex patterns.")