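# In [None]:  (Jupyter cell prompt left over from the notebook export; commented out so the file parses as Python)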
# benchmark_suite.py
import os
import shlex
import subprocess
import time
from collections import Counter

import pandas as pd
from Bio import SeqIO
from Bio.Align.Applications import ClustalOmegaCommandline, MuscleCommandline

from youshan import YouShanAligner, youshan_progressive_msa

class MSABenchmarkSuite:
    """Benchmark You-Shan against Clustal Omega and MUSCLE on FASTA datasets.

    Every run is recorded as a dict in ``self.results``.
    ``run_complete_benchmark`` turns those records into
    ``benchmark_results.csv`` and ``benchmark_report.md`` in the current
    working directory; aligner outputs go to ``results/``.
    """

    # Tool names that run_benchmark knows how to dispatch.
    SUPPORTED_TOOLS = ('youshan', 'clustal', 'muscle')
    # Numeric result columns that the report averages per tool.
    REPORT_METRICS = ('execution_time', 'avg_conservation', 'length_variance')

    def __init__(self, datasets_dir="benchmark_datasets"):
        """Remember the dataset directory and start with no results.

        Args:
            datasets_dir: Directory scanned for ``*.fasta`` input files.
        """
        self.datasets_dir = datasets_dir
        self.results = []

    def run_benchmark(self, dataset_name, tool_name, input_fasta):
        """Run one MSA tool on one dataset and record timing and quality metrics.

        Args:
            dataset_name: Label used in the result record and output file name.
            tool_name: One of ``SUPPORTED_TOOLS``.
            input_fasta: Path of the unaligned FASTA file.

        Returns:
            The metrics dict, which is also appended to ``self.results``.
            Failures are not raised: the exception text is stored under the
            ``'error'`` key so one broken run does not abort the whole suite.
        """
        output_fasta = f"results/{dataset_name}_{tool_name}_aligned.fasta"
        metrics = {'dataset': dataset_name, 'tool': tool_name}

        try:
            # Reject unknown tools up front instead of failing later with a
            # misleading "file not found" while reading a never-written output.
            if tool_name not in self.SUPPORTED_TOOLS:
                raise ValueError(f"Unsupported MSA tool: {tool_name!r}")

            # The external aligners write here; make direct calls work even
            # when run_complete_benchmark has not created the directory.
            os.makedirs(os.path.dirname(output_fasta), exist_ok=True)

            # perf_counter is monotonic, so the interval cannot be distorted
            # by wall-clock adjustments.
            start_time = time.perf_counter()

            if tool_name == "youshan":
                model = YouShanAligner()
                sequences = [str(record.seq) for record in SeqIO.parse(input_fasta, "fasta")]
                # NOTE(review): the first return value is assumed to be the
                # path of the aligned FASTA file -- confirm against youshan.
                alignment_path, _, _ = youshan_progressive_msa(sequences, model)

            elif tool_name == "clustal":
                clustal_cmd = ClustalOmegaCommandline(
                    infile=input_fasta,
                    outfile=output_fasta,
                    verbose=True,
                    auto=True,
                    # Overwrite the output of a previous benchmark run.
                    force=True
                )
                self._run_command(clustal_cmd)
                alignment_path = output_fasta

            else:  # "muscle" -- the only remaining supported tool
                muscle_cmd = MuscleCommandline(
                    input=input_fasta,
                    out=output_fasta
                )
                self._run_command(muscle_cmd)
                alignment_path = output_fasta

            metrics['execution_time'] = time.perf_counter() - start_time

            # Score the file the tool actually produced.
            metrics.update(self.calculate_alignment_metrics(alignment_path))

        except Exception as e:
            # Deliberate best-effort boundary: record the failure and continue.
            print(f"Error running {tool_name} on {dataset_name}: {e}")
            metrics['error'] = str(e)

        self.results.append(metrics)
        return metrics

    @staticmethod
    def _run_command(cmd):
        """Execute a command-line wrapper object without going through a shell.

        Raises:
            subprocess.CalledProcessError: If the tool exits with a non-zero status.
        """
        command_line = str(cmd)
        # shlex.split follows POSIX quoting rules, which would mangle
        # backslashes in Windows paths; there the string is passed as-is.
        args = command_line if os.name == "nt" else shlex.split(command_line)
        subprocess.run(args, check=True)

    def calculate_alignment_metrics(self, aligned_fasta):
        """Parse an aligned FASTA file and compute alignment quality metrics.

        Args:
            aligned_fasta: Path (or handle) accepted by ``SeqIO.parse``.

        Returns:
            ``{'error': ...}`` when the file holds no sequences, otherwise a
            dict with ``num_sequences``, ``alignment_length``,
            ``length_variance``, ``avg_conservation`` and ``total_gaps``.
        """
        rows = [str(record.seq) for record in SeqIO.parse(aligned_fasta, "fasta")]

        if not rows:
            return {'error': 'No sequences in alignment'}

        return self._metrics_from_rows(rows)

    @staticmethod
    def _metrics_from_rows(rows):
        """Compute the metrics dict from a non-empty list of aligned strings."""
        lengths = [len(row) for row in rows]
        count = len(rows)
        avg_length = sum(lengths) / count
        # Population variance of row lengths; 0 for a rectangular alignment.
        length_variance = sum((x - avg_length) ** 2 for x in lengths) / count

        max_len = max(lengths)
        conservation_scores = []
        for i in range(max_len):
            # Rows shorter than the longest one simply do not contribute to
            # the trailing columns. Gap characters count like any other symbol.
            column = Counter(row[i] for row in rows if i < len(row))
            top_count = column.most_common(1)[0][1]
            conservation_scores.append(top_count / sum(column.values()))

        avg_conservation = (
            sum(conservation_scores) / len(conservation_scores)
            if conservation_scores else 0
        )

        return {
            'num_sequences': count,
            'alignment_length': max_len,
            'length_variance': length_variance,
            'avg_conservation': avg_conservation,
            'total_gaps': sum(row.count('-') for row in rows)
        }

    def run_complete_benchmark(self):
        """Run every supported tool on every ``*.fasta`` file in ``datasets_dir``.

        Writes ``benchmark_results.csv`` and ``benchmark_report.md`` to the
        current working directory.

        Returns:
            A ``pandas.DataFrame`` with one row per (dataset, tool) run.
        """
        tools = list(self.SUPPORTED_TOOLS)
        # Sorted so the run order does not depend on directory listing order.
        datasets = sorted(f for f in os.listdir(self.datasets_dir) if f.endswith('.fasta'))

        os.makedirs('results', exist_ok=True)

        for dataset_file in datasets:
            # Strip only the extension, not every '.fasta' inside the name.
            dataset_name = os.path.splitext(dataset_file)[0]
            input_path = os.path.join(self.datasets_dir, dataset_file)

            print(f"\nBenchmarking {dataset_name}...")
            for tool in tools:
                print(f"  Running {tool}...")
                self.run_benchmark(dataset_name, tool, input_path)

        # Save results
        results_df = pd.DataFrame(self.results)
        results_df.to_csv('benchmark_results.csv', index=False)

        # Generate summary report
        self.generate_report(results_df)

        return results_df

    def generate_report(self, results_df):
        """Write ``benchmark_report.md`` with per-tool mean metrics.

        Args:
            results_df: DataFrame of run records. Metric columns may be
                missing when every run failed or no dataset was found; the
                report then states that instead of raising.
        """
        report = """
# You-Shan MSA Benchmark Report

## Summary
This report presents the benchmarking results of You-Shan against traditional MSA tools.

## Methods
- **You-Shan**: Fine-tuned transformer-based MSA
- **ClustalΩ**: Progressive alignment with guide trees
- **MUSCLE**: Multiple sequence comparison by log-expectation

## Results
"""

        report += "\n### Performance Summary\n"

        # Only aggregate columns that exist: failed runs contribute no metrics.
        available = [name for name in self.REPORT_METRICS if name in results_df.columns]
        if available and 'tool' in results_df.columns:
            summary = (
                results_df.groupby('tool')
                .agg({name: 'mean' for name in available})
                .round(3)
            )
            try:
                report += summary.to_markdown()
            except ImportError:
                # to_markdown needs the optional 'tabulate' package.
                report += summary.to_string()
        else:
            report += "No successful benchmark runs to summarize.\n"

        # Explicit UTF-8: the report contains non-ASCII text.
        with open('benchmark_report.md', 'w', encoding='utf-8') as f:
            f.write(report)

        print("Benchmark report generated: benchmark_report.md")

# Script entry point: benchmark every dataset in the default directory.
if __name__ == "__main__":
    # Spell out the dataset directory (same value as the constructor default)
    # so the input location is visible at the call site.
    benchmark = MSABenchmarkSuite(datasets_dir="benchmark_datasets")
    # Keep the resulting DataFrame at module level for interactive inspection.
    results = benchmark.run_complete_benchmark()