In [1]:
from snapgene_reader import snapgene_file_to_seqrecord
from snapgene_reader import snapgene_file_to_dict

In [3]:
# SnapGene files for the two fluorescent-protein constructs being compared.
egfp_filepath, syfp2_filepath = "EGFP.dna", "sYFP2.dna"

# Each file is read into a plain dict via snapgene_file_to_dict; the
# SeqRecord reader (snapgene_file_to_seqrecord) is imported above but is
# intentionally not used here.
egfp_dict, syfp2_dict = (
    snapgene_file_to_dict(dna_path)
    for dna_path in (egfp_filepath, syfp2_filepath)
)

In [4]:
# Pull the value stored under the 'seq' key out of each parsed file
# (presumably the nucleotide sequence as a string — confirm against snapgene_reader).
egfp_nt_seq, syfp2_nt_seq = egfp_dict['seq'], syfp2_dict['seq']

In [5]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

def align_dna_sequences(seq1, seq2):
    """Globally align two DNA sequences with Biopython and print the result.

    Args:
        seq1: First sequence passed to ``pairwise2.align.globalxx``.
        seq2: Second sequence passed to ``pairwise2.align.globalxx``.

    Returns:
        None. Every alignment returned by ``pairwise2`` (restricted with
        ``one_alignment_only=True``) is formatted with ``format_alignment``
        and printed.
    """
    found = pairwise2.align.globalxx(seq1, seq2, one_alignment_only=True)
    # Each alignment record unpacks into the positional arguments that
    # format_alignment expects.
    rendered = [format_alignment(*record) for record in found]
    for text in rendered:
        print(text)

# Example usage: align the EGFP and sYFP2 nucleotide sequences loaded above
# and print the formatted alignment.
align_dna_sequences(egfp_nt_seq, syfp2_nt_seq)

ATGGTGAGCAAGGGCGAGGAGCTGTTCACCGGGGTGGTGCCCATCCTGGTCGAGCTGGACGGCGACGTAAACGGCCACAAGTTCAGCGTGTCCGGCGAGGGCGAGGGCGATGCCACCTACGGCAAGCTGACCCTGAAGTTC--ATCTGCACCACCGGCAAGCTGCCCGTGCCCTGGCCCACCCTCGTGACCACCCTGAC--CTACGGCGTGCAGTGCTTCAGCC-GCTACCCCGACCACATGAAGCAGCACGACTTCTTCAAGTCCGCCATGCCCGAAGGCTACGTCCAGGAGCGCACCATCTTCTTCAAGGACGACGGCAACTACAAGACCCGCGCCGAGGTGAAGTTCGAGGGCGACACCCTGGTGAACCGCATCGAGCTGAAGGGCATCGACTTCAAGGAGGACGGCAACATCCTGGGGCACAAGCTGGAGTACAACTACAACAGCCACAACGTCTATATCATG--GCCGACAAGCAGAAGAACGGCATCAAGGTG--AACTTCAAGATCCGCCACAACATCGAGGACGGCAG-CGTGCAGCTCGCCGACCACTACCAGCAGAACACCCCCATCGGCGACGGCCCCGTGCTGCTGCCCGACAACCACTACCTGAGC-ACCCAGTCC--GCCCTGAGCAAAGACCCCAACGAGAAGCGCGATCACATGGTCCTGCTGGAGTTCGTGACCGCCGCCGGGATCACTCTCGGCATGGACGAGCTGTACAAGTAA
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||  |  ||||||||||||||||||||||||||||||||||||||||||||||||||||||    ||||||||||||||||||| ||| |||||||||||||||||||||||||||||||||||||||||



In [6]:
import matplotlib.pyplot as plt
import numpy as np
from Bio import pairwise2
import matplotlib.patches as mpatches

def _classify_alignment_chunk(chunk1, chunk2):
    """Return the overview colour for one pair of equally positioned aligned chunks.

    Columns are counted as gaps (either side is '-'), matches, or mismatches.
    Red is returned when gap columns outnumber all other columns, teal when
    matches outnumber mismatches, and yellow otherwise.
    """
    n_match = n_mismatch = n_gap = 0
    for a, b in zip(chunk1, chunk2):
        if a == '-' or b == '-':
            n_gap += 1
        elif a == b:
            n_match += 1
        else:
            n_mismatch += 1

    if n_gap > n_match + n_mismatch:
        return '#FF6B6B'  # Red for gaps
    if n_match > n_mismatch:
        return '#4ECDC4'  # Teal for matches
    return '#FFE66D'  # Yellow for mismatches


def simple_alignment_overview(seq1, seq2, seq1_name="Sequence 1", seq2_name="Sequence 2"):
    """
    Create a simple, clean alignment overview visualization.

    The two sequences are globally aligned with ``pairwise2.align.globalxx``
    and the first alignment is drawn as a single colour-coded bar: each
    segment covers ``chunk_size`` alignment columns and is coloured by whether
    it is dominated by matches, mismatches or gaps.

    Args:
        seq1: First sequence to align.
        seq2: Second sequence to align.
        seq1_name: Label for ``seq1`` in the plot title.
        seq2_name: Label for ``seq2`` in the plot title.

    Returns:
        Tuple ``(aligned_seq1, aligned_seq2, score)`` taken from the alignment.

    Raises:
        ValueError: If the aligner returns no alignment (presumably when one
            of the sequences is empty — the original code would have failed
            with an IndexError at this point).
    """
    # Perform alignment
    alignments = pairwise2.align.globalxx(seq1, seq2, one_alignment_only=True)
    if not alignments:
        raise ValueError(
            "No alignment was produced; check that both sequences are non-empty."
        )
    aligned_seq1, aligned_seq2, score, _begin, _end = alignments[0]

    # Calculate basic statistics in a single pass; gap columns are excluded
    # from the identity calculation.
    matches = mismatches = 0
    for a, b in zip(aligned_seq1, aligned_seq2):
        if a == '-' or b == '-':
            continue
        if a == b:
            matches += 1
        else:
            mismatches += 1
    compared = matches + mismatches
    identity = matches / compared * 100 if compared > 0 else 0

    # Create the visualization
    _fig, ax = plt.subplots(1, 1, figsize=(14, 4))

    # Alignment Overview Bar
    alignment_length = len(aligned_seq1)
    chunk_size = max(1, alignment_length // 1000)  # Adjust for very long sequences

    # One colour per chunk of alignment columns
    chunk_starts = range(0, alignment_length, chunk_size)
    colors = [
        _classify_alignment_chunk(
            aligned_seq1[start:start + chunk_size],
            aligned_seq2[start:start + chunk_size],
        )
        for start in chunk_starts
    ]

    # Plot alignment overview. Bars are anchored at their left edge and are
    # chunk_size wide, so the first bar starts exactly at x=0 (no clipped
    # half-bar) and the x axis is expressed in alignment columns rather than
    # chunk indices.
    x_positions = np.arange(0, alignment_length, chunk_size)
    ax.bar(x_positions, [1] * len(colors), color=colors, width=chunk_size,
           align='edge', edgecolor='none')
    ax.set_xlim(0, alignment_length)
    ax.set_ylim(0, 1)
    ax.set_title(f'{seq1_name} vs {seq2_name} - Alignment Overview', fontweight='bold', fontsize=14)
    ax.set_xlabel('Sequence Position')
    ax.set_yticks([])

    # Add legend
    match_patch = mpatches.Patch(color='#4ECDC4', label='Mostly Matches')
    mismatch_patch = mpatches.Patch(color='#FFE66D', label='Mostly Mismatches')
    gap_patch = mpatches.Patch(color='#FF6B6B', label='Gaps/Indels')
    ax.legend(handles=[match_patch, mismatch_patch, gap_patch], loc='upper right')

    # Add summary statistics as text
    stats_text = f'Score: {score:.0f} | Identity: {identity:.1f}% | Length: {alignment_length} bp'
    ax.text(0.02, 0.85, stats_text, transform=ax.transAxes, fontsize=11,
            bbox=dict(boxstyle="round,pad=0.3", facecolor="white", alpha=0.8))

    plt.tight_layout()
    plt.show()

    return aligned_seq1, aligned_seq2, score

# Example usage:
# aligned_seq1, aligned_seq2, score = simple_alignment_overview(egfp_nt_seq, syfp2_nt_seq, "EGFP", "SYFP2")
# differences = create_difference_plot(egfp_nt_seq, syfp2_nt_seq, "EGFP", "SYFP2")

In [7]:
# Draw the EGFP vs. SYFP2 overview. The previous call targeted
# create_beautiful_alignment_viz, which is not defined anywhere in this
# notebook and raised NameError; simple_alignment_overview is the plotting
# function defined above and returns the same three values.
aligned_seq1, aligned_seq2, score = simple_alignment_overview(
    egfp_nt_seq, syfp2_nt_seq, "EGFP", "SYFP2"
)