In [None]:
# NOTE(review): per the install log below, `Bio` resolves to the PyPI package `bio`,
# which pulls in biopython as a dependency (plus gprofiler-official and mygene).
!pip install Bio

Collecting Bio
  Downloading bio-1.8.1-py3-none-any.whl.metadata (5.7 kB)
Collecting biopython>=1.80 (from Bio)
  Downloading biopython-1.85-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting gprofiler-official (from Bio)
  Downloading gprofiler_official-1.0.0-py3-none-any.whl.metadata (11 kB)
Collecting mygene (from Bio)
  Downloading mygene-3.2.2-py2.py3-none-any.whl.metadata (10 kB)
Collecting biothings-client>=0.2.6 (from mygene->Bio)
  Downloading biothings_client-0.4.1-py3-none-any.whl.metadata (10 kB)
Downloading bio-1.8.1-py3-none-any.whl (321 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m321.3/321.3 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading biopython-1.85-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gprofiler_official-1.0.0-py3-none-any.whl (9.3

In [None]:
!pip install biopython


Collecting biopython
  Downloading biopython-1.85-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.3 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.6/3.3 MB[0m [31m19.3 MB/s[0m eta [36m0:00:01[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m3.3/3.3 MB[0m [31m57.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m42.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


In [None]:
# --- IMPORTS ---
import os
# Biopython is treated as optional: record whether the import succeeded so the
# BLAST section can be skipped instead of failing when the package is missing.
try:
    from Bio.Blast import NCBIWWW
    from Bio.Blast import NCBIXML
    BIOPYTHON_AVAILABLE = True  # checked by run_online_blast() before any BLAST call
except ImportError:
    print("Warning: Biopython not found. BLAST functionality will be skipped.")
    print("To enable BLAST, install it: !pip install biopython")
    BIOPYTHON_AVAILABLE = False


# ===============================================
# --- PART 1: FASTA Reading and Local Comparison ---
# ===============================================

def read_fasta(file_path):
    """Read a FASTA file into a dictionary mapping record names to sequences.

    The record name is the first whitespace-delimited word after '>' on a header
    line. Sequence lines belonging to one record are concatenated with surrounding
    whitespace removed.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        dict: ``{name: sequence}`` in file order. An empty dict is returned (and an
        error message printed) when ``file_path`` does not exist.

    Notes:
        * A header with no name (a bare '>') no longer raises ``IndexError``; the
          record is stored under ``unnamed_<n>``, where ``n`` is the 1-based
          position of the record in the file.
        * Lines that appear before the first header are ignored.
        * If the same name occurs more than once, the last record wins.
    """
    sequences = {}

    if not os.path.exists(file_path):
        print(f"Error: FASTA file not found at '{file_path}'")
        return sequences

    # Collect sequence lines per record and join once at the end, instead of
    # repeatedly concatenating ever-growing strings.
    chunks = {}
    current_name = None
    record_count = 0

    with open(file_path, 'r') as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                continue  # blank lines carry no sequence data
            if line.startswith('>'):
                record_count += 1
                header_words = line[1:].split()
                # A bare '>' has no words; fall back to a positional placeholder
                # rather than indexing into an empty list.
                current_name = header_words[0] if header_words else f"unnamed_{record_count}"
                chunks[current_name] = []
            elif current_name is not None:
                chunks[current_name].append(line)

    for name, parts in chunks.items():
        sequences[name] = ''.join(parts)
    return sequences


def simple_sequence_comparison(seq1, seq2, match_score=1, mismatch_penalty=-1, gap_penalty=-2):
    """Score two sequences position by position, without any real alignment.

    Characters at the same index are compared case-insensitively. Every position
    present in only one sequence (the tail of the longer one) is treated as a gap.

    Args:
        seq1: First sequence.
        seq2: Second sequence.
        match_score: Added for each position where the characters agree.
        mismatch_penalty: Added for each position where the characters differ.
        gap_penalty: Added for each trailing position of the longer sequence.

    Returns:
        tuple: ``(score, aligned_seq1, aligned_seq2)``. The aligned strings are the
        inputs, with the shorter one right-padded with '-' to the longer length.
    """
    len1, len2 = len(seq1), len(seq2)
    overlap = min(len1, len2)
    overhang = max(len1, len2) - overlap

    total = 0
    # Positions shared by both sequences: match or mismatch.
    for pos in range(overlap):
        same = seq1[pos].upper() == seq2[pos].upper()
        total += match_score if same else mismatch_penalty
    # Positions only the longer sequence has: one gap penalty each.
    for _ in range(overhang):
        total += gap_penalty

    padded1 = "".join(seq1[pos] for pos in range(len1)) + "-" * (len2 - overlap)
    padded2 = "".join(seq2[pos] for pos in range(len2)) + "-" * (len1 - overlap)

    return total, padded1, padded2


# --- Example Usage for Part 1 ---
def run_local_comparison():
    """Demonstrate FASTA reading and the simple comparison on two generated files.

    Writes 'sequence1.fasta' and 'sequence2.fasta' into the current working
    directory, reads them back with read_fasta(), compares the first sequence of
    each file with simple_sequence_comparison(), and prints the inputs, the score
    and the padded sequences. Returns None.
    """
    banner = "=" * 50
    print("\n" + banner)
    print("Local FASTA Reading and Simple Comparison")
    print(banner)

    # Demo inputs: the second sequence is two bases longer so the gap branch is exercised.
    file1_name, file2_name = 'sequence1.fasta', 'sequence2.fasta'
    demo_records = (
        (file1_name, '>SeqA_Example\n', 'ATGCGTACGTAC'),
        (file2_name, '>SeqB_Example\n', 'ATGCGTAGCTACAG'),
    )

    print(f"Creating dummy files: {file1_name} and {file2_name}")
    for path, header, residues in demo_records:
        with open(path, 'w') as handle:
            handle.write(header + residues)

    # Read both files back before deciding whether the comparison can proceed.
    sequences1 = read_fasta(file1_name)
    sequences2 = read_fasta(file2_name)
    if not (sequences1 and sequences2):
        print("Could not proceed with local comparison due to missing sequences.")
        return

    # Only the first record of each file takes part in the comparison.
    seq1_name, sequence1 = next(iter(sequences1.items()))
    seq2_name, sequence2 = next(iter(sequences2.items()))

    score, aligned_seq1, aligned_seq2 = simple_sequence_comparison(sequence1, sequence2)

    print(f"\nSequence 1 ({seq1_name}): {sequence1}")
    print(f"Sequence 2 ({seq2_name}): {sequence2}")
    print("\nSimple Comparison Results:")
    print(f"Alignment Score: {score}")
    print(f"Aligned Sequence 1: {aligned_seq1}")
    print(f"Aligned Sequence 2: {aligned_seq2}")


# ===============================================
# --- PART 2: Online BLAST Search (Biopython) ---
# ===============================================

def _preview(text, width=70):
    """Return the first `width` characters of `text`, followed by '...' if it was cut."""
    return f"{text[0:width]}{'...' if len(text) > width else ''}"


def run_online_blast(sequence_ids=None):
    """Run a blastn search against the 'nt' database for each query and print the hits.

    Does nothing except print a notice when Biopython could not be imported
    (``BIOPYTHON_AVAILABLE`` is False). Each query is submitted with
    ``NCBIWWW.qblast``, so the call needs network access and may take a while.

    Args:
        sequence_ids: Iterable of query identifiers passed to ``qblast`` one at a
            time. Defaults to the two demo accessions
            ``["AB021961.1", "OR523692.1"]`` when omitted.

    Returns:
        None. Results are printed. Any exception raised while querying or parsing
        one identifier is reported and the loop continues with the next one.
    """
    if not BIOPYTHON_AVAILABLE:
        print("\n" + "=" * 50)
        print("Skipping Online BLAST Search (Biopython not available)")
        print("=" * 50)
        return

    print("\n" + "=" * 50)
    print("Online NCBI BLAST Search")
    print("=" * 50)

    if sequence_ids is None:
        sequence_ids = ["AB021961.1", "OR523692.1"]  # demo accessions

    for seq_id in sequence_ids:
        print(f"\nPerforming BLAST for sequence ID: {seq_id} (This may take a minute or two...)")

        result_handle = None
        try:
            result_handle = NCBIWWW.qblast(
                "blastn",  # BLAST program (nucleotide blast)
                "nt",      # Database (nucleotide database)
                seq_id     # Query sequence ID
            )

            for blast_record in NCBIXML.parse(result_handle):
                print(f"Query: {blast_record.query}")

                # Track hits per record so a record without hits is always reported,
                # even when an earlier record in the same result did have hits.
                found_hits = False
                for alignment in blast_record.alignments:
                    for hsp in alignment.hsps:
                        found_hits = True
                        print(f"  Alignment: {alignment.title}")
                        print(f"  Length: {alignment.length}")
                        print(f"  E-value: {hsp.expect}")
                        print(f"  Score: {hsp.score}")
                        print(f"  Identities: {hsp.identities}/{hsp.align_length}")
                        print(f"  Gaps: {hsp.gaps}")
                        print(f"  Query start: {hsp.query_start}, Query end: {hsp.query_end}")
                        print(f"  Subject start: {hsp.sbjct_start}, Subject end: {hsp.sbjct_end}")
                        print(f"  Query: {_preview(hsp.query)}")
                        print(f"  Match: {_preview(hsp.match)}")
                        print(f"  Sbjct: {_preview(hsp.sbjct)}")
                        print("-" * 80)

                if not found_hits:
                    print(f"No significant hits found for {seq_id}.")

        except Exception as e:
            # Deliberately broad: one failed query must not stop the remaining ones.
            print(f"An error occurred during BLAST for {seq_id}: {e}")
            print("Please ensure your internet connection is stable and the sequence ID is valid.")
        finally:
            # Close the handle even when parsing fails part-way through.
            if result_handle is not None:
                result_handle.close()


# ===============================================
# --- MAIN EXECUTION BLOCK ---
# ===============================================

if __name__ == "__main__":
    # Run the demo stages in order: the local FASTA comparison first,
    # then the online BLAST search.
    for stage in (run_local_comparison, run_online_blast):
        stage()



Local FASTA Reading and Simple Comparison
Creating dummy files: sequence1.fasta and sequence2.fasta

Sequence 1 (SeqA_Example): ATGCGTACGTAC
Sequence 2 (SeqB_Example): ATGCGTAGCTACAG

Simple Comparison Results:
Alignment Score: 4
Aligned Sequence 1: ATGCGTACGTAC--
Aligned Sequence 2: ATGCGTAGCTACAG

Online NCBI BLAST Search

Performing BLAST for sequence ID: AB021961.1 (This may take a minute or two...)
Query: Mus musculus mutant p53 mRNA, complete cds
  Alignment: gi|5421849|dbj|AB021961.1| Mus musculus mutant p53 mRNA, complete cds
  Length: 1429
  E-value: 0.0
  Score: 2810.0
  Identities: 1429/1429
  Gaps: 0
  Query start: 1, Query end: 1429
  Subject start: 1, Subject end: 1429
  Query: TTCCTGGNCTGTAGGTAGCGACTACAGTTAGGGGGCACCTAGCATTCAGGCCCTCATCCTCCTCCTTCCC...
  Match: ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||...
  Sbjct: TTCCTGGNCTGTAGGTAGCGACTACAGTTAGGGGGCACCTAGCATTCAGGCCCTCATCCTCCTCCTTCCC...
----------------------------------------------------------