In [None]:
# Introduction to Biopython
# Biopython is a library designed for bioinformatics and computational biology tasks.
# It provides tools for working with biological data such as DNA, RNA, and protein sequences.
# Biopython can handle file formats like FASTA, GenBank, PDB, and others.
# It supports tasks like sequence analysis, structural bioinformatics, and database queries.
# Source: https://biopython.org/

In [None]:
# Advantages:
# 1. Comprehensive support for bioinformatics tasks, including sequence analysis and structural bioinformatics.
# 2. Handles a wide range of file formats used in bioinformatics, such as FASTA, GenBank, and PDB.
# 3. Actively maintained with extensive documentation and community support.

# Limitations:
# 1. Requires a basic understanding of bioinformatics concepts, making it less accessible to beginners.
# 2. Some features rely on external databases or tools, which may require additional setup (e.g., BLAST queries).
# 3. Much of the library is implemented in Python, so it can become a performance bottleneck on large-scale genomics datasets that demand heavy computational resources.


In [7]:
!pip install biopython
from Bio import SeqIO
from Bio.Seq import Seq

# Two made-up DNA records in FASTA layout: a ">" header line, then the bases.
fasta_data = (
    ">sequence1\n"
    "ATGCGACTACGATCGATCGTAGCTAGTACGATCGATCGATCGA\n"
    ">sequence2\n"
    "ATGCTAGCTAGTACGATGATCGATCGTAGCTAGCTAGCATGCT\n"
)

# Persist the records to disk so they can be read back like a real FASTA file.
with open("example.fasta", mode="w") as fasta_handle:
    fasta_handle.write(fasta_data)

def gc_percent(sequence):
    """Return the GC content of ``sequence`` as a percentage between 0 and 100.

    Args:
        sequence: A ``str`` or Biopython ``Seq`` of nucleotide letters.

    Returns:
        ``100 * (G + C) / length`` as a float. Counting is case-insensitive, so
        soft-masked (lowercase) bases are included. An empty sequence yields
        ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    if len(sequence) == 0:
        return 0.0
    bases = sequence.upper()
    return 100 * (bases.count("G") + bases.count("C")) / len(bases)


# Parsing the FASTA file
print("Parsing sequences from FASTA file:")
for record in SeqIO.parse("example.fasta", "fasta"):
    gc_content = gc_percent(record.seq)
    print(f"ID: {record.id}")
    print(f"Sequence: {record.seq}")
    print(f"GC Content: {gc_content:.2f}%")
    print()

# Sequence manipulation
print("Demonstrating sequence manipulation:")
seq = Seq("ATGCGACTACGATCGATCGTAGCTAGTACGATCG")
print(f"Original Sequence: {seq}")
print(f"Complement: {seq.complement()}")
print(f"Reverse Complement: {seq.reverse_complement()}")

# translate() works on whole codons. This sequence is 34 bases long, so one base is
# left over and Biopython emits a "Partial codon" warning when it is passed as-is.
# Trimming to a multiple of three first gives the same protein without the warning.
complete_codons = seq[: len(seq) - len(seq) % 3]
print(f"Translated Protein: {complete_codons.translate()}")


Parsing sequences from FASTA file:
ID: sequence1
Sequence: ATGCGACTACGATCGATCGTAGCTAGTACGATCGATCGATCGA
GC Content: 48.84%

ID: sequence2
Sequence: ATGCTAGCTAGTACGATGATCGATCGTAGCTAGCTAGCATGCT
GC Content: 46.51%

Demonstrating sequence manipulation:
Original Sequence: ATGCGACTACGATCGATCGTAGCTAGTACGATCG
Complement: TACGCTGATGCTAGCTAGCATCGATCATGCTAGC
Reverse Complement: CGATCGTACTAGCTACGATCGATCGTAGTCGCAT
Translated Protein: MRLRSIVASTI


