In [6]:
from Bio import SeqIO
from Bio.Seq import Seq

### About the DNA sequence analyzed

- Gene: HBB (Hemoglobin, subunit beta)

- Organism: Homo sapiens (Human)

- Function: Encodes the β-globin protein, which combines with α-globin to form hemoglobin, the protein in red blood cells that carries oxygen.

- Clinical relevance: Mutations in this gene can cause sickle cell anemia and β-thalassemia.

In [3]:
# Load the FASTA record from the local data directory.
# A forward-slash relative path is used because Python resolves it on Windows
# as well as on Linux/macOS, whereas a backslash separator only works on Windows.
record = SeqIO.read("data/sequence.fasta", "fasta")
# Keep a plain-string copy of the sequence for printing and slicing.
dna_seq = str(record.seq)

In [4]:
# Sanity check of the loaded sequence: report its total length and show the
# leading 50 bases, one item per line.
print(
    f"Sequence length: {len(dna_seq)}",
    f"First 50 bases: {dna_seq[:50]}",
    sep="\n",
)

Sequence length: 628
First 50 bases: ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACC


In [5]:
# Echo the complete sequence string as this cell's output.
dna_seq

'ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACCATGGTGCATCTGACTCCTGAGGAGAAGTCTGCCGTTACTGCCCTGTGGGGCAAGGTGAACGTGGATGAAGTTGGTGGTGAGGCCCTGGGCAGGCTGCTGGTGGTCTACCCTTGGACCCAGAGGTTCTTTGAGTCCTTTGGGGATCTGTCCACTCCTGATGCTGTTATGGGCAACCCTAAGGTGAAGGCTCATGGCAAGAAAGTGCTCGGTGCCTTTAGTGATGGCCTGGCTCACCTGGACAACCTCAAGGGCACCTTTGCCACACTGAGTGAGCTGCACTGTGACAAGCTGCACGTGGATCCTGAGAACTTCAGGCTCCTGGGCAACGTGCTGGTCTGTGTGCTGGCCCATCACTTTGGCAAAGAATTCACCCCACCAGTGCAGGCTGCCTATCAGAAAGTGGTGGCTGGTGTGGCTAATGCCCTGGCCCACAAGTATCACTAAGCTCGCTTTCTTGCTGTCCAATTTCTATTAAAGGTTCCTTTGTTCCCTAAGTCCAACTACTAAACTGGGGGATATTATGAAGGGCCTTGAGCATCTGGATTCTGCCTAATAAAAAACATTTATTTTCATTGCAA'

#### 1. Basic Statistics

In [7]:
# Wrap the plain string in a Bio.Seq.Seq object; the following cells call
# Seq methods (count, reverse_complement) on it.
sequence = Seq(dna_seq)

In [9]:
# Basic statistics: sequence length, per-base counts and GC percentage.

length = len(sequence)

# Count each base exactly once and reuse the results for both the report and
# the GC figure. Only the uppercase letters A/T/G/C are counted here.
base_counts = {base: sequence.count(base) for base in "ATGC"}

# Guard the division so an empty sequence reports 0% instead of raising
# ZeroDivisionError.
gc_content = (
    100 * (base_counts["G"] + base_counts["C"]) / length if length else 0.0
)

print(f"Sequence length: {length} bp")
for base in "ATGC":
    print(f"{base}: {base_counts[base]}")
print(f"GC Content: {gc_content:.2f}%")

Sequence length: 628 bp
A: 139
T: 167
G: 165
C: 157
GC Content: 51.27%


#### 2. Reverse Complement

In [11]:
# Build the reverse complement of the whole sequence, then show the first
# 50 bases of each strand for a side-by-side look.
rev_comp = sequence.reverse_complement()

original_head = sequence[:50]
rev_comp_head = rev_comp[:50]

print("Original:       ", original_head)
print("Reverse Complement:", rev_comp_head)

Original:        ACATTTGCTTCTGACACAACTGTGTTCACTAGCAACCTCAAACAGACACC
Reverse Complement: TTGCAATGAAAATAAATGTTTTTTATTAGGCAGAATCCAGATGCTCAAGG
