In [None]:
!pip install biopython

Collecting biopython
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: biopython
Successfully installed biopython-1.83


In [None]:
# Seq is the sequence class from Biopython's Bio.Seq module.
from Bio.Seq import Seq
# Short example DNA sequence used by the cells that follow.
sequence = Seq("ATTGCTAGCT")

In [None]:
# print() shows the plain sequence letters rather than the Seq(...) repr.
print(sequence)

ATTGCTAGCT


In [None]:
# Show the sequence next to its complement and its reverse complement.
# The spacing inside the literals reproduces the original layout exactly.
print(
    f"Original Sequence:       {sequence!s} \n"
    f" Complementary sequence: {sequence.complement()!s}"
)
print(f"Reverse Complementary :  {sequence.reverse_complement()!s}")

Original Sequence:       ATTGCTAGCT 
 Complementary sequence: TAACGATCGA
Reverse Complementary :  AGCTAGCAAT


# 4. Sequence Methods

In [None]:
# Walk the sequence one letter at a time, printing each position with its letter.
for index, letter in enumerate(sequence):
    print(index, letter)

0 A
1 T
2 T
3 G
4 C
5 T
6 A
7 G
8 C
9 T


In [None]:
# String-like queries on the sequence: length, search, indexing and counting.
print(f"Length of the sequence : {len(sequence)}")
print(f"Index of the first A : {sequence.find('A')}")
print(f"at Index 5 the nucleotide present is : {sequence[5]}")
# count() tallies non-overlapping occurrences only.
print(f"Count of the letter G in the sequence : {sequence.count('G')}")
print("Count method does not considered Overlapping")

Length of the sequence : 10
Index of the first A : 0
at Index 5 the nucleotide present is : T
Count of the letter G in the sequence : 2
Count method does not considered Overlapping


## Percentage of Each Nucleotide in the Sequence

In [None]:
# NOTE: from this cell on, `sequence` is a plain str (not a Seq object).
sequence = "AGCTGCATGACTCCGCTGATGCGTAACGTCACCTCTCAGT"


def base_percentage(seq, bases):
    """Return the percentage of ``seq`` made up of the letters in ``bases``.

    Args:
        seq: The sequence to inspect; anything supporting ``len()`` and
            ``.count()`` (for example a ``str``).
        bases: The letters to count, e.g. ``"GC"`` or ``"A"``. Matching is
            case-sensitive, exactly like ``seq.count``.

    Returns:
        The percentage as a float in the range 0-100, or ``0.0`` for an empty
        sequence (avoids a ZeroDivisionError).
    """
    if len(seq) == 0:
        return 0.0
    return 100 * sum(seq.count(base) for base in bases) / len(seq)


# What is the percentage of Guanine and Cytosine in the sequence?
print("GC Percentage :", base_percentage(sequence, "GC"))

# What is the percentage of Adenine and Thymine in the sequence?
print("AT Percentage :", base_percentage(sequence, "AT"))

# What is the percentage of each individual nucleotide?
for label, base in (
    ("Adenine", "A"),
    ("Thymine", "T"),
    ("Guanine", "G"),
    ("Cytosine", "C"),
):
    print(f"{label} Percentage :", base_percentage(sequence, base))


GC Percentage : 55.0
AT Percentage : 45.0
Adenine Percentage : 20.0
Thymine Percentage : 25.0
Guanine Percentage : 22.5
Cytosine Percentage : 32.5


## Built-in Function to Find GC %

In [None]:
from Bio.SeqUtils import gc_fraction

# gc_fraction gives the GC share as a fraction (hence the factor of 100 below);
# it is computed once and reused, and the AT share is its complement.
gc_content = gc_fraction(sequence)
print("GC Percentage", 100 * gc_content)
print("AT Percentage", 100 * (1 - gc_content))

GC Percentage 55.00000000000001
AT Percentage 44.99999999999999


# 5. Slicing Sequence and FASTA format

In [None]:
# Slices use the usual Python half-open [start:stop] convention.
starting_slice = sequence[:10]   # first ten letters
slice_1 = sequence[5:10]         # positions 5 through 9
ending_slice = sequence[15:]     # everything from position 15 onwards

print(sequence)
for label, fragment in (
    ("slice :", slice_1),
    ("Ending slice :", ending_slice),
    ("Starting slice :", starting_slice),
):
    print(label, fragment)

AGCTGCATGACTCCGCTGATGCGTAACGTCACCTCTCAGT
slice : CATGA
Ending slice : CTGATGCGTAACGTCACCTCTCAGT
Starting slice : AGCTGCATGA


## FASTA

In [None]:
# A minimal FASTA record: a ">"-prefixed header line, then the sequence line.
fasta = ">name\n{}\n".format(sequence)
print(fasta)

>name
AGCTGCATGACTCCGCTGATGCGTAACGTCACCTCTCAGT



# 6. Concatenation and Case Transformation


In [None]:
list_sequence = [Seq("ATG"), Seq("TAG"), Seq("TAA")]

# Joining on an empty Seq concatenates every fragment in a single call,
# instead of building a new intermediate Seq on each `+=` in a loop.
result = Seq("").join(list_sequence)
print(result)

ATGTAGTAA


In [None]:
# Delimiter: join() places the delimiter sequence between consecutive entries.
contig = [
    Seq("ATG"),
    Seq("TAG"),
    Seq("TAA"),
]
delimiter = Seq("NNNNN")  # five N's

delimiter_join = delimiter.join(contig)
print(delimiter_join)

ATGNNNNNTAGNNNNNTAA


## Case Transformation

In [None]:
dna_seq = Seq("ATGCgtca")
print("original Sequence : ", dna_seq)
# Print the all-uppercase form first, then the all-lowercase form.
for converted in (dna_seq.upper(), dna_seq.lower()):
    print(converted)


original Sequence :  ATGCgtca
ATGCGTCA
atgcgtca


In [None]:
# Membership tests are case-sensitive: "atg" is only found once the
# mixed-case sequence has been lowercased.
for candidate in (dna_seq, dna_seq.lower()):
    print("atg" in candidate)

False
True


# 7. Transcription

In [None]:
coding_strand = Seq("ATGCATGCATCGATCGACGCA")
template_strand = coding_strand.reverse_complement()

# Transcription: the printed result has U wherever the coding strand has T.
mRNA_sequence = coding_strand.transcribe()

# Reverse Transcription: note that, despite its name, `mRNA` holds the
# back-transcribed (T-containing) sequence, not an RNA sequence.
mRNA = mRNA_sequence.back_transcribe()

for label, strand in (
    ("coding strand:", coding_strand),
    ("template strand:", template_strand),
    ("mRNA sequence:", mRNA_sequence),
    ("Reverse Transcription:", mRNA),
):
    print(label, strand)

coding strand: ATGCATGCATCGATCGACGCA
template strand: TGCGTCGATCGATGCATGCAT
mRNA sequence: AUGCAUGCAUCGAUCGACGCA
Reverse Transcription: ATGCATGCATCGATCGACGCA


# 8. Translation

The translation tables available in Biopython are based on those from the NCBI: https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi


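By default, translation uses the standard genetic code. If we are dealing with a sequence that follows a different code, for example a mitochondrial or bacterial sequence, we need to tell the translation function to use the relevant genetic code through its `table` argument. The next cell translates the mRNA with the default table; the bacterial gene further below passes `table` explicitly.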

In [None]:
# Translate the mRNA with the default genetic code.
# In a translated protein, "*" represents a stop codon.
print("Protein Sequence : ", mRNA_sequence.translate())

Protein Sequence :  MHASIDA


In [None]:
# Example gene; the adjacent string literals are concatenated into one sequence.
gene = Seq(
    "GTGAAAAAGATGCAATCTATCGTACTCGCACTTTCCCTGGTTCTGGTCGCTCCCATGGCA"
    "GCACAGGCTGCGGAAATTACGTTAGTCCCGTCAGTAAAATTACAGATAGGCGATCGTGAT"
    "AATCGTGGCTATTACTGGGATGGAGGTCACTGGCGCGACCACGGCTGGTGGAAACAACAT"
    "TATGAATGGCGAGGCAATCGCTGGCACCTACACGGACCGCCGCCACCGCCGCGCCACCAT"
    "AAGAAAGCTCCTCATGATCATCACGGCGGTCATGGTCCAGGCAAACATCACCGCTAA"
)

In [None]:
 print(gene.translate(table='Bacterial'))

VKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHLHGPPPPPRHHKKAPHDHHGGHGPGKHHR*


In [None]:
 print(gene.translate(table='Bacterial',cds = True))

MKKMQSIVLALSLVLVAPMAAQAAEITLVPSVKLQIGDRDNRGYYWDGGHWRDHGWWKQHYEWRGNRWHLHGPPPPPRHHKKAPHDHHGGHGPGKHHR


# 9. Mutable Sequence

In [None]:
from Bio.Seq import MutableSeq
# Start from an ordinary Seq; the next cell passes it to MutableSeq.
seq = Seq("ACGATAGTCATGGGCCGCTGAAAGGGTGCCCGA")
# Bare last expression: the notebook displays the object's repr.
seq

Seq('ACGATAGTCATGGGCCGCTGAAAGGGTGCCCGA')

In [None]:
# Build a MutableSeq from the Seq so that individual positions can be assigned.
mutable_seq = MutableSeq(seq)
mutable_seq

MutableSeq('ACGATAGTCATGGGCCGCTGAAAGGGTGCCCGA')

In [None]:
# Item assignment edits the sequence in place: position 1 (zero-based) becomes "T".
mutable_seq[1] = "T"
mutable_seq

MutableSeq('ATGATAGTCATGGGCCGCTGAAAGGGTGCCCGA')

In [None]:
# Reverses mutable_seq in place (nothing is assigned back), so running this
# cell a second time flips the sequence back to its previous order.
mutable_seq.reverse()

In [None]:
# Display the sequence after the in-place reversal.
mutable_seq

MutableSeq('AGCCCGTGGGAAAGTCGCCGGGTACTGATAGTA')