### Sequence Operations

In [1]:
from Bio.Seq import Seq

# Short DNA fragment (15 nt, i.e. five codons) used as the running example.
gene = Seq("GTGGCTGATCGTATC")

# One line per view of the sequence: the original, its complement, its
# reverse complement, the transcript, and the translation.
print(
    gene,
    gene.complement(),
    gene.reverse_complement(),
    gene.transcribe(),
    gene.translate(),
    sep="\n",
)

GTGGCTGATCGTATC
CACCGACTAGCATAG
GATACGATCAGCCAC
GUGGCUGAUCGUAUC
VADRI


In [2]:
# A FASTA file holds one or more gene/protein records. Each record starts with
# a ">" header line carrying its identifier, followed by the sequence itself.
from Bio import SeqIO

# For every record in the file, show its identifier, the repr of its sequence,
# and its length, each on its own line.
for sequence in SeqIO.parse("protein.fasta", "fasta"):
    print(sequence.id, repr(sequence.seq), len(sequence), sep="\n")

NP_000199.2
Seq('MATGGRRGAAAAPLLVAVAALLLGAAGHLYPGEVCPGMDIRNNLTRLHELENCS...NPS')
1382
NC_007127.7:c4317955-4287539
Seq('AGACCTGCAGCGCTGAGCGGCAGAAGCGCTGATAGACTGAGCGAGCGCAAAAGT...GTT')
30417
sp|P63072|UBD_MOUSE
Seq('MASVRTCVVRSDQWRLMTFETTENDKVKKINEHIRSQTKVSVQDQILLLDSKIL...TGG')
162
sp|P49912|OPSD_RABIT
Seq('MNGTEGPDFYIPMSNQTGVVRSPFEYPQYYLAEPWQFSMLAAYMFLLIVLGFPI...APA')
348


In [3]:
# GC content: the fraction of G and C bases in the `gene` sequence.
from Bio.SeqUtils import gc_fraction

gc_content = gc_fraction(gene)
print(gc_content)

0.5333333333333333


In [9]:
# Six-frame translation: a text report of the DNA translated in the three
# forward reading frames and the three reading frames of the reverse strand.
from Bio.SeqUtils import six_frame_translations

frames_report = six_frame_translations("ATGTAGTGATGGATGGCCCGTGAGTGAGGT")
print(frames_report)

GC_Frame: a:6 t:8 g:13 c:3
Sequence: atgtagtgat ... tgagtgaggt, 30 nt, 53.33 %GC


1/1
  V  V  M  D  G  P  *  V  R
 C  S  D  G  W  P  V  S  E
M  *  *  W  M  A  R  E  *  G
atgtagtgatggatggcccgtgagtgaggt   53 %
tacatcactacctaccgggcactcactcca
Y  H  H  I  A  R  S  H  P
 H  L  S  P  H  G  T  L  S  T
  T  T  I  S  P  G  H  T  L






In [5]:
# Amino acids: convert a string of one-letter codes into three-letter codes.
from Bio.SeqUtils import seq3

sequence = "MVCYGHWWMCVH"
three_letter = seq3(sequence)
print(three_letter)

MetValCysTyrGlyHisTrpTrpMetCysValHis


### Pairwise Sequence Alignment

In [6]:
# GLOBAL ALIGNMENT
# Bio.Align provides the tools for both global and local pairwise alignment.
from Bio import Align

# Build an aligner whose match score is 2.0 (added to the total for every
# matching position) and keep it in a variable for the cells that follow.
aligner = Align.PairwiseAligner(match_score=2.0)

seq1, seq2 = "GAACT", "GAT"

# score() gives the alignment score without listing the alignments.
score = aligner.score(seq1, seq2)
print(f" The score of the alignment is {score}")

# The aligner runs in global mode by default, so align() yields every optimal
# global alignment of the two sequences.
alignments = aligner.align(seq1, seq2)
for alignment in alignments:
    print(alignment)

 The score of the alignment is 6.0
target            0 GAACT 5
                  0 ||--| 5
query             0 GA--T 3

target            0 GAACT 5
                  0 |-|-| 5
query             0 G-A-T 3



In [7]:
# LOCAL ALIGNMENT
# Switch the aligner created in the global-alignment cell to local mode.
# NOTE: this mutates the shared `aligner`, which stays in local mode afterwards.
aligner.mode = "local"

# Named local_target/local_query rather than seq3/seq4: `seq3` is also the
# function imported from Bio.SeqUtils, and rebinding that name to a string
# would make any later seq3(...) call fail with "'str' object is not callable".
local_target = "ATTAATG"
local_query = "TAAT"

# Best local alignment score for the pair.
localscore = aligner.score(local_target, local_query)
print(f"The local alignment score is {localscore}")

# Every optimal local alignment of the two sequences.
alignment_local = aligner.align(local_target, local_query)
for alignment in alignment_local:
    print(alignment)

The local alignment score is 8.0
target            1 TTAAT 6
                  0 |-||| 5
query             0 T-AAT 4

target            2 TAAT 6
                  0 |||| 4
query             0 TAAT 4

