# Biopython demo
Biopython is a set of tools for bioinformaticians.

More info here: http://biopython.org/wiki/Main_Page

You can download Biopython here: http://biopython.org/wiki/Download

A tutorial is available here: http://biopython.org/DIST/docs/tutorial/Tutorial.html or here: http://biopython.org/wiki/Getting_Started

In [None]:
# you can import Biopython by including the following line in your code
import Bio

## Working with sequence data

In [1]:
from Bio.Seq import Seq
# Create a sequence object from a plain DNA string
my_seq = Seq('CATGTAGACTAG')
# print() with a single argument behaves the same on Python 2 and Python 3
print(my_seq)

CATGTAGACTAG


In [2]:
# Check which class the sequence object is an instance of
type(my_seq)

Bio.Seq.Seq

In [3]:
# Print out some details about the sequence: its length, its reverse
# complement and its protein translation
print('seq %s is %i bases long' % (my_seq, len(my_seq)))
print('reverse complement is %s' % my_seq.reverse_complement())
print('protein translation is %s' % my_seq.translate())

seq CATGTAGACTAG is 12 bases long
reverse complement is CTAGTCTACATG
protein translation is HVD*


## Working with files in FASTA format

![caption](orchid.jpg)

In [None]:
from Bio import SeqIO
# For each sequence print: 1. sequence ID, 2. sequence length
for seq_record in SeqIO.parse("ls_orchid.fasta", "fasta"):
    print(seq_record.id)
    print(len(seq_record))

In [None]:
# Read every FASTA record into a list. The file name is passed straight to
# SeqIO.parse (as in the loop over the same file earlier), so there is no
# manually opened handle to leak if parsing fails, and no "rU" mode, which
# Python 3.11+ rejects.
records = list(SeqIO.parse("ls_orchid.fasta", "fasta"))
print(type(records))
print(type(records[0]))

In [None]:
print(records[0].id)   # first record
print(records[-1].id)  # last record

## Working with files in GenBank format

In [None]:
from Bio import SeqIO
# Print every record of the GenBank file. Passing the file name lets
# SeqIO.parse open and close the file itself, so nothing is left open if
# parsing fails, and the "rU" mode (rejected by Python 3.11+) is not needed.
for record in SeqIO.parse("ls_orchid.gbk", "genbank"):
    print(record)

In [None]:
# We can convert from GenBank format to FASTA; SeqIO.convert returns the
# number of records written
count = SeqIO.convert("ls_orchid.gbk", "genbank", "ls_orchid_converted.fasta", "fasta")
print("Converted %i records" % count)

In [None]:
# Load one record from GenBank
record = SeqIO.read("NC_005816.gb", "genbank")
print(record)

In [None]:
# Show the individual fields of the record loaded above
print(record.seq)
print(record.id)
print(record.description)

## Example: count GC%

In [None]:
# GC percentage computed with plain string methods, no Biopython involved
my_seq = 'GATCGATGGGCCTATATAGGATCGAAAATCGC'
# Number of bases that are either G or C
gc_bases = sum(my_seq.count(base) for base in "GC")
100 * float(gc_bases) / len(my_seq)

In [None]:
# with Biopython: build a Seq and let Bio.SeqUtils.GC compute the GC content
from Bio.Seq import Seq
# NOTE(review): Bio.Alphabet was removed in Biopython 1.78 - on newer releases
# this import fails and Seq takes no alphabet argument; confirm the target version
from Bio.Alphabet import IUPAC
from Bio.SeqUtils import GC
my_seq = Seq('GATCGATGGGCCTATATAGGATCGAAAATCGC', IUPAC.unambiguous_dna)
GC(my_seq)

## Example: Transcription

In [None]:
# Transcription is done from template DNA
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG", IUPAC.unambiguous_dna)
print(coding_dna)
# The template strand is obtained as the reverse complement of the coding strand
template_dna = coding_dna.reverse_complement()
print(template_dna)

In [None]:
# Transcribe DNA from the coding strand
messenger_rna_coding = coding_dna.transcribe()
print(messenger_rna_coding)
# Transcribe DNA as it is done in the cell - from the template strand
# (reverse-complement the template first, then transcribe)
messenger_rna_template = template_dna.reverse_complement().transcribe()
print(messenger_rna_template)

In [None]:
import inspect
# Show the source code of the transcribe method; inspect.getsource returns
# the source lines already joined into a single string
print(inspect.getsource(coding_dna.transcribe))

## Example: Translation

In [None]:
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", IUPAC.unambiguous_rna)
# Translate RNA using different translation tables: the default one first,
# then two tables selected by name
print(messenger_rna.translate())
print(messenger_rna.translate(table="Vertebrate Mitochondrial"))
print(messenger_rna.translate(table="Bacterial"))

In [None]:
# Translate directly from coding DNA
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG", IUPAC.unambiguous_dna)
print(coding_dna.translate())

In [None]:
# Customize the stop symbol in the output
print(coding_dna.translate(stop_symbol="STOP"))

## Translation tables

In [None]:
# Fetch two codon tables from Bio.Data, looking each one up by its name
from Bio.Data import CodonTable
dna_tables = CodonTable.unambiguous_dna_by_name
standard_table = dna_tables["Standard"]
mito_table = dna_tables["Vertebrate Mitochondrial"]

In [None]:
# Print the standard codon table
print(standard_table)

In [None]:
# Print the codon table for mitochondria in vertebrates
print(mito_table)