# Biopython from the lecture notes

### Seq

In [1]:
from Bio.Seq import MutableSeq, Seq

In [2]:
# Wrap a plain string of nucleotide letters in a Seq object.
seq1 = Seq("ATCGAAGG")

In [3]:
# A bare name as the cell's last expression displays the object's repr.
seq1

Seq('ATCGAAGG')

##### Seq objects are like strings

In [4]:
# len() gives the number of letters in the sequence.
len(seq1)

8

In [5]:
# Slicing works as on a string; the result is displayed as a new Seq.
seq1[:4]

Seq('ATCG')

In [7]:
# Count how many times the letter "T" occurs in seq1.
seq1.count("T")

1

##### But like strings, they are immutable; convert to a MutableSeq to edit in place

In [9]:
# Make an editable copy of seq1.
# Seq.tomutable() is deprecated and has been removed from recent Biopython
# releases; building a MutableSeq from the sequence text works on both old
# and new versions and leaves seq1 untouched.
seq2 = MutableSeq(str(seq1))

In [11]:
# seq1 itself still holds the original letters after the conversion.
seq1

Seq('ATCGAAGG')

In [12]:
# The converted copy is displayed as a MutableSeq.
seq2

MutableSeq('ATCGAAGG')

In [13]:
# Item assignment is allowed on the mutable copy: overwrite the first letter
# in place, then display the modified sequence.
seq2[0] = "T"
seq2

MutableSeq('TTCGAAGG')

In [20]:
# Convert the edited mutable sequence back to an immutable Seq.
# MutableSeq.toseq() is deprecated and has been removed from recent Biopython
# releases; building a Seq from the sequence text works on both old and new
# versions.
seq3 = Seq(str(seq2))

In [21]:
# The round-tripped object is a Seq again and carries the edited letters.
seq3

Seq('TTCGAAGG')

##### Other methods of Seq objects

In [23]:
# seq1 has 8 letters, which is not a multiple of three. Translating it as-is
# makes Biopython emit a partial-codon warning (documented as a possible
# future error), so trim to whole codons explicitly before translating.
# The printed protein is unchanged.
print(seq1[: len(seq1) - len(seq1) % 3].translate())

IE


In [25]:
# Print the reverse complement of seq1 as plain text.
print(seq1.reverse_complement())

CCTTCGAT


##### Additional sequence utility functions can be found in the Bio.SeqUtils module

### SeqRecord object

In [27]:
from Bio.SeqRecord import SeqRecord

In [28]:
# Attach an identifier to seq1 by wrapping it in a SeqRecord.
seq1_rec = SeqRecord(seq=seq1, id="001")

In [29]:
# Display the record; fields not passed to the constructor show placeholder values.
seq1_rec

SeqRecord(seq=Seq('ATCGAAGG'), id='001', name='<unknown name>', description='<unknown description>', dbxrefs=[])

### SeqIO module

In [31]:
from Bio import SeqIO

In [36]:
# Note: SeqIO.read() raises an error if the file contains more than one record; use SeqIO.parse() for multi-record files.

In [37]:
# Read every FASTA record from testfile.fasta into a list so the records
# remain available after the file is closed.
with open("testfile.fasta", "r") as fasta_handle:
    gene_list = [record for record in SeqIO.parse(fasta_handle, "fasta")]

In [38]:
# print() on a record gives a multi-line summary (ID, name, description,
# feature count and the sequence) for the first parsed entry.
print(gene_list[0])

ID: seq1
Name: seq1
Description: seq1 [More description here]
Number of features: 0
Seq('ctccaaagaaattgtagttttcttctggcttagaggtagatcatcttggtccaa...ATC', SingleLetterAlphabet())


### Entrez

In [39]:
from Bio import Entrez

In [40]:
# Set the contact address on the Entrez module before issuing any requests.
# NOTE(review): a personal address is hard-coded here; consider supplying it
# from configuration or an environment variable before sharing the notebook.
Entrez.email = "cheungngo@gmail.com"

In [51]:
# Request entry 186972394 from the "nucleotide" database as GenBank ("gb")
# text. This only opens a handle on the response; parsing happens later.
handle = Entrez.efetch(
    db="nucleotide",
    id="186972394",
    rettype="gb",
    retmode="text",
)

In [52]:
# Parse the single GenBank record from the efetch response, then close the
# handle so the underlying network connection is not left open.
nu_186972394 = SeqIO.read(handle, "genbank")
handle.close()

In [53]:
# Display the parsed record; its id, name and description were filled in by the parser.
nu_186972394

SeqRecord(seq=Seq('ATTTTTTACGAACCTGTGGAAATTTTTGGTTATGACAATAAATCTAGTTTAGTA...GAA', IUPACAmbiguousDNA()), id='EU490707.1', name='EU490707', description='Selenipedium aequinoctiale maturase K (matK) gene, partial cds; chloroplast', dbxrefs=[])