### Sequence annotation objects

In [5]:
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

help(SeqRecord)

Including a sequence identifier is very important if you want to write your SeqRecord out to a file.

In [7]:
simple_seq = Seq("GATC")

In [8]:
simple_seq_r = SeqRecord(simple_seq, id="AC12349")

In [9]:
simple_seq_r

SeqRecord(seq=Seq('GATC', Alphabet()), id='AC12349', name='<unknown name>', description='<unknown description>', dbxrefs=[])

> SeqRecord has a dictionary attribute, `annotations`

> Used for miscellaneous annotations that don't fit under one of the other, more specific attributes


In [10]:
simple_seq_r.annotations['evidence'] = "None. Just adding an annotation for craic"

In [11]:
simple_seq_r.annotations

{'evidence': 'None. Just adding an annotation for craic'}

In [12]:
# One Phred quality score per base of the four-letter sequence.
simple_seq_r.letter_annotations["phred_quality"] = [40, 40, 38, 30]

In [13]:
simple_seq_r.letter_annotations

{'phred_quality': [40, 40, 38, 30]}

### SeqRecord objects from FASTA files

In [14]:
from Bio import SeqIO

In [15]:
# NOTE(review): the ".rtf" extension, together with the stray "}" at the end of the sequence and the trailing "\\" in the description shown in the outputs below, suggests this file was saved as Rich Text rather than plain-text FASTA — confirm, and re-save as plain text if so.
record = SeqIO.read("NC_005816.fna.rtf","fasta")

In [16]:
record

SeqRecord(seq=Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...TG}', SingleLetterAlphabet()), id='gi|45478711|ref|NC_005816.1|', name='gi|45478711|ref|NC_005816.1|', description='gi|45478711|ref|NC_005816.1| Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence\\', dbxrefs=[])

#### Exploring key attributes of SeqRecord individually

In [17]:
record.seq

Seq('TGTAACGAACGGTGCAATAGTGATCCACACCCAACGCCTGAAATCAGATCCAGG...TG}', SingleLetterAlphabet())

In [18]:
record.id

'gi|45478711|ref|NC_005816.1|'

In [19]:
record.description

'gi|45478711|ref|NC_005816.1| Yersinia pestis biovar Microtus str. 91001 plasmid pPCP1, complete sequence\\'

In [20]:
record.dbxrefs

[]

In [21]:
record.annotations

{}

In [22]:
record.features

[]

### SeqRecord objects from GenBank files

In [34]:
from Bio import SeqIO

# Open in ordinary text mode. The legacy "U" (universal newlines) flag is
# redundant on Python 3 and was removed in Python 3.11, where "rU" raises
# ValueError.
with open("NC_005816.gb.rtf", "r") as input_handle:
    # Print every record the GenBank parser yields from the handle.
    for record in SeqIO.parse(input_handle, "genbank"):
        print(record)

# SeqIO.read expects the file to hold exactly one record and returns it directly.
record = SeqIO.read("NC_005816.gb.rtf", "genbank")

### SeqFeature

In [46]:
from Bio import SeqFeature
# Fuzzy start: somewhere after position 5 (printed as ">5").
start_pos = SeqFeature.AfterPosition(5)
# Fuzzy end: somewhere between positions 8 and 9 (printed as "(8^9)"); the leading 9 is the value used when the position is treated as a plain integer.
end_pos = SeqFeature.BetweenPosition(9, left=8, right=9)
# Combine the two fuzzy positions into a single location on the parent sequence.
my_location = SeqFeature.FeatureLocation(start_pos, end_pos)

The key idea about each SeqFeature object is to describe a region on a parent sequence, for which we use a location object, typically describing a range between two positions.

In [48]:
print(my_location)

[>5:(8^9)]


#### Location testing

In [49]:
from Bio import SeqIO

In [50]:
my_snp = 4350

In [51]:
# NOTE(review): this same relative path loaded successfully in the earlier FASTA cell but raises FileNotFoundError here (see the output below) — presumably the kernel's working directory changed or the file was moved; confirm before relying on `record`.
record = SeqIO.read("NC_005816.fna.rtf","fasta")

FileNotFoundError: [Errno 2] No such file or directory: 'NC_005816.fna.rtf'

In [52]:
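# TODO: continue from page 44 of the Biopython documentation (presumably the tutorial — confirm)
# TODO: find out how to read GenBank or FASTA files directly from an online source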