In [1]:
from Bio.SeqIO import parse
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq

In [2]:
file = open("example.fasta")
records = parse(file, "fasta")
for record in records:
    print("------------------------------------------")
    print("ID:",record.id)
    print("NAME:",record.name)
    print("DESCRIPTION:", record.description)
    print("ANNOTATIONS:", record.annotations)
    print("SEQUENCE DATA:", record.seq)
    print("SEQUENCE ALPHABET:", record.seq.alphabet)
    print("------------------------------------------")
    print()

------------------------------------------
ID: sp|P25730|FMS1_ECOLI
NAME: sp|P25730|FMS1_ECOLI
DESCRIPTION: sp|P25730|FMS1_ECOLI CS1 fimbrial subunit A precursor (CS1 pilin) MKLKKTIGAMALATLFATMGASAVEKTISVTASVDPTVDLLQSDGSALPNSVALTYSPAV
ANNOTATIONS: {}
SEQUENCE DATA: NNFEAHTINTVVHTNDSDKGVVVKLSADPVLSNVLNPTLQIPVSVNFAGKPLSTTGITIDSNDLNFASSGVNKVSSTQKLSIHADATRVTGGALTAGQYQGLVSIILTKSTITTITTKGT
SEQUENCE ALPHABET: SingleLetterAlphabet()
------------------------------------------

------------------------------------------
ID: sp|P15488|FMS3_ECOLI
NAME: sp|P15488|FMS3_ECOLI
DESCRIPTION: sp|P15488|FMS3_ECOLI CS3 fimbrial subunit A precursor (CS3 pilin) MLKIKYLLIGLSLSAMSSYSLAAAGPTLTKELALNVLSPAALDATWAPQDNLTLSNTGVS
ANNOTATIONS: {}
SEQUENCE DATA: NTLVGVLTLSNTSIDTVSIASTNVSDTSKNGTVTFAHETNNSASFATTISTDNANITLDKNAGNTIVKTTNGSQLPTNLPLKFITTEGNEHLVSGNYRANITITSTIKGGGTKKGTTDKK
SEQUENCE ALPHABET: SingleLetterAlphabet()
------------------------------------------



# Sequence

In [4]:
from Bio.Seq import Seq
seq = Seq("agct")
print(seq)
seq

agct


Seq('agct')

# Alphabet Module

In [6]:
from Bio.Seq import Seq
from Bio.Alphabet import single_letter_alphabet

test_seq = Seq('AGTACACTGGT', single_letter_alphabet)
test_seq

Seq('AGTACACTGGT', SingleLetterAlphabet())

In [7]:
from Bio.Seq import Seq
from Bio.Alphabet import generic_protein

test_seq = Seq('AGTACACTGGT', generic_protein)
test_seq

Seq('AGTACACTGGT', ProteinAlphabet())

In [8]:
from Bio.Data import IUPACData
IUPACData.protein_letters

'ACDEFGHIKLMNPQRSTVWY'

# Basic Operations

In [9]:
seq_string = Seq('AGCTAGCT')
seq_string[0]

'A'

In [10]:
len(seq_string)

8

In [11]:
from Bio.Alphabet import generic_dna, generic_protein

seq1 = Seq('AGCT', generic_dna)
seq2 = Seq('TCGA', generic_dna)

seq1 + seq2

Seq('AGCTTCGA', DNAAlphabet())

In [12]:
from Bio.Alphabet import generic_dna
list = [Seq('AGCT',generic_dna),Seq('TCGA',generic_dna),Seq('AAA',generic_dna)]
for s in list:
    print(s)
final_seq = Seq(' ', generic_dna)
for s in list:
    final_seq += s
final_seq

AGCT
TCGA
AAA


Seq(' AGCTTCGAAAA', DNAAlphabet())

In [13]:
from Bio.Alphabet import generic_rna
rna = Seq('agct', generic_rna)
rna.upper()

Seq('AGCT', RNAAlphabet())

In [17]:
rna = Seq('agct', generic_dna)
print("'a' in rna:",'a' in rna)
print("'A' in rna:",'A' in rna)
rna1 = Seq('AGCT',generic_dna)
print("rna is rna1:",rna is rna1)

'a' in rna: True
'A' in rna: False
rna is rna1: False


In [19]:
protein_seq = Seq('AGUACACUGGU', generic_protein)
print(protein_seq.find('G'))
print(protein_seq.find('GG'))

1
8


In [20]:
protein_seq = Seq('AGUACACUGGU',generic_protein)
protein_seq.split('A')

[Seq('', ProteinAlphabet()),
 Seq('GU', ProteinAlphabet()),
 Seq('C', ProteinAlphabet()),
 Seq('CUGGU', ProteinAlphabet())]

In [24]:
strip_seq = Seq("    AGCT    ")
print("'",strip_seq,"'")
print(strip_seq.strip())

'     AGCT     '
AGCT
