In [22]:
##  Amino Acid Sequences ##
## The AASequence class handles amino acid sequences in OpenMS ##

from pyopenms import AASequence
from pyopenms import ResidueDB

In [23]:
from pyopenms import *
# Build a peptide from its one-letter string and derive sub-sequences from it.
seq = AASequence.fromString("EFDIANGAP")
prefix = seq.getPrefix(3)   # leading 3 residues
suffix = seq.getSuffix(4)   # trailing 4 residues
con = seq + seq             # '+' concatenates two sequences

print("Sequence:    ", seq)
print("Prefix:      ", prefix)
print("Suffix:      ", suffix)
print("Concatenated:", con)

# Monoisotopic weight of the uncharged peptide [M].
full = seq.getMonoWeight()
# Monoisotopic weight of the doubly charged precursor [M+2H]2+.
precursor = seq.getMonoWeight(Residue.ResidueType.Full, 2)

# m/z of the 2+ precursor. getMZ(2) is the shortcut for
# getMonoWeight(Residue.ResidueType.Full, 2) / 2.0, so the value is computed
# once here instead of being assigned twice.
mz = seq.getMZ(2)

print('===============================================')
print("Monoisotopic mass of peptide [M] is", full)
print("Monoisotopic mass of peptide precursor [M+2H]2+ is", precursor)
print("Monoisotopic m/z of [M+2H]2+ is", mz)

Sequence:     EFDIANGAP
Prefix:       EFD
Suffix:       NGAP
Concatenated: EFDIANGAPEFDIANGAP
Monoisotopic mass of peptide [M] is 932.4239669140001
Monoisotopic mass of peptide precursor [M+2H]2+ is 934.438519847542
Monoisotopic m/z of [M+2H]2+ is 467.219259923771


In [16]:
seq = AASequence.fromString("EFDIANGAP")

# Header line first, then one "<name> : <monoisotopic weight>" line per residue,
# in sequence order.
print("The peptide", str(seq), "consists of the following amino acids:")
for residue in seq:
    residue_name = residue.getName()
    residue_weight = residue.getMonoWeight()
    print(residue_name, ":", residue_weight)

The peptide EFDIANGAP consists of the following amino acids:
Glutamate : 147.0531592871
Phenylalanine : 165.07897935090003
Aspartate : 133.0375092233
Isoleucine : 131.0946294147
Alanine : 89.0476792233
Asparagine : 132.0534932552
Glycine : 75.0320291595
Alanine : 89.0476792233
Proline : 115.06332928709999


In [24]:
## Molecular Formula ##

# Parse the peptide, ask it for its formula, and report both.
peptide_text = "EFDIANGAP"
seq = AASequence.fromString(peptide_text)
seq_formula = seq.getFormula()
print("Peptide", seq, "has molecular formula", seq_formula)

Peptide EFDIANGAP has molecular formula C41H60N10O15


In [25]:
##   Fragment Ions ##

# Create the peptide here so the cell does not depend on a `seq` variable
# left in the kernel by a different cell.
seq = AASequence.fromString("EFDIANGAP")

# The y4 ion is the 4-residue suffix of the peptide, i.e. "NGAP".
suffix = seq.getSuffix(4)
print("y4 ion sequence:", suffix)

# Formula of the fragment requested as a y-ion with charge 2 (y4++).
y4_formula = suffix.getFormula(Residue.ResidueType.YIon, 2)

# m/z = weight of the doubly charged y-ion divided by its charge.
# Computed once and reused for printing. Other ion types
# (e.g. Residue.ResidueType.XIon) can be passed in the same way.
y4_mz = suffix.getMonoWeight(Residue.ResidueType.YIon, 2) / 2.0

print("y4 mz:", y4_mz)

print("y4 molecular formula:", y4_formula)

y4 ion sequence: NGAP
y4 mz: 179.589694333621
y4 molecular formula: C14H23N5O6


In [26]:
##    Modified Sequences ##
seq = AASequence.fromString("AEKTEDESEKPIM(Oxidation)CER")

# Print the same modified peptide in each string notation, one per line.
for render in (
    seq.toUnmodifiedString,
    seq.toString,
    seq.toUniModString,
    seq.toBracketString,
):
    print(render())
# Passing False switches the bracket notation to a non-rounded mass
# (per the recorded output of this cell).
print(seq.toBracketString(False))

print("*" * 40)

# Several input notations for a modified methionine; each string is parsed
# and the resulting sequence printed.
for notation in (
    "DFPIAM(UniMod:35)GER",
    "DFPIAM[+16]GER",
    "DFPIAM[+15.99]GER",
    "DFPIAM[147]GER",
    "DFPIAM[147.035405]GER",
):
    print(AASequence.fromString(notation))

AEKTEDESEKPIMCER
AEKTEDESEKPIM(Oxidation)CER
AEKTEDESEKPIM(UniMod:35)CER
AEKTEDESEKPIM[147]CER
AEKTEDESEKPIM[147.03540001709996]CER
****************************************
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER
DFPIAM(Oxidation)GER


In [20]:
##   Proteins & FASTA Files ##
def _make_fasta_entry(identifier, description, sequence):
    """Return a FASTAEntry populated with the given identifier, description and sequence."""
    entry = FASTAEntry()
    entry.sequence = sequence
    entry.description = description
    entry.identifier = identifier
    return entry


# NOTE(review): both entries carry the same sequence string and it contains a
# lowercase 'l' ("...PMGElPLMEG...") - confirm whether either is intentional.
bsa = _make_fasta_entry(
    "BSA",
    "BSA Bovine Albumin (partial sequence)",
    "VSLGRTKLLMFERKKYWSDFHKPMGElPLMEGRWLSF",
)
alb = _make_fasta_entry(
    "ALB",
    "ALB Human Albumin (partial sequence)",
    "VSLGRTKLLMFERKKYWSDFHKPMGElPLMEGRWLSF",
)

entries = [bsa, alb]

# Write both entries to a FASTA file in the current working directory.
f = FASTAFile()
f.store("example.fasta", entries)

In [21]:
# Read the FASTA file back; load() fills the list passed to it with the
# parsed entries.
entries = []
f = FASTAFile()
f.load("example.fasta", entries)

# Number of entries read, followed by one "<identifier> <sequence>" line each.
print(len(entries))

for entry in entries:
    print(entry.identifier, entry.sequence)

2
BSA VSLGRTKLLMFERKKYWSDFHKPMGElPLMEGRWLSF
ALB VSLGRTKLLMFERKKYWSDFHKPMGElPLMEGRWLSF
