# Calculate the masses of protein sequences from a FASTA file

In [16]:
from pprint import pprint
import requests, io
%run ../../chemw/mw.py

# Point Proteins.mass() at an example FASTA file hosted in the Codons PyPI
# module's repository; the records it prints appear in the cell output below.
protein = Proteins()
protein.mass(
    fasta_link='https://github.com/freiburgermsu/codons/raw/main/examples/translation-transcription/translation-40_proteins/protein_sequence.fasta',
)

# Pretty-print the `fasta_protein_masses` attribute under a banner line
# (presumably populated by the mass() call above -- confirm in chemw/mw.py).
print('\n\nresultant calculations', '\n', '=' * 30)
pprint(protein.fasta_protein_masses)

>Protein - 8residues - 1124.3123amu - 
VHGWRKCL
>Protein - 15residues - 2154.24672amu - 
QNFNAHPTQQVEWRF
>Protein - 6residues - 827.91874amu - 
WGIIYS
>Protein - 8residues - 1104.23148amu - 
GRYYVFCA
>Protein - 1residues - 165.18914amu - 
F
>Protein - 3residues - 393.51876amu - 
LLL
>Protein - 3residues - 352.38402amu - 
LQG
>Protein - 25residues - 3326.59382amu - 
SCVFGFTHDTAVNNRSLSDCRWCKF
>Protein - 3residues - 470.56302amu - 
RFL
>Protein - 35residues - 4661.39614amu - 
LFCTHGLERVVQCLWHKRCCSTRLKSLLLRGCANC
>Protein - 4residues - 640.72716amu - 
PVWW
>Protein - 3residues - 386.4433amu - 
GFK
>Protein - 21residues - 2653.0669amu - 
HNYGIVVFCVSLVCLQSGLIK
>Protein - 4residues - 482.61194amu - 
LALL
>Protein - 8residues - 1007.19758amu - 
CLLFSSLV
>Protein - 1residues - 115.13046amu - 
P
>Protein - 1residues - 117.14634amu - 
V
>Protein - 2residues - 222.23892amu - 
VS
>Protein - 38residues - 5036.4143amu - 
PIDSCSNCWSCWCSHSRRTYYIGLGEQSDNFLCFSSLC
>Protein - 11residues - 1490.62864amu - 
D

# Calculate the mass of a protein sequence from a string

In [19]:
# from chemw import Proteins
%run ../../chemw/mw.py

# A fresh calculator instance for this example.
protein = Proteins()

# One-letter amino-acid sequence passed directly as a string; the trailing '*'
# is kept as part of the input (presumably a stop marker -- confirm in chemw/mw.py).
sequence = 'LFCTHGLERVVQCLWHKRCCSTRLKSLLLRGCANC*'

# Keep the value returned by mass() for later inspection.
protein_mw = protein.mass(sequence)

>Protein - 36residues - 4661.39614amu - 
LFCTHGLERVVQCLWHKRCCSTRLKSLLLRGCANC*


# Calculate the mass of individual amino acids

In [20]:
from pubchempy import get_compounds
from pprint import pprint
import requests
import json, io
%run ../../chemw/mw.py

# Download the amino-acid synonym table from the Codons repository. Each entry
# maps a full amino-acid name to its 'one_letter' and 'three_letter' codes.
response = requests.get('https://raw.githubusercontent.com/freiburgermsu/codons/main/codons/rosetta_stone/amino_acid_synonyms.json')
# Stop on an HTTP error here rather than handing an error page to the JSON parser.
response.raise_for_status()
codons_amino_acids = response.content
amino_acids_dic = json.loads(codons_amino_acids.decode('utf-8'))
pprint(amino_acids_dic)
print('\n\n')

# Look up each amino acid's molecular formula on PubChem by name and record the
# mass computed by ChemMW under both of its abbreviations.
amino_acids_masses = {}
chem_mw = ChemMW()
for aa, abbreviations in amino_acids_dic.items():
    print(aa)
    compounds = get_compounds(aa, 'name')
    if not compounds:
        # LookupError is the parent class of the IndexError that indexing an
        # empty result would raise, and this message names the failing entry.
        raise LookupError('PubChem returned no compound named {!r}'.format(aa))
    formula = compounds[0].molecular_formula
    # Compute the mass once; insert the three-letter key first so the key order
    # of the exported JSON stays the same as before.
    aa_mass = chem_mw.mass(formula)
    amino_acids_masses[abbreviations['three_letter']] = aa_mass
    amino_acids_masses[abbreviations['one_letter']] = aa_mass
    print('\n')

# Export the abbreviation -> mass dictionary to the working directory.
with open('amino_acids_masses.json', 'w') as out:
    json.dump(amino_acids_masses, out, indent=4)

{'alanine': {'one_letter': 'A', 'three_letter': 'ala'},
 'arginine': {'one_letter': 'R', 'three_letter': 'arg'},
 'asparagine': {'one_letter': 'N', 'three_letter': 'asn'},
 'aspartic acid': {'one_letter': 'D', 'three_letter': 'asp'},
 'cysteine': {'one_letter': 'C', 'three_letter': 'cys'},
 'glutamic acid': {'one_letter': 'E', 'three_letter': 'glu'},
 'glutamine': {'one_letter': 'Q', 'three_letter': 'gln'},
 'glycine': {'one_letter': 'G', 'three_letter': 'gly'},
 'histidine': {'one_letter': 'H', 'three_letter': 'his'},
 'isoleucine': {'one_letter': 'I', 'three_letter': 'ile'},
 'leucine': {'one_letter': 'L', 'three_letter': 'leu'},
 'lysine': {'one_letter': 'K', 'three_letter': 'lys'},
 'methionine': {'one_letter': 'M', 'three_letter': 'met'},
 'phenylalanine': {'one_letter': 'F', 'three_letter': 'phe'},
 'proline': {'one_letter': 'P', 'three_letter': 'pro'},
 'serine': {'one_letter': 'S', 'three_letter': 'ser'},
 'threonine': {'one_letter': 'T', 'three_letter': 'thr'},
 'tryptophan': 