In [13]:
# 200804
# Given a compound's MW, calculate the ion forms of the compound and its related products...

In [14]:
from itertools import combinations
from itertools import groupby

In [15]:
from dataclasses import dataclass

@dataclass
class Composition:
    """A named chemical entity with a mass.

    Used for compounds, modifications, adducts and the ions built from them.
    Instances are combined with :meth:`add_comp`, which concatenates the names
    and sums the masses.

    The custom ``__init__`` below replaces the one ``@dataclass`` would
    generate; the decorator still supplies ``__repr__`` and ``__eq__`` over
    the three fields.
    """
    Name: str = ""      # display label, e.g. 'Ibu' or '2(Na-H)'
    Count: int = 1      # always 1 after construction (see __init__)
    Mass: float = -1    # total mass of the entity

    # Lookup table {modification name: mass per unit} consulted when no
    # explicit mass is passed to the constructor. It is declared here so the
    # class is usable on its own; the notebook assigns the real table to
    # ``Composition.Mods`` after the class definition.
    Mods = {}
    mods = {}   # legacy lower-case attribute, unused; kept for compatibility

    # Mass added by protonate() and subtracted by deprotonate().
    _PROTON_MASS = 1.00727

    def __init__(self, name, count, mass=None):
        """Create a composition.

        Args:
            name: Label of the entity; also the key into ``Mods`` when
                ``mass`` is omitted.
            count: Multiplier. A value other than 1 is folded into the
                label as ``'{count}({name})'``.
            mass: Total mass of the entity (NOT multiplied by ``count``).
                When ``None`` the mass is ``Mods[name] * count``.

        Raises:
            KeyError: if ``mass`` is ``None`` and ``name`` is not in ``Mods``.
        """
        self.Name = f'{name}' if count == 1 else f'{count}({name})'
        # there's only one of these even if the 'count' (really a multiplier) is greater
        self.Count = 1

        # Compare against None rather than testing truthiness so that an
        # explicit mass of 0 is honoured instead of triggering a table lookup.
        if mass is None:
            try:
                unit_mass = self.Mods[name]
            except KeyError:
                raise KeyError(
                    f'unknown modification {name!r}: pass mass explicitly '
                    f'or add it to Composition.Mods'
                ) from None
            mass = unit_mass * count
        self.Mass = mass

    @classmethod
    def from_tuple(cls, t):
        """Make a composition from a ``(Name, Count)`` tuple."""
        return cls(t[0], t[1])

    @classmethod
    def from_tuple_list(cls, t_list):
        """Make one composition from a list of ``(Name, Count)`` tuples.

        The individual compositions are merged left to right with ``'.'`` as
        the separator. Returns ``None`` when ``t_list`` is empty.
        """
        comp = None

        for t in t_list:
            part = cls.from_tuple(t)
            # The first entry seeds the result; the rest are appended to it.
            comp = part if comp is None else comp.add_comp(part, sep='.')

        return comp

    @classmethod
    def proton(cls):
        """Return a composition representing a single proton ('H+')."""
        return cls('H+', 1, cls._PROTON_MASS)

    def protonate(self):
        """Return a new composition with a proton added ('.H+')."""
        return self.add_comp(Composition('H+', 1, self._PROTON_MASS), sep='.')

    def deprotonate(self):
        """Return a new composition with a proton removed ('.[-H+]-')."""
        return self.add_comp(Composition('[-H+]-', 1, -self._PROTON_MASS), sep='.')

    def make_copy(self, mult=1):
        """Return a copy whose mass is scaled by ``mult``.

        With ``mult`` other than 1 the copy's name becomes ``'{mult}({Name})'``.
        """
        return Composition(self.Name, self.Count*mult, self.Mass*mult)

    def label(self):
        """Return the display name of this composition."""
        return self.Name

    def add_comp(self, comp1, sep='-'):
        """Merge two compositions into a new one.

        The new name is ``self`` and ``comp1`` joined by ``sep``; the new mass
        is the sum of both masses. Neither operand is modified.
        """
        new_name = self.label() + sep + comp1.label()
        new_mass = self.Mass + comp1.Mass
        return Composition(new_name, 1, mass=new_mass)

    def expand(self):
        """Return a list holding ``Name`` repeated ``Count`` times."""
        return [self.Name for _ in range(self.Count)]

# Mass shift for each named modification, keyed by the name used in the
# (name, count) limit tuples. Composition.__init__ looks a name up here, and
# multiplies by the count, whenever no explicit mass is supplied.
# Negative entries reduce the mass of the composition they are added to.
Composition.Mods = {'OH':15.99492,
        'COOH':29.97418,     # COOH is CH3 -> COOH, i.e. +O2, -H2
        'gluc':176.032088,
        'sulphate':79.956815,
        'NH3':17.026549,     # adducts from here on
        'Na-H':21.981944,
        'K-H':37.955881,
        'Ca-2H': 37.946941,
        'H2O':-18.010565,
        'NaAc': 82.003074,
        'NaFo': 67.987424,     # sodium formate
        'C2H4O2':60.021129,
        'CH2O2':46.004931,
        'CO2':-43.989829,
        'C2H4': -28.0313}
 
# Quick demonstration of the Composition API: build the sample objects first,
# then report them in a fixed order.

# Direct construction: the multiplier 2 is folded into the name and the mass.
a = Composition('Na-H', 2)

# Same thing starting from a (name, count) tuple.
b = Composition.from_tuple(('K-H', 2))

# Merging joins the names with the separator and sums the masses.
ab = a.add_comp(b, sep='.')

# A whole list of (name, count) tuples collapsed into one composition.
t_list = [('Na-H', 2), ('K-H', 2), ('NH3', 1)]
abc = Composition.from_tuple_list(t_list)

print('Proton: ', Composition.proton())
print('Normal init:', a)
print('From tuple:', b)
print('From merge:', ab)
print('Expanded:', a.expand())
print('From tuple list:', abc)

Proton:  Composition(Name='H+', Count=1, Mass=1.00727)
Normal init: Composition(Name='2(Na-H)', Count=1, Mass=43.963888)
From tuple: Composition(Name='2(K-H)', Count=1, Mass=75.911762)
From merge: Composition(Name='2(Na-H).2(K-H)', Count=1, Mass=119.87565)
Expanded: ['2(Na-H)']
From tuple list: Composition(Name='2(Na-H).2(K-H).NH3', Count=1, Mass=136.902199)


In [16]:
def make_combinations(limit_list, max_combinations):
    """Return every distinct combination of entities up to a maximum size.

    Args:
        limit_list: Sequence of ``(entity, upper_limit)`` tuples; an entity may
            appear at most ``upper_limit`` times in any one combination.
        max_combinations: Largest total number of entities allowed in a
            combination.

    Returns:
        A list of combinations. Each combination is a list of
        ``(entity, count)`` tuples, e.g. ``[('x', 2), ('y', 1)]``. Smaller
        combinations come first and the order is the same on every run.
    """
    # Expand the limits into individual entities,
    # i.e. [('X', 2), ('Y', 2)] => X, X, Y, Y
    entities = [limit[0] for limit in limit_list for _ in range(limit[1])]

    # combinations() treats the repeated entities as distinct positions, so the
    # same tuple is produced many times. A dict keeps only the first occurrence
    # and, unlike a set, preserves insertion order - so the result does not
    # depend on string hash randomization between kernel runs.
    # No sorting is needed to canonicalize: combinations() emits elements in
    # input order, so equal entities are always adjacent.
    unique_combinations = {}
    for size in range(1, max_combinations + 1):
        unique_combinations.update(dict.fromkeys(combinations(entities, size)))

    # Convert each tuple back into the form [('x', 2), ('y', 1)] by grouping
    # adjacent equal entities; each group is an iterator, so it has to be
    # materialised as a list before it can be counted.
    return [
        [(key, len(list(group))) for key, group in groupby(combination)]
        for combination in unique_combinations
    ]
        
# Sanity check: up to three entities drawn from 3 y's, 2 x's and 2 z's.
combs = make_combinations([('y',3), ('x',2), ('z',2)], 3)
print(len(combs))   # expected: 17

17


In [17]:
def add_mods(compounds, limits):
    """Extend ``compounds`` with modified copies of each of its members.

    ``limits`` is a list of ``(modification name, max count)`` tuples. For
    every compound already in the list, and every modification, one new
    compound is produced for each multiplier from 1 up to the max count.

    The new compounds are appended to ``compounds`` itself (the argument is
    extended in place) and that same list is returned.
    """
    # Build the complete set of variants before touching `compounds`, so the
    # freshly created entries are not themselves modified again.
    variants = [
        parent.make_copy().add_comp(Composition(limit[0], multiplier))
        for parent in compounds
        for limit in limits
        for multiplier in range(1, limit[1] + 1)
    ]

    compounds += variants
    return compounds


In [27]:
# --- Run configuration ---

# Compound under study: identifier and molecular weight (MW)
base_name = 'Ibu'
base_mass = 206.1307
# Alternative compound (disabled):
# base_name = 'x543'
# base_mass = 543.2068+1.00727

# Multimers are generated with 2 .. multimer_limit copies of each compound
multimer_limit = 3
# Total number of adducts allowed
max_adduct_count = 4
# 'negative' selects the negative-mode tables; any other value selects the alternative set
ionization = 'negative'
# Also generate pairs of two different compounds
include_hetero_dimers = True
# Extra modifications applied once to the base compound, e.g. ['C2H4']
base_mods = []

In [34]:
# Limit tables for the selected ionization mode.
# Every entry is a (modification name, maximum count) tuple.
if ionization != 'negative':
    # Any mode other than 'negative'
    phase1_limits = [('OH', 2), ('COOH', 1)]    # metabolite modifications - phase 1
    phase2_limits = [('gluc', 2)]
    adduct_limits = [('Na-H', 3), ('K-H', 3), ('NH3', 1), ('NaAc', 2), ('NaFo', 1)]
    loss_limits = [('H2O', 2)]
else:
    # Negative mode
    phase1_limits = [('OH', 1), ('COOH', 1)]    # metabolite modifications - phase 1
    phase2_limits = [('gluc', 1)]               # ('sulphate', 1) is currently disabled
    adduct_limits = [('Na-H', 3), ('K-H', 2), ('NaAc', 2), ('NaFo', 1)]   # ('NH3', 1) is currently disabled
    loss_limits = [('H2O', 2), ('CO2', 1)]

In [35]:
# Enumerate every allowed adduct combination. The size cap comes from the
# max_adduct_count setting in the configuration cell rather than a literal,
# so changing the setting actually changes the result.
adduct_combs = make_combinations(adduct_limits, max_adduct_count)

# One Composition per combination, ordered by increasing mass.
adduct_comps = sorted(
    (Composition.from_tuple_list(c) for c in adduct_combs),
    key=lambda comp: comp.Mass,
)

print(len(adduct_comps),'adduct forms')

# for ac in adduct_comps:
#     print(ac)

43 adduct forms


In [36]:
# Seed the compound list with the unmodified base compound.
base_compound = Composition(base_name, 1, base_mass)
compounds = [base_compound]

# Optional variants of the base compound; each base mod is applied exactly once.
compounds += [
    base_compound.make_copy().add_comp(Composition(mod_name, 1))
    for mod_name in (base_mods or [])
]

# Metabolite stages. add_mods extends the list it is given with one new entry
# per (compound, modification, multiplier) and returns that same list.
compounds = add_mods(compounds, phase1_limits)
print(len(compounds), 'after phase 1')

compounds = add_mods(compounds, phase2_limits)
print(len(compounds), 'after phase 2')

# Homo-multimers: 2 .. multimer_limit copies of every compound built so far.
multimers = [
    monomer.make_copy(copies)
    for monomer in compounds
    for copies in range(2, multimer_limit + 1)
]

# Hetero-dimers: each unordered pair of two different compounds, joined with '+'.
if include_hetero_dimers:
    multimers += [
        first.make_copy().add_comp(second.make_copy(), sep='+')
        for position, first in enumerate(compounds)
        for second in compounds[position + 1:]
    ]

compounds += multimers
print(len(compounds), 'with multimers')

# Finally apply the losses to everything, multimers included.
compounds = add_mods(compounds, loss_limits)
print(len(compounds), 'after losses')

# for c in compounds:
#     print(c)

3 after phase 1
6 after phase 2
33 with multimers
132 after losses


In [37]:
# Build the final ion list: every compound on its own and combined with each
# adduct form, then charged according to the selected ionization mode.
negative_mode = ionization == 'negative'
ion_forms = []

for c in compounds:
    # The bare compound comes first, followed by one entry per adduct form.
    neutral_forms = [c.make_copy()]
    neutral_forms += [c.make_copy().add_comp(a, sep='.') for a in adduct_comps]

    for neutral in neutral_forms:
        # Negative mode removes a proton; any other mode adds one. The bare
        # compound now follows the same rule as the adduct forms - it used to
        # be deprotonated unconditionally, which was wrong for positive mode.
        ion = neutral.deprotonate() if negative_mode else neutral.protonate()
        ion_forms.append(ion)

print(len(ion_forms))
# for ion in ion_forms:
#     print(ion)

5808


In [38]:
import os

print(len(ion_forms))

# Write the ions in order of increasing mass.
ion_forms = sorted(ion_forms, key=lambda x: x.Mass)

# Set up file names and paths...
# NOTE(review): this is a machine-specific absolute path; the directory must
# already exist or open() below raises FileNotFoundError.
f_dir = os.sep + os.path.join('Users','ronbonner','Data','PCA')

if ionization == "negative":
    f_name = f'{base_name} ions neg.txt'
else:
    f_name = f'{base_name} ions pos.txt'

data_path = os.path.join(f_dir, f_name)

print (data_path)

# One line per ion: mass (4 decimals, right-aligned in 10 columns), two
# spaces, then the name. The encoding is stated explicitly so the file does
# not depend on the platform default, and the `with` block closes the file,
# so no explicit close() is needed.
with open(data_path, 'w', encoding='utf-8') as f:
    for ion in ion_forms:
        #print(f'{ion.Mass:10.4f}  {ion.Name}')
        f.write(f'{ion.Mass:10.4f}  {ion.Name}\n')
        

5808
/Users/ronbonner/Data/PCA/Ibu ions neg.txt


In [39]:
# Spot-check: show the first ten entries of the compound list.
print(compounds[:10])

[Composition(Name='Ibu', Count=1, Mass=206.1307), Composition(Name='Ibu-OH', Count=1, Mass=222.12562), Composition(Name='Ibu-COOH', Count=1, Mass=236.10487999999998), Composition(Name='Ibu-gluc', Count=1, Mass=382.162788), Composition(Name='Ibu-OH-gluc', Count=1, Mass=398.15770799999996), Composition(Name='Ibu-COOH-gluc', Count=1, Mass=412.13696799999997), Composition(Name='2(Ibu)', Count=1, Mass=412.2614), Composition(Name='3(Ibu)', Count=1, Mass=618.3921), Composition(Name='2(Ibu-OH)', Count=1, Mass=444.25124), Composition(Name='3(Ibu-OH)', Count=1, Mass=666.37686)]
