In [1]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import networkx as nx
from beams import in_out
from beams.grouping import group_features
from beams.annotation import annotate_adducts
from beams.annotation import annotate_isotopes
from beams.annotation import annotate_compounds
from beams.annotation import summary

In [2]:
# Run configuration: ion mode and the SQLite file name derived from it.
ion_mode = "pos"
db_out = "results_{}.sqlite".format(ion_mode)

# Input files taken from the repository's test-data directory.
path = "../tests/test_data/"
fn_peaklist = os.path.join(path, "variableMetadata.txt")
fn_matrix = os.path.join(path, "dataMatrix.txt")

# Merge the peak list and the data matrix into a single table.
df = in_out.combine_peaklist_matrix(fn_peaklist, fn_matrix)

# Grouping settings, collected in one place so they are easy to tune.
# NOTE(review): presumably a retention-time window plus correlation
# coefficient / p-value cut-offs -- confirm against the beams documentation.
grouping_params = dict(
    max_rt_diff=5.0,
    coeff_thres=0.7,
    pvalue_thres=0.01,
    method="pearson",
)
graphs = group_features(df, db_out, **grouping_params)

# Write the grouping result to GML and load it back; the annotation cells
# further down operate on the re-read object.
gml_file = "graphs.gml"
nx.write_gml(graphs, gml_file)
graphs = nx.read_gml(gml_file)

  0%|          | 0/400 [00:00<?, ?it/s]  0%|          | 2/400 [00:00<00:20, 19.60it/s]  1%|          | 4/400 [00:00<00:21, 18.57it/s]  2%|▏         | 6/400 [00:00<00:21, 18.65it/s]  2%|▏         | 9/400 [00:00<00:20, 19.06it/s]  3%|▎         | 11/400 [00:00<00:20, 18.76it/s]  4%|▎         | 14/400 [00:00<00:19, 19.32it/s]  4%|▍         | 16/400 [00:00<00:19, 19.32it/s]  4%|▍         | 18/400 [00:00<00:20, 18.64it/s]  5%|▌         | 21/400 [00:01<00:19, 19.27it/s]  6%|▌         | 24/400 [00:01<00:19, 19.77it/s]  6%|▋         | 26/400 [00:01<00:21, 17.45it/s]  7%|▋         | 29/400 [00:01<00:19, 18.60it/s]  8%|▊         | 32/400 [00:01<00:18, 19.38it/s]  8%|▊         | 34/400 [00:01<00:19, 18.61it/s]  9%|▉         | 37/400 [00:01<00:18, 19.41it/s] 10%|█         | 40/400 [00:02<00:17, 20.07it/s] 11%|█         | 43/400 [00:02<00:18, 19.48it/s] 12%|█▏        | 46/400 [00:02<00:17, 20.13it/s] 12%|█▏        | 49/400 [00:02<00:16, 20.71it/s] 13%|█▎        | 52/400 [00:02<0

In [3]:
# Directory holding the reference libraries bundled with the package.
path = "../beams/data"

# Load the isotope and adduct libraries for the configured ion mode.
lib_isotopes = in_out.read_isotopes(
    os.path.join(path, "isotopes.txt"),
    ion_mode,
)
lib_adducts = in_out.read_adducts(
    os.path.join(path, "adducts.txt"),
    ion_mode,
)

# Show both libraries, isotopes first, so the reader can check what was loaded.
for library in (lib_isotopes, lib_adducts):
    print(library)

# Passed as `ppm` to the annotate_* calls in the following cells.
# NOTE(review): presumably the m/z tolerance in parts-per-million -- confirm.
ppm = 5.0

Isotopes in library:
--------------------------------------------
label_x	label_y	mass_difference	abundance_x	abundance_y
C	(13C)	1.003355	100.0	1.1
S	(34S)	1.995796	100.0	4.21
K	(41K)	1.998117	100.0	6.73

Adducts in library
-----------------
name	exact_mass	ion_mode
[M+H]+	1.0072764
[M+Na]+	22.9892214
[M+K]+	38.9631594



In [4]:
# Adduct annotation step: runs annotate_adducts on the grouped graphs with the
# adduct library and the ppm setting defined in the previous cell.
# The return value is not used here, so results are presumably persisted in
# the SQLite file named by db_out -- TODO confirm.
annotate_adducts(graphs, db_out, ppm, lib_adducts)
# Build the summary table from the input table and the database, then print it.
# print() is used rather than a bare expression so the table is also emitted
# when the file is executed as a plain script.
# NOTE(review): the saved output lists M153T34 twice (rows 3 and 4); check
# whether summary() is expected to repeat a feature.
df_out = summary(df, db_out)
print(df_out)

          name          mz          rt     intensity  group_id  degree_cor  \
0      M151T34  150.886715   34.152700  3.865964e+06       1.0        18.0   
1      M151T40  151.040235   39.838172  1.046898e+06      12.0         1.0   
2      M152T40  152.043607   40.303700  7.163655e+04      12.0         1.0   
3      M153T34  152.883824   34.174647  5.782935e+06       1.0        17.0   
4      M153T34  152.883824   34.174647  5.782935e+06       1.0        17.0   
5      M153T36  153.019474   35.785847  9.125698e+06       3.0         1.0   
6      M153T40  153.055906   39.714508  6.287670e+04       1.0         1.0   
7      M154T36  154.022860   36.222433  5.334214e+05       3.0         1.0   
8      M154T37  154.062402   37.183625  3.882610e+06       1.0         6.0   
9      M155T34  154.880902   33.957225  2.312124e+06       1.0         7.0   
10     M155T38  155.065788   37.984295  1.634999e+05       1.0         4.0   
11     M156T37  155.874359   36.783559  5.574522e+05       6.0  

In [5]:
# Isotope annotation step: runs annotate_isotopes on the grouped graphs with
# the isotope library and the same ppm setting used for adducts.
# The return value is not used here, so results are presumably persisted in
# the SQLite file named by db_out -- TODO confirm.
annotate_isotopes(graphs, db_out, ppm, lib_isotopes)
# Rebuild the summary table after this step and print it (overwrites df_out).
df_out = summary(df, db_out)
print(df_out)

          name          mz          rt     intensity  group_id  degree_cor  \
0      M151T34  150.886715   34.152700  3.865964e+06       1.0        18.0   
1      M151T40  151.040235   39.838172  1.046898e+06      12.0         1.0   
2      M152T40  152.043607   40.303700  7.163655e+04      12.0         1.0   
3      M153T34  152.883824   34.174647  5.782935e+06       1.0        17.0   
4      M153T34  152.883824   34.174647  5.782935e+06       1.0        17.0   
5      M153T36  153.019474   35.785847  9.125698e+06       3.0         1.0   
6      M153T40  153.055906   39.714508  6.287670e+04       1.0         1.0   
7      M154T36  154.022860   36.222433  5.334214e+05       3.0         1.0   
8      M154T37  154.062402   37.183625  3.882610e+06       1.0         6.0   
9      M155T34  154.880902   33.957225  2.312124e+06       1.0         7.0   
10     M155T38  155.065788   37.984295  1.634999e+05       1.0         4.0   
11     M156T37  155.874359   36.783559  5.574522e+05       6.0  

In [6]:
# Compound annotation step: runs annotate_compounds on the feature table with
# the adduct library and ppm setting. Note the argument order differs from the
# two previous annotate_* calls (df first, db_out fourth, no graphs argument).
# NOTE(review): "lipidmaps_full_20181217_v1" looks like the name of the
# reference compound database to search -- confirm against the beams docs.
annotate_compounds(df, lib_adducts, ppm, db_out, "lipidmaps_full_20181217_v1")
# Rebuild the summary table after this step and print it (overwrites df_out).
df_out = summary(df, db_out)
print(df_out)

          name          mz          rt     intensity  group_id  degree_cor  \
0      M151T34  150.886715   34.152700  3.865964e+06       1.0        18.0   
1      M151T40  151.040235   39.838172  1.046898e+06      12.0         1.0   
2      M152T40  152.043607   40.303700  7.163655e+04      12.0         1.0   
3      M153T34  152.883824   34.174647  5.782935e+06       1.0        17.0   
4      M153T34  152.883824   34.174647  5.782935e+06       1.0        17.0   
5      M153T36  153.019474   35.785847  9.125698e+06       3.0         1.0   
6      M153T40  153.055906   39.714508  6.287670e+04       1.0         1.0   
7      M154T36  154.022860   36.222433  5.334214e+05       3.0         1.0   
8      M154T37  154.062402   37.183625  3.882610e+06       1.0         6.0   
9      M155T34  154.880902   33.957225  2.312124e+06       1.0         7.0   
10     M155T38  155.065788   37.984295  1.634999e+05       1.0         4.0   
11     M156T37  155.874359   36.783559  5.574522e+05       6.0  