# Improving the annotation of metabolite transporters

First off, we are importing packages that might come in handy.

In [60]:
import ast
import pickle
from io import StringIO

import cobra
import networkx as nx
import numpy as np
import pandas as pd
import requests
import scipy as sp
from Bio import SeqIO

Importing the most recent TC numbers, the corresponding ChEBI IDs of their substrates, UniProt IDs and GO terms from TCDB.

In [2]:
# TCDB endpoints; each response body is later passed to parse_data().
# TC number -> substrate ChEBI IDs (tab-separated, IDs joined with "|").
tc_chebi_url = "https://www.tcdb.org/cgi-bin/substrates/getSubstrates.py"
# UniProt accession -> TC number (tab-separated).
uniprot_tc_url = "https://www.tcdb.org/cgi-bin/projectv/public/acc2tcid.py"
# GO term -> TC number (tab-separated; only the first two fields are used).
go_tc_url = "https://www.tcdb.org/cgi-bin/projectv/public/go.py"
# FASTA file with the TCDB protein sequences.
fasta_tcdb_url = "https://www.tcdb.org/public/tcdb"

def fetch_data(url, timeout=60):
    """Download ``url`` with an HTTP GET and return the response body as text.

    Parameters
    ----------
    url : str
        Address to request.
    timeout : float or tuple, optional
        Seconds to wait for the server (forwarded to ``requests.get``).
        Without a timeout a stalled server would block the notebook forever.

    Returns
    -------
    str
        The decoded response body.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server does not respond within ``timeout`` seconds.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as data.
    response.raise_for_status()
    return response.text

def _nonblank_lines(text):
    """Split ``text`` on newlines, dropping blank lines and trailing carriage returns."""
    return [line.rstrip("\r") for line in text.strip().split("\n") if line.strip()]


def parse_data(tc_chebi_text, uniprot_tc_text, go_tc_text, fasta_tcdb_text):
    """Parse the four raw TCDB downloads into DataFrames.

    Parameters
    ----------
    tc_chebi_text : str
        Lines of ``<TC number>\\t<CHEBI:id;name|CHEBI:id;name|...>``.
    uniprot_tc_text : str
        Lines of ``<UniProt ID>\\t<TC number>``.
    go_tc_text : str
        Tab-separated lines; only the first two fields (GO term, TC number)
        are kept.
    fasta_tcdb_text : str
        FASTA records whose ``|``-separated header carries the UniProt ID in
        field 2 and the TC number as the first token of field 3.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_chebi, df_uniprot, df_go, df_fasta)`` with columns
        ``["TC Number", "CHEBI IDs"]``, ``["UniProt ID", "TC Number"]``,
        ``["GO Term", "TC Number"]`` and
        ``["TC Number", "UniProt ID", "AA Sequence"]``.

    Raises
    ------
    ValueError
        If a non-blank TC-ChEBI line does not have exactly two tab-separated
        fields.

    Notes
    -----
    Blank lines are skipped and Windows line endings are stripped, so a stray
    ``\\r`` never ends up inside a TC number used as merge key.
    """
    # TC-CHEBI
    tc_chebi_data = []
    for line in _nonblank_lines(tc_chebi_text):
        tc_number, chebi_ids = line.split("\t")
        # Each entry looks like "CHEBI:<id>;<name>"; keep only the bare number.
        chebi_id_list = [
            entry.split(";")[0].replace("CHEBI:", "") for entry in chebi_ids.split("|")
        ]
        tc_chebi_data.append([tc_number, chebi_id_list])

    df_chebi = pd.DataFrame(tc_chebi_data, columns=["TC Number", "CHEBI IDs"])

    # UniProt-TC
    uniprot_tc_data = [line.split("\t") for line in _nonblank_lines(uniprot_tc_text)]
    df_uniprot = pd.DataFrame(uniprot_tc_data, columns=["UniProt ID", "TC Number"])

    # GO-TC
    go_tc_data = [line.split("\t")[:2] for line in _nonblank_lines(go_tc_text)]
    df_go = pd.DataFrame(go_tc_data, columns=["GO Term", "TC Number"])

    # FASTA-TC
    tc_data = []
    for record in SeqIO.parse(StringIO(fasta_tcdb_text), "fasta"):
        header_fields = record.description.split("|")
        uniprot = header_fields[2]
        # Field 3 is "<TC number> <free-text description>".
        tc_number = header_fields[3].split()[0]
        tc_data.append([tc_number, uniprot, str(record.seq)])
    df_fasta = pd.DataFrame(tc_data, columns=["TC Number", "UniProt ID", "AA Sequence"])

    return df_chebi, df_uniprot, df_go, df_fasta

In [3]:
# Download the four TCDB resources, in the same order as the URLs are declared.
tc_chebi_text, uniprot_tc_text, go_tc_text, fasta_tcdb_text = [
    fetch_data(source_url)
    for source_url in (tc_chebi_url, uniprot_tc_url, go_tc_url, fasta_tcdb_url)
]

In [8]:
df_chebi, df_uniprot, df_go, df_fasta = parse_data(
    tc_chebi_text, uniprot_tc_text, go_tc_text, fasta_tcdb_text
)
df_chebi.to_csv("tcdb_chebis.csv", index=False)

# One row per (TC number, protein, single ChEBI ID, GO term) combination:
# attach sequences, unpack the ChEBI lists, attach GO terms, then de-duplicate.
df_merged = (
    df_chebi.merge(df_fasta, on="TC Number", how="left")
    .explode("CHEBI IDs")
    .merge(df_go[["TC Number", "GO Term"]], on="TC Number", how="left")
    .drop_duplicates()
)
df_merged.to_csv("tcdb_data_combined.csv", index=False)
df_merged

Unnamed: 0,TC Number,CHEBI IDs,UniProt ID,AA Sequence,GO Term
0,2.A.108.2.1,30179,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0016020
1,2.A.108.2.1,30179,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0009055
2,2.A.108.2.1,30179,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0020037
3,2.A.108.2.1,30179,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0055085
4,1.A.1.5.30,3473,Q06IP1,MNYRDVSKVHFGGDDVSLYGTPKEELGPGQLCVGAAGAPPGVEPKP...,
...,...,...,...,...,...
154251,1.A.8.12.1,5448,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0016021
154252,1.A.8.12.1,5448,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0043661
154253,1.A.8.12.1,5448,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0005215
154254,1.A.8.12.1,5448,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0009877


Making an easy way to convert any ChEBI ID (primary or secondary) to its primary ChEBI ID.

In [6]:
prim_sec_chebi = pd.read_csv("chebi_data/primary_secondary_chebi_ids.tsv", sep="\t")

# Lookup table: secondary ChEBI ID -> primary ChEBI ID (int).
secondary_to_primary = {}

for primary_raw, secondaries_raw in zip(
    prim_sec_chebi["Primary_CHEBI_ID"], prim_sec_chebi["Secondary_CHEBI_IDs"]
):
    primary_id = int(primary_raw)
    # The secondary IDs are stored as the text of a Python collection literal.
    # ast.literal_eval parses literals only, so (unlike eval) a tampered file
    # cannot execute arbitrary code.
    for s_id in ast.literal_eval(secondaries_raw):
        secondary_to_primary[s_id] = primary_id

def get_primary_id(chebi_id):
    """Return the primary ChEBI ID for ``chebi_id``.

    The ID is converted to ``int`` and its string form is looked up in
    ``secondary_to_primary``; if there is no entry, the integer ID itself is
    returned.
    """
    numeric_id = int(chebi_id)
    lookup_key = str(numeric_id)
    if lookup_key in secondary_to_primary:
        return secondary_to_primary[lookup_key]
    return numeric_id

Convert all ChEBI IDs to primary IDs for the TCDB df

In [9]:
# Overwrite the "CHEBI IDs" column with numeric primary IDs.
# (An earlier version kept them in a separate "ChEBI Primary IDs" column.)
df_merged["CHEBI IDs"] = pd.to_numeric(df_merged["CHEBI IDs"], errors="raise").apply(
    get_primary_id
)
df_merged

Unnamed: 0,TC Number,CHEBI IDs,UniProt ID,AA Sequence,GO Term
0,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0016020
1,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0009055
2,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0020037
3,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0055085
4,1.A.1.5.30,3473,Q06IP1,MNYRDVSKVHFGGDDVSLYGTPKEELGPGQLCVGAAGAPPGVEPKP...,
...,...,...,...,...,...
154251,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0016021
154252,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0043661
154253,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0005215
154254,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0009877


Creating the filter that is applied to only find accurate ChEBIs. See chebi_data/leaf_children.ipynb for more info regarding the filter choice.

The method used is Method 2, which gives the best performance (it includes the most IDs for which info is obtainable).
This filter aims to only leave in leaf nodes, and parents that have no children present in the df from TCDB.

First, loading in the hierarchy as of 2024-08-01.

In [10]:
# Parent/child edges of the ChEBI hierarchy, reduced to bare integer IDs.
df_hierarchy = pd.read_csv("chebi_data/chebiHierarchy.tsv", sep="\t")
df_hierarchy["child"] = df_hierarchy["child"].str.extract(r'CHEBI_(\d+)').astype(int)
df_hierarchy["parent"] = df_hierarchy["parent"].str.extract(r'CHEBI_(\d+)').astype(int)

# Every distinct (primary) ChEBI ID that TCDB lists as a substrate.
all_primary_chebi_tcdb = set(df_merged["CHEBI IDs"].unique())

# Same edges, with both ends mapped to primary IDs; df_hierarchy stays untouched.
df_hierarchy_prim = df_hierarchy.assign(
    child=df_hierarchy["child"].map(get_primary_id),
    parent=df_hierarchy["parent"].map(get_primary_id),
)

Finding all the parents in the original (TCDB) df that have a child listed in the df.

In [15]:
# Parents of any hierarchy edge whose child is itself a TCDB substrate.
parents_w_children_in_tcdb = set(
    df_hierarchy_prim.loc[df_hierarchy_prim["child"].isin(all_primary_chebi_tcdb), "parent"]
)

# Of those parents, the ones that TCDB also lists are dropped from the set of IDs.
chebi_ids_to_remove = parents_w_children_in_tcdb & all_primary_chebi_tcdb

filtered_chebis = all_primary_chebi_tcdb.difference(chebi_ids_to_remove)
len(chebi_ids_to_remove)

201

Applying the filter where there are only leaf nodes and parents without children, and reducing df_merged correspondingly.

In [48]:
# Keep only the rows whose ChEBI ID survived the filter.
keep_row = df_merged["CHEBI IDs"].isin(filtered_chebis)
df_filtered = df_merged.loc[keep_row]
df_filtered

Unnamed: 0,TC Number,CHEBI IDs,UniProt ID,AA Sequence,GO Term
0,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0016020
1,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0009055
2,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0020037
3,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0055085
4,1.A.1.5.30,3473,Q06IP1,MNYRDVSKVHFGGDDVSLYGTPKEELGPGQLCVGAAGAPPGVEPKP...,
...,...,...,...,...,...
154251,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0016021
154252,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0043661
154253,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0005215
154254,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0009877


Now I want to append the ChEBI information to the substrates: charge, formula, MW and SMILES, to be precise. To reduce mismatches, I first need to make sure that both dfs use the primary ChEBI ID.

In [49]:
# Use a forward slash like the other chebi_data/ reads: a backslash here is an
# invalid string escape and only resolves as a path separator on Windows.
chebi_df = pd.read_csv("chebi_data/chebiDf.tsv", sep="\t")
chebi_df["chebi"] = chebi_df["chebi"].str.extract(r"CHEBI_(\d+)").astype(int)
chebi_df = chebi_df.drop(columns=["inchi", "inchikey"])

# Primary ID of every ChEBI entry, used as the merge key against the TCDB df.
chebi_df.loc[:, "chebi_primary"] = chebi_df["chebi"].apply(get_primary_id)

In [50]:
# Attach the ChEBI properties to the filtered TCDB rows; rows without a match keep NaN.
df_chebi_info = df_filtered.merge(
    chebi_df, left_on="CHEBI IDs", right_on="chebi_primary", how="left"
).rename(columns={"label": "chebi_label"})
df_chebi_info

Unnamed: 0,TC Number,CHEBI IDs,UniProt ID,AA Sequence,GO Term,charge,chebi,formula,chebi_label,mass,smiles,chebi_primary
0,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0016020,2.0,49807.0,Pb,lead(2+),207.20000,[Pb++],49807.0
1,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0009055,2.0,49807.0,Pb,lead(2+),207.20000,[Pb++],49807.0
2,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0020037,2.0,49807.0,Pb,lead(2+),207.20000,[Pb++],49807.0
3,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0055085,2.0,49807.0,Pb,lead(2+),207.20000,[Pb++],49807.0
4,1.A.1.5.30,3473,Q06IP1,MNYRDVSKVHFGGDDVSLYGTPKEELGPGQLCVGAAGAPPGVEPKP...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
64652,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0016021,0.0,17754.0,C3H8O3,glycerol,92.09382,OCC(O)CO,17754.0
64653,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0043661,0.0,17754.0,C3H8O3,glycerol,92.09382,OCC(O)CO,17754.0
64654,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0005215,0.0,17754.0,C3H8O3,glycerol,92.09382,OCC(O)CO,17754.0
64655,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0009877,0.0,17754.0,C3H8O3,glycerol,92.09382,OCC(O)CO,17754.0


Alternatively, without using the filter at all....

In [54]:
# Same merge, but starting from the unfiltered df_merged. This rebinds
# df_chebi_info, so the cells below work on the unfiltered result.
df_chebi_info = df_merged.merge(
    chebi_df, left_on="CHEBI IDs", right_on="chebi_primary", how="left"
).rename(columns={"label": "chebi_label"})
df_chebi_info

Unnamed: 0,TC Number,CHEBI IDs,UniProt ID,AA Sequence,GO Term,charge,chebi,formula,chebi_label,mass,smiles,chebi_primary
0,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0016020,2.0,49807.0,Pb,lead(2+),207.20000,[Pb++],49807.0
1,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0009055,2.0,49807.0,Pb,lead(2+),207.20000,[Pb++],49807.0
2,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0020037,2.0,49807.0,Pb,lead(2+),207.20000,[Pb++],49807.0
3,2.A.108.2.1,49807,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0055085,2.0,49807.0,Pb,lead(2+),207.20000,[Pb++],49807.0
4,1.A.1.5.30,3473,Q06IP1,MNYRDVSKVHFGGDDVSLYGTPKEELGPGQLCVGAAGAPPGVEPKP...,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
94851,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0016021,0.0,17754.0,C3H8O3,glycerol,92.09382,OCC(O)CO,17754.0
94852,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0043661,0.0,17754.0,C3H8O3,glycerol,92.09382,OCC(O)CO,17754.0
94853,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0005215,0.0,17754.0,C3H8O3,glycerol,92.09382,OCC(O)CO,17754.0
94854,1.A.8.12.1,17754,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0009877,0.0,17754.0,C3H8O3,glycerol,92.09382,OCC(O)CO,17754.0


Worth noting that e- (ChEBI 10545) is missing. TODO: insert it manually with all its properties, then change "is" to "was" in this note.

In [55]:
# Rows where the merge found no ChEBI entry (the "chebi" column stayed empty).
# Note: len() counts rows of df_chebi_info, not distinct ChEBI IDs.
no_chebi_match = df_chebi_info["chebi"].isna()
df_missing_chebis = df_chebi_info.loc[no_chebi_match]
print(f"There are {len(df_missing_chebis)} ChEBI IDs that cannot be connected to any info, i.e. are too broad, like 'molecule' or 'polypeptide'.")

There are 33710 ChEBI IDs that cannot be connected to any info, i.e. are too broad, like 'molecule' or 'polypeptide'.


Next up, removing all instances from the df where there is no info to get on the ChEBI substrate

In [56]:
valid_chebis = chebi_df["chebi"]

# Keep rows whose ChEBI ID has an entry in chebi_df, then drop the helper columns.
df_chebi_info_filtered = df_chebi_info.loc[
    df_chebi_info["CHEBI IDs"].isin(valid_chebis)
].drop(columns=["chebi", "chebi_primary"])

# Tidy-up: place "CHEBI IDs" directly in front of the ChEBI property columns.
column_to_move = df_chebi_info_filtered.pop("CHEBI IDs")
df_chebi_info_filtered.insert(4, "CHEBI IDs", column_to_move)

df_chebi_info_filtered = df_chebi_info_filtered.reset_index(drop=True)
# df_chebi_info_filtered.to_csv("tcdb_data_chebi.csv", index=False)
df_chebi_info_filtered

Unnamed: 0,TC Number,UniProt ID,AA Sequence,GO Term,CHEBI IDs,charge,formula,chebi_label,mass,smiles
0,2.A.108.2.1,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0016020,49807,2.0,Pb,lead(2+),207.20000,[Pb++]
1,2.A.108.2.1,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0009055,49807,2.0,Pb,lead(2+),207.20000,[Pb++]
2,2.A.108.2.1,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0020037,49807,2.0,Pb,lead(2+),207.20000,[Pb++]
3,2.A.108.2.1,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0055085,49807,2.0,Pb,lead(2+),207.20000,[Pb++]
4,2.A.89.3.5,Q9LSF6,MTSNVQLSETNSPRNQKTRPRAEKEEVDYMQRAQWLRAALLGANDG...,GO:0016021,29033,2.0,Fe,iron(2+),55.84500,[Fe++]
...,...,...,...,...,...,...,...,...,...,...
61141,1.A.8.12.1,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0016021,17754,0.0,C3H8O3,glycerol,92.09382,OCC(O)CO
61142,1.A.8.12.1,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0043661,17754,0.0,C3H8O3,glycerol,92.09382,OCC(O)CO
61143,1.A.8.12.1,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0005215,17754,0.0,C3H8O3,glycerol,92.09382,OCC(O)CO
61144,1.A.8.12.1,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0009877,17754,0.0,C3H8O3,glycerol,92.09382,OCC(O)CO


In [80]:
# Spot-check: one transporter (1.A.75.1.1) listed with several substrates.
# Columns are picked by name rather than by position: at this point the frame
# has only 10 columns, so positional index 10 is out of bounds on a fresh
# Restart & Run All, and positions shift whenever a column is added later.
df_chebi_info_filtered.iloc[[90, 91, 97, 98, 118, 119]][
    [
        "TC Number",
        "UniProt ID",
        "GO Term",
        "CHEBI IDs",
        "charge",
        "formula",
        "chebi_label",
        "mass",
        "smiles",
    ]
]

Unnamed: 0,TC Number,UniProt ID,GO Term,CHEBI IDs,charge,formula,chebi_label,mass,smiles
90,1.A.75.1.1,Q92508,GO:0005789,9175,1.0,Na,sodium(1+),22.98977,[Na+]
91,1.A.75.1.1,Q92508,GO:0033116,9175,1.0,Na,sodium(1+),22.98977,[Na+]
97,1.A.75.1.1,Q92508,GO:0005789,8345,1.0,K,potassium(1+),39.0983,[K+]
98,1.A.75.1.1,Q92508,GO:0033116,8345,1.0,K,potassium(1+),39.0983,[K+]
118,1.A.75.1.1,Q92508,GO:0005789,35264,1.0,C4H12N,tetramethylammonium,74.14482,C[N+](C)(C)C
119,1.A.75.1.1,Q92508,GO:0033116,35264,1.0,C4H12N,tetramethylammonium,74.14482,C[N+](C)(C)C


Obtain all different ChEBIs in the df

In [57]:
# Distinct ChEBI IDs left after removing substrates without ChEBI info.
chebis = {*df_chebi_info_filtered["CHEBI IDs"]}
len(chebis)

1026

Add the number of descendants for each ChEBI ID

In [68]:
# Load the pickled hierarchy object; it is passed to nx.descendants() below.
# NOTE(review): unpickling can execute arbitrary code, so only load a
# hierarchy_chebi.pkl that was produced by this project.
with open("chebi_data/hierarchy_chebi.pkl", "rb") as f:
    G = pickle.load(f)

def get_all_descendants_count(node):
    """Return how many nodes ``nx.descendants`` reports for ``node`` in ``G``."""
    reachable = nx.descendants(G, node)
    return len(reachable)

In [69]:
# Make sure the IDs are numeric before they are looked up in the hierarchy.
df_chebi_info_filtered["CHEBI IDs"] = pd.to_numeric(
    df_chebi_info_filtered["CHEBI IDs"], errors="raise"
)
# One descendant count per row, then export the final table.
df_chebi_info_filtered["ChEBI Descendants"] = df_chebi_info_filtered["CHEBI IDs"].map(
    get_all_descendants_count
)
df_chebi_info_filtered.to_csv("tcdb_df.csv", index=False)
df_chebi_info_filtered

Unnamed: 0,TC Number,UniProt ID,AA Sequence,GO Term,CHEBI IDs,charge,formula,chebi_label,mass,smiles,ChEBI Descendants
0,2.A.108.2.1,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0016020,49807,2.0,Pb,lead(2+),207.20000,[Pb++],0
1,2.A.108.2.1,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0009055,49807,2.0,Pb,lead(2+),207.20000,[Pb++],0
2,2.A.108.2.1,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0020037,49807,2.0,Pb,lead(2+),207.20000,[Pb++],0
3,2.A.108.2.1,Q58AJ4,MQALRLLSIVLLSLFVTVSTAQADPLATQDKAKQIWQVLDYLAVDY...,GO:0055085,49807,2.0,Pb,lead(2+),207.20000,[Pb++],0
4,2.A.89.3.5,Q9LSF6,MTSNVQLSETNSPRNQKTRPRAEKEEVDYMQRAQWLRAALLGANDG...,GO:0016021,29033,2.0,Fe,iron(2+),55.84500,[Fe++],0
...,...,...,...,...,...,...,...,...,...,...,...
61141,1.A.8.12.1,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0016021,17754,0.0,C3H8O3,glycerol,92.09382,OCC(O)CO,0
61142,1.A.8.12.1,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0043661,17754,0.0,C3H8O3,glycerol,92.09382,OCC(O)CO,0
61143,1.A.8.12.1,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0005215,17754,0.0,C3H8O3,glycerol,92.09382,OCC(O)CO,0
61144,1.A.8.12.1,P08995,MADYSAGTESQEVVVNVTKNTSETIQRSDSLVSVPFLQKLVAEAVG...,GO:0009877,17754,0.0,C3H8O3,glycerol,92.09382,OCC(O)CO,0
