### Import modules

In [1]:
import pybel
import pandas as pd

  from tqdm.autonotebook import tqdm


### Convert BEL to dataframes

In [2]:
def bel_to_df(path, database, key):
    """Extract plant-chemical edges from a BEL node-link file into a DataFrame.

    An edge is kept only when its source is a ``pybel.dsl.Population`` whose
    CURIE starts with ``ncbitaxon:`` and its target is a
    ``pybel.dsl.Abundance``; every other edge is skipped.

    Parameters
    ----------
    path
        Location of the node-link JSON file, handed to
        ``pybel.from_nodelink_file``.
    database
        Label written to the ``database`` column of every row.
    key
        Edge-data key holding the evidence. For ``'evidence'`` the value is
        stored as is; for any other key the value's ``'identifier'`` entry
        is stored instead.

    Returns
    -------
    pandas.DataFrame
        One row per retained edge with the columns ``plant_curie``,
        ``chemical_curie``, ``database`` and ``evidence``. The columns are
        present even when no edge qualifies, so downstream column access and
        the written file header stay stable.
    """
    # Fixed schema: without it an empty result would be a column-less frame.
    columns = ['plant_curie', 'chemical_curie', 'database', 'evidence']

    graph = pybel.from_nodelink_file(path)

    rows = []

    for source, target, data in graph.edges(data=True):
        # Only plant (Population) -> chemical (Abundance) edges are of interest.
        if not (isinstance(source, pybel.dsl.Population) and isinstance(target, pybel.dsl.Abundance)):
            continue

        # Restrict plants to NCBI Taxonomy identifiers.
        if not source.curie.startswith('ncbitaxon:'):
            continue

        rows.append({
            'plant_curie': source.curie,
            'chemical_curie': target.curie,
            'database': database,
            'evidence': data[key] if key == 'evidence' else data[key]['identifier'],
        })

    return pd.DataFrame(rows, columns=columns)
        

In [3]:
# Relative path to the project's data directory.
DATA_DIR = '../data'

# Parse each database's BEL node-link export into a plant-chemical table;
# both calls read the 'evidence' edge key.
coconut = bel_to_df(
    path=f'{DATA_DIR}/data_files/coconut.bel.nodelink.json',
    database='coconut',
    key='evidence',
)

lotus = bel_to_df(
    path=f'{DATA_DIR}/data_files/lotus.bel.nodelink.json',
    database='lotus',
    key='evidence',
)

In [4]:
# Stack the per-database tables into a single frame with a fresh integer index.
plant_chemical_associations_df = pd.concat([coconut, lotus], ignore_index=True)

In [5]:
# Dimensions (rows, columns) of the combined table.
plant_chemical_associations_df.shape

(330655, 4)

In [6]:
# Write the combined table as a tab-separated file, omitting the index column.
plant_chemical_associations_df.to_csv(
    path_or_buf=f'{DATA_DIR}/processed_data/plant-chemical/plant_chemical_associations.tsv',
    sep='\t',
    index=False,
)