# Map BEIS and MEGAN species to CRACMM

---
    author: Havala Pye
    date: 2024-08-08

    updated: Nash Skipper
    date: 2024-08-09

    updated: Michael Pye
    date: 2025-02-27
---
## Notebook Description
This Notebook identifies the CRACMM species for each BEIS/MEGAN species using the mapper. The cracmm_mapper function depends on [rdkit](https://www.rdkit.org/).

## Download Notebook
Click [here](https://github.com/USEPA/CRACMM/blob/main/utilities/BEISMEGAN_biogenicmapping2cracmm.ipynb) to access the Jupyter Notebook file directly in GitHub where it can be downloaded.  


## Setup

In [None]:
import pandas as pd
import os

In [2]:
## Install rdkit if not already installed

# !python -m pip install --user rdkit

# to install in the current kernel:
# %pip install rdkit

In [None]:
# set location of mapper downloaded from https://github.com/USEPA/CRACMM/
# import sys
# utildir = '/path/to/cracmm/utilities/directory'   
# sys.path.append(utildir)

# Import the python utilities
import cracmm1_mapper as cracmm1   # includes: get_cracmm_roc(smiles,koh,log10cstar) (Version 1)
import cracmm2_mapper as cracmm2   # includes: get_cracmm_roc(smiles,koh,log10cstar) (Version 2)

In [None]:
# Input location: mapping tables shipped in the CRACMM repository (relative to this notebook).
datadir = '../emissions/BiogenicMappings/'    # data files of mappings
# Output location: an 'output/' folder under the current working directory.
outputdir = os.path.join(os.getcwd(), 'output/')
# DataFrame.to_csv does not create missing parent directories, so make sure the
# output folder exists before any later cell writes to it.
os.makedirs(outputdir, exist_ok=True)

In [5]:
# Display every row when a DataFrame is shown (no truncation).
pd.options.display.max_rows = None
# Opt in to pandas copy-on-write behaviour.
pd.set_option('mode.copy_on_write', True)
# Keyword arguments shared by every DataFrame.to_csv call in this notebook.
csvout_kw = {
    'sep': ',',
    'na_rep': '',
    'float_format': None,
    'columns': None,
    'header': True,
    'index': False,
}

## BEIS
Input: the BEIS species mapping file from https://github.com/USEPA/CRACMM/tree/main/emissions/BiogenicMappings

In [None]:
# Read the BEIS species table from the mappings directory.
filename = f'{datadir}bvoc_beis_tocracmm.csv'
dfbeis = pd.read_csv(filename)

# Existing mapping version in the file that the new mapping is compared to
# (options: CRACMM1, CRACMM2).
orig_map_colname = 'CRACMM1'
dfbeis = dfbeis.rename(columns={'CRACMMorig': orig_map_colname})

# Column names holding the three inputs the mapper takes.
smiles_k = 'SMILES'
koh_k    = 'ATMOSPHERIC_HYDROXYLATION_RATE_(AOH)_CM3/MOLECULE*SEC_OPERA_PRED'
cstar_k  = 'log10Cstar_ugm3'

# Apply the CRACMM2 mapper row by row and store its result as a new column.
dfbeis['CRACMMnew'] = dfbeis.apply(
    lambda row: cracmm2.get_cracmm_roc(row[smiles_k], row[koh_k], row[cstar_k]),
    axis=1,
)

# Flag rows where the new mapping equals the existing one, then report the rest.
dfbeis_checkmatch = dfbeis.eval(f'match = {orig_map_colname}==CRACMMnew')
show_cols = ['SPECIES_NAME', orig_map_colname, 'CRACMMnew']
beis_changed = ~dfbeis_checkmatch['match']
if beis_changed.any():
    print(f'the species mappings below changed from {orig_map_colname}')
    display(dfbeis_checkmatch.loc[beis_changed, show_cols])
else:
    print(f'all species matched {orig_map_colname} mapping')

# Write the table (including the new mapping column) to the output directory.
# Uncomment the next line to leave the comparison column out of the saved file.
#dfbeis = dfbeis.drop(columns=orig_map_colname)
dfbeis.to_csv(f'{outputdir}bvoc_beis_tocracmm.csv', **csvout_kw)

the species mappings below changed from CRACMM2alpha




Unnamed: 0,SPECIES_NAME,CRACMM2alpha,CRACMM2
12,para-cymene,ROCP6ARO,VROCP6ARO
32,carbon monoxide,SLOWROC,UNKCRACMM


## MEGAN
Input: the MEGAN species mapping file from https://github.com/USEPA/CRACMM/tree/main/emissions/BiogenicMappings

In [None]:
# Read the MEGAN species table from the mappings directory.
filename = os.path.join(datadir, 'bvoc_megan_tocracmm.csv')
dfmegan = pd.read_csv(filename)

# Existing mapping version in the file that the new mapping is compared to
# (options: CRACMM1, CRACMM2).
orig_map_colname = 'CRACMM1'
# If the file carries a 'CRACMMorig' column, expose it under the comparison name.
dfmegan = dfmegan.rename(columns=dict(CRACMMorig=orig_map_colname))

# Run the CRACMM2 mapper on every row; these are the columns it reads.
smiles_k = 'SMILES'
koh_k    = 'ATMOSPHERIC_HYDROXYLATION_RATE_(AOH)_CM3/MOLECULE*SEC_OPERA_PRED'
cstar_k  = 'log10Cstar_ugm3'
dfmegan['CRACMMnew'] = dfmegan.apply(
    lambda x: cracmm2.get_cracmm_roc(x[smiles_k], x[koh_k], x[cstar_k]),
    axis=1,
)

# Check whether any species mapping changed relative to the existing column.
dfmegan_checkmatch = dfmegan.eval(f'match = {orig_map_colname}==CRACMMnew')
show_cols = ['REPRESENTATIVE_COMPOUND_NAME', orig_map_colname, 'CRACMMnew']
megan_changed = ~dfmegan_checkmatch['match']
if megan_changed.any():
    print(f'the species mappings below changed from {orig_map_colname}')
    display(dfmegan_checkmatch.loc[megan_changed, show_cols])
else:
    print(f'all species matched {orig_map_colname} mapping')

# Save output under the MEGAN file name. Writing to 'bvoc_beis_tocracmm.csv'
# here would overwrite the BEIS results saved by the BEIS cell.
# Uncomment the next line to leave the comparison column out of the saved file.
#dfmegan = dfmegan.drop(columns=orig_map_colname)
dfmegan.to_csv(os.path.join(outputdir, 'bvoc_megan_tocracmm.csv'), **csvout_kw)



the species mappings below changed from CRACMM2alpha




Unnamed: 0,REPRESENTATIVE_COMPOUND_NAME,CRACMM2alpha,CRACMM2
24,p-Cymene,ROCP6ARO,VROCP6ARO
30,Estragole,ROCP6ARO,VROCP6ARO
32,beta-Ionone,ROCP6ARO,VROCP6ARO
36,1-Octen-3-ol,ROCP6ARO,VROCP6ARO
55,Farnesol,ROCP5ARO,VROCP5ARO
62,cis-Nerolidol,ROCP5ARO,VROCP5ARO
63,trans-Nerolidol,ROCP5ARO,VROCP5ARO
67,2-Ethylhexyl salicylate,ROCP2OXY2,VROCP2OXY2
70,(-)-alpha-Cadinol,ROCP5ARO,VROCP5ARO
72,(+)-Cedrol,ROCP5OXY1,VROCP5OXY1
