In [None]:
# Execute the shared helper script so that its names become available in this
# notebook's namespace.
# NOTE(review): names used further down but never defined in this notebook
# (ID_header, set_page_param, ana_set, repo_name, round_sig, getDataLink,
# getCatLink, getChemIDLink, getCompToxRef, getChemIDImg) presumably come from
# this script — confirm.
%run "catalog_common.py" 
# Presumably renders the page title/header for this index page — confirm.
ID_header('Open-FF:  Chemical Index', incl_links=True)
# Presumably applies page-wide display settings — confirm.
set_page_param()


In [None]:
import pandas as pd
import numpy as np
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)
from itables import show as iShow
import itables.options as opt

# #import core.Analysis_set as ana_set
# #import core.Construct_set as const_set
# import matplotlib.pyplot as plt
# %matplotlib inline
# pd.set_option('display.max_rows', 500)

def _most_frequent(values):
    """Return the most frequently occurring non-null entry of ``values``."""
    # NOTE(review): value_counts() drops nulls by default, so a group whose
    # IngredientName entries are all null would raise IndexError here —
    # confirm the data never contains such a group.
    return values.value_counts().index[0]


# Load the catalog data set used to build the chemical index.
df_cas = ana_set.Catalog_set(repo=repo_name, outdir='../common/').get_set(verbose=False)

# For every bgCAS, find the IngredientName that is reported most often and
# attach it to each record as `comm_name`.
common_names = (
    df_cas.groupby('bgCAS')['IngredientName']
    .agg(_most_frequent)
    .rename('comm_name')
    .reset_index()
)
df_cas = df_cas.merge(common_names, on='bgCAS', how='left')


The interactive table below lists all chemicals (from the Open-FF data set) published in FracFocus after curation. Some chemicals are listed that have no records in the filtered set, but are in the full, raw data. Chemical identity is established using both CASNumber and IngredientName, when possible.  See the [CAS/Ingredient Index](Open-FF_CASNumber_and_IngredientName.html) for more detail on these identities. Below the table is an explanation of this table's columns.

The table is sortable by any column (click a second time for reverse sort); use the **Search** bar to limit what is shown in the table.

In [None]:
# Display strings used when boolean list-membership flags are turned into text.
YES = 'Yes'
NO = ''

# Per-chemical record counts over all records in df_cas.
chem_df = (
    df_cas.groupby('bgCAS', as_index=False)[['UploadKey']].count()
    .rename({'UploadKey': 'numRecords'}, axis=1)
)

# Per-chemical record counts restricted to the standard filtered set; after
# the merge this count lives in the `UploadKey` column.
filtered_counts = (
    df_cas[df_cas.in_std_filtered]
    .groupby('bgCAS', as_index=False)[['UploadKey']].count()
)
chem_df = pd.merge(chem_df, filtered_counts, on='bgCAS', how='left')
# Chemicals with no filtered records have no match in the left merge.
chem_df = chem_df.fillna(0)

# Two-line HTML cell: filtered count on top, total count underneath.
chem_df['numRecords'] = (
    chem_df.UploadKey.astype('int').astype('str')
    + '<br>'
    + chem_df.numRecords.astype('str')
)

# Number of filtered records per chemical that have a positive calcMass.
mass_counts = (
    df_cas[(df_cas.calcMass > 0) & (df_cas.in_std_filtered)]
    .groupby('bgCAS', as_index=False)[['UploadKey']].count()
    .rename({'UploadKey': 'numWithMass'}, axis=1)
)
chem_df = pd.merge(chem_df, mass_counts, on='bgCAS', how='left')

# Assign the filled column back instead of calling fillna(inplace=True) on the
# attribute accessor: that chained in-place form emits a FutureWarning on
# pandas 2.2 and silently does nothing under Copy-on-Write.  The left merge
# turns the counts into floats, so cast back to whole numbers for display
# (the filtered count above gets the same treatment).
chem_df['numWithMass'] = chem_df['numWithMass'].fillna(0).astype('int')

# Boolean list-membership columns carried over from df_cas.
# NOTE: 'is_on_CWA_priority' handling is currently disabled.
flag_columns = ['is_on_TEDX', 'is_on_prop65', 'is_on_CWA', 'is_on_DWSHA',
                'is_on_PFAS_list', 'is_on_volatile_list']

# One row per bgCAS, taking the first value found in each group for the
# names, the flags and the DTXSID.
chem_info = df_cas.groupby('bgCAS')[
    ['bgIngredientName', 'comm_name'] + flag_columns + ['DTXSID']
].first()

# Replace each flag with its display string (YES when truthy, NO otherwise).
for flag in flag_columns:
    chem_info[flag] = np.where(chem_info[flag], YES, NO)

chem_df = chem_df.merge(chem_info, on='bgCAS', how='left')

# 90th percentile of calcMass per chemical, computed over filtered records
# that have a positive mass.
mass_p90 = (
    df_cas[(df_cas.calcMass > 0) & (df_cas.in_std_filtered)]
    .groupby('bgCAS', as_index=False)['calcMass']
    .apply(np.percentile, 90)
)
# Keep three significant figures for display (round_sig is defined outside
# this notebook).
mass_p90['calcMass'] = mass_p90['calcMass'].map(lambda x: round_sig(x, 3))

chem_df = pd.merge(chem_df, mass_p90, on='bgCAS', how='left')
# Chemicals with no mass records get 0.  Assign the result back rather than
# using fillna(inplace=True) on the attribute accessor: that chained in-place
# form warns on pandas 2.2 and is a silent no-op under Copy-on-Write.
chem_df['calcMass'] = chem_df['calcMass'].fillna(0)
#chem_df = chem_df.rename({'calcMass':'Mass of 90th percentile (lbs)'},axis=1)

# Build the per-chemical link/image strings.  The helpers are defined outside
# this notebook; their results are concatenated with HTML fragments below, so
# they are expected to return strings — presumably HTML anchors/images.
chem_df['Filtered Data'] = chem_df.bgCAS.map(getDataLink)
chem_df['History'] = chem_df.bgCAS.map(lambda cas: getCatLink(cas, cas))
chem_df['ChemID'] = chem_df.bgCAS.map(getChemIDLink)
chem_df['EPA_ref'] = chem_df.DTXSID.map(getCompToxRef)
chem_df['molecule'] = chem_df.bgCAS.map(getChemIDImg)

# CSS classes applied to the itables output.
opt.classes = ['display', 'cell-border']

# Label materials that have no bgIngredientName.  Assign the result back
# rather than using fillna(inplace=True) on the attribute accessor: that
# chained in-place form warns on pandas 2.2 and is a silent no-op under
# Copy-on-Write.
chem_df['bgIngredientName'] = chem_df['bgIngredientName'].fillna('non CAS')

# Two names stacked in one cell: bgIngredientName above, comm_name below.
chem_df['names'] = chem_df.bgIngredientName + '<br>----------<br>' + chem_df.comm_name

# Keep the plain identifier before bgCAS is overwritten with the HTML cell.
chem_df['just_cas'] = chem_df.bgCAS
chem_df['bgCAS'] = '<center><h3>' + chem_df.History + '</h3>' + chem_df.names + '</center>'

# Reference cell: one link per line.
chem_df['ref'] = chem_df['Filtered Data'] + '<br>' + chem_df.ChemID + '<br>' + chem_df.EPA_ref

# (flag column, label shown in the table), in display order.
# NOTE: the 'EPA_priority' label for is_on_CWA_priority is currently disabled.
external_lists = [
    ('is_on_CWA', 'CWA'),
    ('is_on_DWSHA', 'DWSHA'),
    ('is_on_TEDX', 'TEDX'),
    ('is_on_prop65', 'prop65'),
    ('is_on_PFAS_list', 'EPA_PFAS'),
    ('is_on_volatile_list', 'EPA_volatile'),
]

# Build one HTML string per chemical by appending '<label><br>' for every
# list whose flag is truthy.  The flags were converted to 'Yes'/'' strings
# earlier in this cell, so a non-empty string means "on the list".
chem_df['extrnl'] = ''
for flag_col, label in external_lists:
    chem_df['extrnl'] = np.where(
        chem_df[flag_col],
        chem_df['extrnl'] + label + '<br>',
        chem_df['extrnl'],
    )

# Columns shown in the published table, in display order.
display_columns = ['bgCAS', 'ref', 'molecule',
                   'numRecords', 'numWithMass', 'calcMass', 'extrnl']

# Human-readable titles for the displayed columns ('ref' and 'molecule' keep
# their names).
column_titles = {
    'bgCAS': 'Material',
    'numRecords': 'total num records',
    'numWithMass': 'num records with mass',
    'calcMass': 'Mass of 90th percentile (lbs)',
    'extrnl': 'on external lists',
}

chem_df = chem_df[display_columns].rename(columns=column_titles)

# NOTE(review): columnDefs targets "Name" and "CAS Number", neither of which
# is a column title of the frame shown here — confirm whether this width
# setting still has any effect or should target another column.
iShow(
    chem_df.reset_index(drop=True),
    maxBytes=0,
    columnDefs=[{"width": "100px", "targets": ["Name","CAS Number"]}],
)
#iShow(chem_df.reset_index(drop=True),maxBytes=0)


**Important note about chemical names**: most chemical compounds have a number of names they are known by.  For example, the chemical with the CAS registration number 111-76-4 is listed as:

| name | source |
| --- |--- |
|ethanol, 2-butoxy-|SciFinder (primary name)|
|Ethanol, 2-butoxy-|EPA substance name|
|Ethylene glycol monobutyl ether| EPA registry name|
|Ethylene glycol monobutyl ether (2-butoxyethanol)| Elsner/Hoelzer's paper|
| 2-BE |common name|

In the table above, two names are given for each material, but many others are often used. That might be a problem if you are trying to find something by a different name.  If you are not having luck in the table above, try exploring the synonyms table for the most appropriate CAS Number.

|Explanation of columns in the index|
| :---: |

| Column      | Description |
| :----: | :-------- |
|*Material*| at top is the CAS registration number of the chemical. **Click on this link** to display summary of this chemical for all of FracFocus (filtered data - up to last major update). Below are two names for the material: the upper is used by SciFinder as the primary name and the lower is the most common reported name in FracFocus| 
|*ref*|Click on the "csv" link to a csv-formatted file that has all the filtered records for this chemical.  Click on "ChemID" link for the ChemID website for this chemical.  Click on "EPA: CompTox" for the summary page in that resource|
|*molecule*| is the chemical structure (credit: ChemID)|
|*total num records* |indicates the number of records across the FracFocus data. A given chemical may appear more than once in a disclosure.  The upper number is the number of records in the **filtered** data, and the lower value is the number of records in the **unfiltered** data.|
|*num records with mass*|indicates the number of records with mass of this chemical in the **filtered** data. Without mass, a record only records presence in the disclosure.|
|*Mass of 90th percentile (lbs)*| the top 10% of records with mass in the **filtered** data have mass equal to or greater than this value|
|*on external lists* |indicates if the chemical is on one of the following lists.  Type the name of the list into the Search box to limit the table to those chemicals.<br> - **CWA**: indicates that the chemical is on the [Clean Water Act list](https://comptox.epa.gov/dashboard/chemical_lists/CWA311HS) as compiled in EPA's CompTox<br>- **DWSHA**: indicates that the chemical is on the EPA's [Drinking Water Safety and Health Advisory](https://comptox.epa.gov/dashboard/chemical_lists/EPADWS) list<br>- **TEDX**: indicates that the chemical is on [The Endocrine Disruption Exchange](https://endocrinedisruption.org) list<br>- **prop65**: indicates that the chemical is on [California's Proposition 65](https://oehha.ca.gov/proposition-65/proposition-65-list) list<br>- **EPA_PFAS**: indicates that the chemical is on EPA's comprehensive list of PFAS-related compounds<br>- **EPA_volatile**: indicates that the chemical is on the EPA's volatile chemical list|

