# Open-FF chemical index

### FracFocus download: June 6, 2021

In [None]:
from math import log10, floor
import core.get_google_map as ggmap

def round_sig(x, sig=2):
    """Format a number rounded to ``sig`` significant figures.

    Values whose magnitude is 1 or more are returned as a whole-number
    string with thousands separators (digits after the decimal point are
    dropped, even when ``sig`` would otherwise keep them).  Values whose
    magnitude is below 1 are returned as the string form of the rounded
    float.

    Inputs that cannot be rounded this way -- zero, NaN, infinity, None,
    strings and other non-numeric values -- are returned unchanged instead
    of raising, so the function can be mapped over a column that contains
    missing data.
    """
    try:
        # Number of decimal places that keeps `sig` significant figures;
        # negative when rounding to tens, hundreds, ...
        ndigits = sig - int(floor(log10(abs(x)))) - 1
        rounded = round(x, ndigits)
        if abs(x) >= 1:
            return f"{int(rounded):,d}"  # does the right thing with commas
        return str(rounded)  # fractional numbers
    except (TypeError, ValueError, OverflowError):
        # TypeError: non-numeric input; ValueError: log10(0) or floor(NaN);
        # OverflowError: floor(inf).  Anything else is a real bug and
        # should propagate.
        return x
    
# used to insert links of google maps into tables
def make_clickable(val):
    """Turn a URL string into an HTML anchor labelled 'map'.

    Strings starting with 'http' are wrapped in an ``<a>`` tag that opens
    in a new tab.  Every other value (non-URL strings, numbers, None, NaN)
    is returned unchanged, so the function can be applied to whole table
    columns.
    """
    # Explicit type check instead of relying on a caught exception from
    # slicing a non-string.
    if isinstance(val, str) and val.startswith('http'):
        return '<a href="{}" target="_blank">{}</a>'.format(val, 'map')
    return val

def getLink(row):
    """Return ggmap's search link for the coordinates stored on ``row``.

    ``row`` must expose ``Latitude`` and ``Longitude`` attributes; both are
    passed straight to ``ggmap.getSearchLink``.
    """
    lat, lon = row.Latitude, row.Longitude
    return ggmap.getSearchLink(lat, lon)

def getCatLink(cas,text_to_show='Analysis'):
    """Return the link markup for the analysis page of chemical ``cas``.

    The target is the relative path ``<cas>/analysis_<cas>.html``; it is
    handed to ``ggmap.wrap_URL_in_html`` together with ``text_to_show`` as
    the visible label.
    """
    # Absolute bases that were used instead of the relative path at some point:
    #   https://qbobioyuz1dh57rst8exeg-on.drv.tw/open_FF_catalog/
    #   file://C:/MyDocs/sandbox/data/O&G/open-FF/tmp/website/
    analysis_page = '{0}/analysis_{0}.html'.format(cas)
    return ggmap.wrap_URL_in_html(analysis_page, text_to_show)

def getDataLink(cas):
    """Return the link markup for the csv data file of chemical ``cas``.

    The target is the relative path ``<cas>/data.csv`` and the visible
    label is 'csv file'; the markup comes from ``ggmap.wrap_URL_in_html``.
    """
    csv_path = '{}/data.csv'.format(cas)
    return ggmap.wrap_URL_in_html(csv_path, 'csv file')

def getChemIDLink(cas):
    """Return the link markup for the ChemIDplus page of chemical ``cas``.

    The registry-number URL is built from ``cas`` and wrapped by
    ``ggmap.wrap_URL_in_html`` with the visible label 'ChemID'.
    """
    chemid_url = 'https://chem.nlm.nih.gov/chemidplus/rn/{}'.format(cas)
    return ggmap.wrap_URL_in_html(chemid_url, 'ChemID')


def xlate_to_str(inp,sep=' ',trunc=False,tlen=20):
    """Join the string items of ``inp`` into one string separated by ``sep``.

    Parameters
    ----------
    inp : iterable of str
        Items to join.
    sep : str
        Separator placed between items (an empty separator is allowed).
    trunc : bool
        When true, items longer than ``tlen`` characters are cut to
        ``tlen - 3`` characters followed by '...'.
    tlen : int
        Maximum item length used when ``trunc`` is true.

    Returns
    -------
    str
        The joined text, or '' when ``inp`` is empty, is not iterable
        (e.g. None or NaN), or contains items that are not strings.
    """
    try:
        pieces = []
        for s in inp:
            if trunc and len(s) > tlen:
                s = s[:tlen-3] + '...'
            pieces.append(s)
        # join() puts the separator only between items, so there is no
        # trailing separator to strip (stripping broke for sep == '').
        return sep.join(pieces)
    except (TypeError, AttributeError):
        # Best effort: unusable input yields an empty cell, not an error.
        return ''

In [None]:
import pandas as pd
import numpy as np
from itables import init_notebook_mode
init_notebook_mode(all_interactive=True)
from itables import show as iShow
import itables.options as opt

import core.Analysis_set as ana_set
#import core.Construct_set as const_set
import matplotlib.pyplot as plt
%matplotlib inline
# Allow pandas to display up to 500 rows before truncating its output.
pd.set_option('display.max_rows', 500)

# Fetch the catalog data set used by the index cell below, which groups it by
# 'bgCAS' (presumably a pandas DataFrame with one row per record -- confirm
# against core.Analysis_set).
df_cas = ana_set.Catalog_set().get_set()


The interactive table below lists all chemicals (from the Open-FF **filtered** data set) published in FracFocus. Some listed chemicals have no records in the filtered set but do appear in the full, raw data.

The table can be sorted by any column (click a second time to reverse the sort); use the **Search** bar to limit what is shown in the table.

**Important note about chemical names**: most chemical compounds have a number of names they are known by.  For example, the chemical with the CAS registration number 111-76-4 is listed as:

| name | source |
| --- |--- |
|ethanol, 2-butoxy-|SciFinder (primary name)|
|Ethanol, 2-butoxy-|EPA substance name|
|Ethylene glycol monobutyl ether| EPA registry name|
|Ethylene glycol monobutyl ether (2-butoxyethanol)| Elsner/Hoelzer's paper|
| 2-BE |common name|

In the table below, only the primary SciFinder name is listed.  That might be a problem if you are trying to find something by a different name.  For example, the name **xylene** is not even used in our SciFinder list, but there are several compounds that have "xylene" in their synonyms.  If you are not having luck in the table below, try exploring the synonyms table for the most appropriate CAS Number.

|Explanation of columns in the index|
| :---: |

| Column      | Description |
| :----: | :-------- |
|*CAS Number*| is the CAS registration number of the chemical. **Click on this link** to display a summary of this chemical for all of FracFocus (filtered data - up to the last major update). Click on the "csv file" link for a csv-formatted file that has all the filtered records for this chemical. Click on the "ChemID" link for the ChemID website page for this chemical.|
|*Name*| is the primary name for the material given by SciFinder|
|*total num records* |indicates the number of records across the FracFocus data. A given chemical may appear more than once in a disclosure.  The upper number is the number of records in the **filtered** data, and the lower value is the number of records in the **unfiltered** data.|
|*num records with mass*|indicates the number of records with mass of this chemical in the **filtered** data. Without mass, a record only records presence in the disclosure.|
|*mass of 90th percentile (lbs)*| the top 10% of records in the **filtered** data have mass equal to or greater than this value|
|*on TEDX list* |indicates if the chemical is on [The Endocrine Disruption Exchange](https://endocrinedisruption.org) list|
|*on Prop 65 list* |indicates if the chemical is on [California's Proposition 65](https://oehha.ca.gov/proposition-65/proposition-65-list) list|
|*on CWA SDWA lists* |indicates if the chemical is on the Safe Drinking Water Act list or the Clean Water Act list (compiled by the Wylie Lab)|


# Index of FracFocus chemicals


In [None]:
# Build the chemical index: one row per bgCAS with record counts, the 90th
# percentile of mass, list-membership flags and HTML links, then render it
# as an interactive table.
YES = 'Yes'
NO = ''

# Records in the standard filtered set that report a positive mass; used for
# both the with-mass count and the percentile below.
with_mass = df_cas[(df_cas.calcMass > 0) & (df_cas.in_std_filtered)]

# Record counts per chemical: all records first, then filtered records only.
chem_df = df_cas.groupby('bgCAS', as_index=False)[['UploadKey']].count()
chem_df = chem_df.rename({'UploadKey': 'numRecords'}, axis=1)
gb1 = df_cas[df_cas.in_std_filtered].groupby('bgCAS', as_index=False)[['UploadKey']].count()
chem_df = pd.merge(chem_df, gb1, on='bgCAS', how='left')
# Chemicals with no filtered records get a count of 0 instead of NaN.
chem_df = chem_df.fillna(0)
# Cell text is "<filtered count><br><total count>".
chem_df['numRecords'] = (chem_df.UploadKey.astype('int').astype('str')
                         + '<br>' + chem_df.numRecords.astype('str'))

# Number of filtered records that carry a mass.
t = with_mass.groupby('bgCAS', as_index=False)[['UploadKey']].count()
t = t.rename({'UploadKey': 'numWithMass'}, axis=1)
chem_df = pd.merge(chem_df, t, on='bgCAS', how='left')
# Assign the filled column back: an in-place fillna on a column accessor is
# chained assignment, which pandas deprecates and which has no effect under
# Copy-on-Write.
chem_df['numWithMass'] = chem_df['numWithMass'].fillna(0)

# Primary name and list-membership flags; flags are shown as 'Yes' or blank.
t = df_cas.groupby('bgCAS')[['bgIngredientName', 'is_on_TEDX', 'is_on_prop65', 'is_on_CWA_SDWA']].first()
for flag in ('is_on_TEDX', 'is_on_prop65', 'is_on_CWA_SDWA'):
    t[flag] = np.where(t[flag], YES, NO)
chem_df = pd.merge(chem_df, t, on='bgCAS', how='left')

# 90th percentile of mass per chemical, formatted to 3 significant figures.
t = with_mass.groupby('bgCAS', as_index=False)['calcMass'].apply(np.percentile, 90)
t['calcMass'] = t['calcMass'].map(lambda x: round_sig(x, 3))
chem_df = pd.merge(chem_df, t, on='bgCAS', how='left')
# Chemicals without any mass records show 0 (same chained-assignment fix).
chem_df['calcMass'] = chem_df['calcMass'].fillna(0)

# Links shown in the CAS Number cell.
chem_df['Filtered Data'] = chem_df.bgCAS.map(getDataLink)
chem_df['History'] = chem_df.bgCAS.map(lambda x: getCatLink(x, x))
chem_df['ChemID'] = chem_df.bgCAS.map(getChemIDLink)
opt.classes = ['display', 'cell-border']
# The CAS cell becomes: analysis link as a heading, then csv and ChemID links.
chem_df['bgCAS'] = ('<h3>' + chem_df.History + '</h3><br>'
                    + chem_df['Filtered Data'] + '  ;  ' + chem_df.ChemID)

# Keep only the displayed columns, in display order, with readable headers.
chem_df = chem_df[['bgCAS', 'bgIngredientName', 'numRecords', 'numWithMass', 'calcMass',
                   'is_on_TEDX', 'is_on_prop65', 'is_on_CWA_SDWA']]
chem_df = chem_df.rename({'bgCAS': 'CAS Number',
                          'bgIngredientName': 'Name',
                          'numRecords': 'total num records',
                          'numWithMass': 'num records with mass',
                          'calcMass': 'Mass of 90th percentile (lbs)',
                          'is_on_TEDX': 'on TEDX list',
                          'is_on_prop65': 'on Prop 65 list',
                          'is_on_CWA_SDWA': 'on CWA SDWA lists'},
                         axis=1)
iShow(chem_df.reset_index(drop=True),maxBytes=0,columnDefs=[{"width": "100px", "targets": ["Name","CAS Number"]}])
