In [1]:
import pandas as pd

# Notebook for adding New compounds to table

### 1. Run this cell to load the table

In [20]:
# load table from parquet file
compound_table = pd.read_parquet('compound_table.parquet')

# sort values by unique idea so last has highest id
compound_table.sort_values('id', ascending=True, inplace=True)

# electrochem_table = pd.read_parquet('electrochem_table.parquet')
# photochem_table = pd.read_parquet('photochem_table.parquet')
# solubility_table = pd.read_parquet('solubility_table.parquet')


### 2. Enter the details of the compound you want to add below, then run the cell

In [22]:
compound_details = {
                  'name': 'dimethyldithiocarbamate (M-)', 
                  'formula': 'C3H6NS2-', 
                  'molecular_weight': 120, 
                  'CAS': '137-30-4', 
                  'source_dois': 'DOI: 10.1039/c2ta00719c, https://pubchem.ncbi.nlm.nih.gov/compound/3566770', 
                  'recyclable': 1, 
                  'type': 'electron donor', 
                  'compound_family': 'disulfide forming', 
                  'used_in_photocat': 0, 
                  'used_in_rfbs': 0, 
                  'used_as_hcarrier': 0,
                'used_in_DSSC': 1,
}

### 3. Run this cell to add your compound to the table
This code includes a duplication check.

In [23]:
# create unique compound id and add compound

# if the table is empty add the compound with id 1
if compound_table.empty:
    compound_details['id'] = int(1)
    compound_table = pd.concat([compound_table, pd.DataFrame([compound_details,])])
    
    
else:
    # check if compounds already exist with the same combination of name, formula and CAS
    check_mask = compound_table.isin([compound_details['name'], compound_details['formula'], compound_details['CAS']])
    check_mask = check_mask.loc[(check_mask.name==True) & (check_mask.formula==True), :]
    
    # if the CAS is a blank string ignore CAS in your checks
    if compound_details['CAS'] == '':
        check_mask.CAS = False
    
    
    # if no compounds with matching name, formula and CAS are found add one to the last id number and add the compound with that id
    if check_mask.any(axis=1).empty:
        # this assumes table always ordered ascending to make addition faster
        compound_details['id'] = int(compound_table.id.iat[-1] + 1)
        compound_table = pd.concat([compound_table, pd.DataFrame([compound_details,])])
        
    else:
        # if matches are found print them with a warning message and don't add the compound yet
        print(f"The following compounds appear to match the one you are trying to add {check_table.name.to_list()}.")
        print("To edit an existing entry please use the compound editor notebook. Otherwise please change the name or formula of your compound to add it.")
        display(compound_table.loc[check_mask.any(axis=1), :])
        print("Your compound has not yet been added")
    

#### (Execute next cell to preview last 5 entries)

In [24]:
# code to preview last five entries
if compound_table.shape[0] > 6:
    display(compound_table.tail(5))
else:
    display(compound_table)


Unnamed: 0,id,name,formula,molecular_weight,CAS,source_dois,recyclable,type,compound_family,used_in_photocat,used_in_rfbs,used_as_hcarrier,used_in_DSSC
0,43,"9,10-Dihydro-10-methyl-9-phenylacridine",C20H17N,271.0,56875-26-4,"DOI: 10.1039/C9CC00928K, https://pubchem.ncbi....",1,electron donor,acridine,1,0,0,
0,44,iridium triphenylpyridine Ir(ppy)3,,0.0,,DOI: 10.1038/nature23016,1,photosensitizer,organometallic,1,0,0,
0,45,"2-mercapto-5-methyl-1,3,4-thiadiazole (McMT)",C3H4N2S2,132.0,29490-19-5,"DOI: 10.1002/anie.201003740, https://pubchem.n...",1,electron donor,disulfide forming,0,0,0,1.0
0,46,tetramethylthiourea (TMTU),C5H12N2S,132.0,2782-91-4,"DOI: 10.1002/adfm.201000150, https://pubchem.n...",1,electron donor,disulfide forming,0,0,0,1.0
0,47,dimethyldithiocarbamate (M-),C3H6NS2-,120.0,137-30-4,"DOI: 10.1039/c2ta00719c, https://pubchem.ncbi....",1,electron donor,disulfide forming,0,0,0,1.0


### 4. Run the next cell to save your changes to the table 

In [25]:
compound_table.to_parquet('compound_table.parquet', engine='pyarrow', compression=None)

In [6]:
compound_table.type.unique()

array(['electron donor', 'electron acceptor', 'photosensitizer'],
      dtype=object)

In [26]:
display(compound_table.loc[compound_table.compound_family=='disulfide forming', :])

Unnamed: 0,id,name,formula,molecular_weight,CAS,source_dois,recyclable,type,compound_family,used_in_photocat,used_in_rfbs,used_as_hcarrier,used_in_DSSC
0,33,5-mercapto-1-methyltetrazole (T-),C2H3N4S,115.0,,"DOI: 10.1038/nchem.610,",1,electron donor,disulfide forming,0,0,0,1.0
0,39,"1,2dimethyl-3-propyl-imidazol L-cysteine ([DMP...",2[C8H15N2]+[C3H5NO2S]2-,397.0,,DOI: 10.1039/C1EE02540F,1,electron donor,disulfide forming,0,0,0,1.0
0,45,"2-mercapto-5-methyl-1,3,4-thiadiazole (McMT)",C3H4N2S2,132.0,29490-19-5,"DOI: 10.1002/anie.201003740, https://pubchem.n...",1,electron donor,disulfide forming,0,0,0,1.0
0,46,tetramethylthiourea (TMTU),C5H12N2S,132.0,2782-91-4,"DOI: 10.1002/adfm.201000150, https://pubchem.n...",1,electron donor,disulfide forming,0,0,0,1.0
0,47,dimethyldithiocarbamate (M-),C3H6NS2-,120.0,137-30-4,"DOI: 10.1039/c2ta00719c, https://pubchem.ncbi....",1,electron donor,disulfide forming,0,0,0,1.0


In [4]:
#''C17H21N3''
12+17+21+14*3

92