In [1]:
import pandas as pd

# Notebook for adding New compounds to table

### 1. Run this cell to load the table

In [2]:
# load table from parquet file
compound_table = pd.read_parquet('compound_table.parquet')

# sort values by unique idea so last has highest id
compound_table.sort_values('id', ascending=True, inplace=True)

# electrochem_table = pd.read_parquet('electrochem_table.parquet')
# photochem_table = pd.read_parquet('photochem_table.parquet')
# solubility_table = pd.read_parquet('solubility_table.parquet')


### 2. Enter the details of the compound you want to add below, then run the cell

In [39]:
compound_details = {
                  'name': 'triphenylphosphine', 
                  'formula': '(C6H5)P3', 
                  'molecular_weight': 262, 
                  'CAS': '603-35-0', 
                  'source_dois': 'https://doi.org/10.1016/j.crci.2015.11.026, https://www.sigmaaldrich.com/GB/en/substance/triphenylphosphine26229603350, ', 
                  'recyclable': 0, 
                  'type': 'electron donor', 
                  'compound_family': 'phosphine', 
                  'used_in_photocat': 1, 
                  'used_in_rfbs': 0, 
                  'used_as_hcarrier': 0}

### 3. Run this cell to add your compound to the table
This code includes a duplication check.

In [40]:
# create unique compound id and add compound

# if the table is empty add the compound with id 1
if compound_table.empty:
    compound_details['id'] = int(1)
    compound_table = pd.concat([compound_table, pd.DataFrame([compound_details,])])
    
    
else:
    # check if compounds already exist with the same combination of name, formula and CAS
    check_mask = compound_table.isin([compound_details['name'], compound_details['formula'], compound_details['CAS']])
    check_mask = check_mask.loc[(check_mask.name==True) & (check_mask.formula==True), :]
    
    # if the CAS is a blank string ignore CAS in your checks
    if compound_details['CAS'] == '':
        check_mask.CAS = False
    
    
    # if no compounds with matching name, formula and CAS are found add one to the last id number and add the compound with that id
    if check_mask.any(axis=1).empty:
        # this assumes table always ordered ascending to make addition faster
        compound_details['id'] = int(compound_table.id.iat[-1] + 1)
        compound_table = pd.concat([compound_table, pd.DataFrame([compound_details,])])
        
    else:
        # if matches are found print them with a warning message and don't add the compound yet
        print(f"The following compounds appear to match the one you are trying to add {check_table.name.to_list()}.")
        print("To edit an existing entry please use the compound editor notebook. Otherwise please change the name or formula of your compound to add it.")
        display(compound_table.loc[check_mask.any(axis=1), :])
        print("Your compound has not yet been added")
    

#### (Execute next cell to preview last 5 entries)

In [41]:
# code to preview last five entries
if compound_table.shape[0] > 6:
    display(compound_table.tail(5))
else:
    display(compound_table)


Unnamed: 0,id,name,formula,molecular_weight,CAS,source_dois,recyclable,type,compound_family,used_in_photocat,used_in_rfbs,used_as_hcarrier
0,4,4-dimethylaminotoluene (DMT),CH3C6H4N(CH3)2,135,99-97-8,"https://doi.org/10.1016/j.crci.2015.11.026, si...",0,electron donor,amine,1,0,0
0,5,"1,3-dimethyl-2-phenylbenzimidazoline (BIH)",C15H16N2,224,3652-92-4,"https://doi.org/10.1016/j.crci.2015.11.026, ht...",0,electron donor,imidazole,1,0,0
0,6,L-ascorbic acid (vitamin C),C6H8O6,176,50-81-7,"https://doi.org/10.1016/j.crci.2015.11.026, ht...",0,electron donor,organic acid,1,0,0
0,7,oxalate,[C2O4]-2,88,338-70-5,"https://doi.org/10.1016/j.crci.2015.11.026, ht...",0,electron donor,organic anion,1,0,0
0,8,triphenylphosphine,(C6H5)P3,262,603-35-0,"https://doi.org/10.1016/j.crci.2015.11.026, ht...",0,electron donor,phosphine,1,0,0


### 4. Run the next cell to save your changes to the table 

In [42]:
compound_table.to_parquet('compound_table.parquet', engine='pyarrow', compression=None)