In [1]:
import pandas as pd

# Notebook for adding New compounds to table

### 1. Run this cell to load the table

In [8]:
# load table from parquet file
compound_table = pd.read_parquet('compound_table.parquet')

# sort values by unique idea so last has highest id
compound_table.sort_values('id', ascending=True, inplace=True)

# electrochem_table = pd.read_parquet('electrochem_table.parquet')
# photochem_table = pd.read_parquet('photochem_table.parquet')
# solubility_table = pd.read_parquet('solubility_table.parquet')


### 2. Enter the details of the compound you want to add below, then run the cell

In [18]:
compound_details = {
                  'name': '[Cu(dpp)]+', 
                  'formula': '', 
                  'molecular_weight': 0, 
                  'CAS': '', 
                  'source_dois': 'Photochemistry and Photophysics concepts resarch applications. V. Balzani, P. Ceroni, A, Juris Ed. 1 Wiley-VCH 2014 Weinheim Germany page 199, ', 
                  'recyclable': 1, 
                  'type': 'photosensitizer', 
                  'compound_family': 'organometallic', 
                  'used_in_photocat': 1, 
                  'used_in_rfbs': 0, 
                  'used_as_hcarrier': 0}

### 3. Run this cell to add your compound to the table
This code includes a duplication check.

In [19]:
# create unique compound id and add compound

# if the table is empty add the compound with id 1
if compound_table.empty:
    compound_details['id'] = int(1)
    compound_table = pd.concat([compound_table, pd.DataFrame([compound_details,])])
    
    
else:
    # check if compounds already exist with the same combination of name, formula and CAS
    check_mask = compound_table.isin([compound_details['name'], compound_details['formula'], compound_details['CAS']])
    check_mask = check_mask.loc[(check_mask.name==True) & (check_mask.formula==True), :]
    
    # if the CAS is a blank string ignore CAS in your checks
    if compound_details['CAS'] == '':
        check_mask.CAS = False
    
    
    # if no compounds with matching name, formula and CAS are found add one to the last id number and add the compound with that id
    if check_mask.any(axis=1).empty:
        # this assumes table always ordered ascending to make addition faster
        compound_details['id'] = int(compound_table.id.iat[-1] + 1)
        compound_table = pd.concat([compound_table, pd.DataFrame([compound_details,])])
        
    else:
        # if matches are found print them with a warning message and don't add the compound yet
        print(f"The following compounds appear to match the one you are trying to add {check_table.name.to_list()}.")
        print("To edit an existing entry please use the compound editor notebook. Otherwise please change the name or formula of your compound to add it.")
        display(compound_table.loc[check_mask.any(axis=1), :])
        print("Your compound has not yet been added")
    

#### (Execute next cell to preview last 5 entries)

In [20]:
# code to preview last five entries
if compound_table.shape[0] > 6:
    display(compound_table.tail(5))
else:
    display(compound_table)


Unnamed: 0,id,name,formula,molecular_weight,CAS,source_dois,recyclable,type,compound_family,used_in_photocat,used_in_rfbs,used_as_hcarrier
0,34,"2,3-Dichloro-5,6-dicyano-1,4-benzoquinone (DDQ)",C8Cl2N2O2,227.0,84-58-2,"DOI: 10.1002/ejoc.202100011, https://pubchem.n...",1,electron donor,quinone,0,0,1
0,35,p-dimethylaminobenzaldehyde,C9H11NO,149.0,100-10-7,"https://doi.org/10.1016/S0022-0728(70)80304-7,...",1,electron donor,amine,0,0,0
0,36,"[Ru(bpy)3]2+ Tris(2,2-bipyridyl)ruthenium(II)...",C30H36Cl2N6O6Ru,749.0,50525-27-4,Photochemistry and Photophysics concepts resar...,1,photosensitizer,organometallic,1,0,0
0,37,[Ir(tpy)3]3+,,0.0,,Photochemistry and Photophysics concepts resar...,1,photosensitizer,organometallic,1,0,0
0,38,[Cu(dpp)]+,,0.0,,Photochemistry and Photophysics concepts resar...,1,photosensitizer,organometallic,1,0,0


### 4. Run the next cell to save your changes to the table 

In [21]:
compound_table.to_parquet('compound_table.parquet', engine='pyarrow', compression=None)

In [7]:
254+16+12+2

284

In [3]:
display(compound_table)

Unnamed: 0,id,name,formula,molecular_weight,CAS,source_dois,recyclable,type,compound_family,used_in_photocat,used_in_rfbs,used_as_hcarrier
0,1,triethylamine,(C2H5)3N,101.0,121-44-8,"https://doi.org/10.1016/j.crci.2015.11.026, si...",0,electron donor,amine,1,0,0
0,2,triethanolamine,(HOCH2CH2)3N,149.0,102-71-6,"https://doi.org/10.1016/j.crci.2015.11.026, si...",0,electron donor,amine,1,0,0
0,3,"N,N-dimethylaniline (DMA)",C6H5N(CH3)2,121.0,121-69-7,"https://doi.org/10.1016/j.crci.2015.11.026, si...",0,electron donor,amine,1,0,0
0,4,4-dimethylaminotoluene (DMT),CH3C6H4N(CH3)2,135.0,99-97-8,"https://doi.org/10.1016/j.crci.2015.11.026, si...",0,electron donor,amine,1,0,0
0,5,"1,3-dimethyl-2-phenylbenzimidazoline (BIH)",C15H16N2,224.0,3652-92-4,"https://doi.org/10.1016/j.crci.2015.11.026, ht...",0,electron donor,benzimidazole,1,0,1
0,6,L-ascorbic acid (vitamin C),C6H8O6,176.0,50-81-7,"https://doi.org/10.1016/j.crci.2015.11.026, ht...",0,electron donor,organic acid,1,0,0
0,7,oxalate,[C2O4]-2,88.0,338-70-5,"https://doi.org/10.1016/j.crci.2015.11.026, ht...",0,electron donor,organic anion,1,0,0
0,8,triphenylphosphine,(C6H5)P3,262.0,603-35-0,"https://doi.org/10.1016/j.crci.2015.11.026, ht...",0,electron donor,phosphine,1,0,0
0,9,ethylenediaminetetraacetic acid (EDTA),(HO2CCH2)2NCH2CH2N(CH2CO2H)2,292.0,60-00-4,"https://doi.org/10.1039/c3cp55023k, https://ww...",0,electron donor,organic acid,1,0,0
0,10,NADH,C21H29N7O14P2,665.0,58-68-4,"https://doi.org/10.1016/0302-4598(74)85011-7, ...",0,electron donor,nicotinamide biomimetic,1,0,0


In [48]:
#'C2H3N4S'
12*2+3+14*4+32

115