# Make lists of all Solvents and Catalysts (+Reagents)
This jupyter notebook creates lists of e.g. used solvents within the found reactions

## Libraries

In [1]:
from os import listdir
from os.path import isfile, join
import pandas as pd
import collections

## Solvents

In [2]:
#open all compound reaction files
onlyfiles = [f for f in listdir('../data/raw/compounds/') if isfile(join('../data/raw/compounds/', f)) if 'DS_' not in f]

#go through all files
all_solvent = []
for file in onlyfiles:
    df = pd.read_excel('../data/raw/compounds/'+file)
    all_solvent.extend( df['Solvent (Reaction Details)'].unique())

#save all unique used solvents within all found reactions
all_solvent = list(set(all_solvent))
all_solvent = [x for x in all_solvent if str(x) != 'nan']

print ('Number of solvents: %d' %len(all_solvent))

Number of solvents: 25


In [3]:
#Output all used solvents
fp_out = open('../results/1_Solvent_Catalyst_Lists/List_of_all_solvents.tsv','w')
for solvent in all_solvent:
    print (solvent)
    fp_out.write(solvent+'\n')
fp_out.close()

ethyl acetate
water; toluene; acetonitrile
methanol; water
ethanol; ethanol
water; acetonitrile
nitromethane; water
glycerol
1,4-dioxane; N,N-dimethyl-formamide
acetic acid
toluene
dimethyl sulfoxide
methanol
acetonitrile
1,2-dichloro-ethane
ethyl acetate; N,N-dimethyl-formamide
dichloromethane
chlorobenzene
water
ethanol; water
methanol; chloroform
water; glycerol
methanol; acetic acid
chloroform
neat (no solvent)
ethanol


## Catalysts + Reagents

In [4]:
#open all compound reaction files
onlyfiles = [f for f in listdir('../data/raw/compounds/') if isfile(join('../data/raw/compounds/', f)) if 'DS_' not in f]

#lists that contains all reagents/catalysts
all_reagents_and_catalysts = []

#go through all files
for file in onlyfiles:
    print (file)
    #open the file as pandas dataframe
    df = pd.read_excel('../data/raw/compounds/'+file)
    
    #get unique reagent and catalyst
    reagents =  df['Reagent'].unique()
    catalysts =  df['Catalyst'].unique()

    #add to list
    all_reagents_and_catalysts.extend(reagents)
    all_reagents_and_catalysts.extend(catalysts)
    
#keep only unqie
all_reagents_and_catalysts = list(set(all_reagents_and_catalysts))
all_reagents_and_catalysts = [x for x in all_reagents_and_catalysts if str(x) != 'nan']

print ('Number of reagents and catalysts: %d' %len(all_reagents_and_catalysts))

Reaxys_Compound_6.xlsx
Reaxys_Compound_7.xlsx
Reaxys_Compound_10.xlsx
Reaxys_Compound_1.xlsx
Reaxys_Compound_11.xlsx
Reaxys_Compound_2.xlsx
Reaxys_Compound_12.xlsx
Reaxys_Compound_13.xlsx
Reaxys_Compound_3.xlsx
Reaxys_Compound_4.xlsx
Reaxys_Compound_14.xlsx
Reaxys_Compound_8.xlsx
Reaxys_Compound_9.xlsx
Reaxys_Compound_5.xlsx
Number of reagents and catalysts: 122


In [5]:
#save list of reagents/catalysts
fp_out = open('../results/1_Solvent_Catalyst_Lists/List_of_all_reagents_and_catalysts.tsv','w')
for r_c in all_reagents_and_catalysts:
    print (r_c)
    fp_out.write(r_c+'\n')
fp_out.close()

samarium(III) trifluoromethanesulfonate
TiO2-P25-SO42-
ZrO2?Al2O3 catalyst
mesoporous silica SBA-15 functionalized with Cu(II)-DiAmSar complex
solid phase supported zirconium(IV) complex
pyridine; copper(l) iodide
polyethylene glycol-400
ammonium bromide
zinc trifluoromethanesulfonate
oxygen; triethylamine
1-butyl-3-methylimidazolium Tetrafluoroborate
acetic acid
hydrogen fluoride
ZnO-loaded mesoporous silica (KIT-6) (aged at 130 °C and containing 10 wtpercent ZnO)
dibromotrimethoxyphosphorane; triethylamine
zirconium triflate
nanoparticle-supported cobalt catalyst; air
1,4-diaza-bicyclo[2.2.2]octane; acetic acid
Graphite
ZrOL2(at)SMNP
Thiamine hydrochloride
1-n-butyl-3-methylimidazolium methanesulfonate
1,1,1,3',3',3'-hexafluoro-propanol
bismuth(lll) trifluoromethanesulfonate
lithium chloride
tungstate sulfuric acid
lead(II) chloride
C20H24Cl2N2O2Zr
tin (IV) chloride pentahydrate
silica nanosphere-graphene oxide hybrid
aluminum oxide
toluene-4-sulfonic acid
molybdenum(VI) trioxide
vit

## Other conditions

In [6]:
#open all compound reaction files
onlyfiles = [f for f in listdir('../data/raw/compounds/') if isfile(join('../data/raw/compounds/', f)) if 'DS_' not in f]

#lists that contains all reagents/catalysts
all_conditions = []

#go through all files
for file in onlyfiles:
    #file = 'Reaxys_Compound_1.xlsx'
    print (file)
    df = pd.read_excel('../data/raw/compounds/'+file)
    
    #get unique reagent and catalyst
    conditions =  df['Other Conditions'].unique()

    #add to list
    all_conditions.extend(conditions)

    
Count = collections.Counter(all_conditions)

#check categories and output those that are at least >5 times used
keep_categories =[]
for c in Count.most_common(50):
    if str(c[0]) != 'nan' and ';' not in str(c[0]):
        
        if c[1] >=5:
            keep_categories.append(c[0])

print (keep_categories)
print ('Number of conditions: %d' %len(keep_categories))

Reaxys_Compound_6.xlsx
Reaxys_Compound_7.xlsx
Reaxys_Compound_10.xlsx
Reaxys_Compound_1.xlsx
Reaxys_Compound_11.xlsx
Reaxys_Compound_2.xlsx
Reaxys_Compound_12.xlsx
Reaxys_Compound_13.xlsx
Reaxys_Compound_3.xlsx
Reaxys_Compound_4.xlsx
Reaxys_Compound_14.xlsx
Reaxys_Compound_8.xlsx
Reaxys_Compound_9.xlsx
Reaxys_Compound_5.xlsx
['Reflux', 'Green chemistry', 'Neat (no solvent)', 'Sonication', 'neat (no solvent)', 'Heating', 'Irradiation', 'Microwave irradiation']
Number of conditions: 8


In [7]:
#save list of conditions
fp_out = open('../results/1_Solvent_Catalyst_Lists/List_of_interesting_conditions.tsv','w')
for r_c in keep_categories:
    print (r_c)
    fp_out.write(r_c+'\n')
fp_out.close()

Reflux
Green chemistry
Neat (no solvent)
Sonication
neat (no solvent)
Heating
Irradiation
Microwave irradiation
