# SMILES lookup

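This notebook looks up SMILES strings for the substances listed on [PsychonautWiki](https://psychonautwiki.org/wiki/PsychonautWiki) by matching each molecule name against the SMILES data extracted from Wikipedia by [Ertl et al. (2015)](https://jcheminf.biomedcentral.com/articles/10.1186/s13321-015-0061-y). Names are compared exactly; substances with no matching Wikipedia entry are kept, with an empty SMILES value.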

In [30]:
import pandas as pd

# Show which pandas release executed this notebook (the outputs below were produced with it).
print(f"Pandas version {pd.__version__}")

Pandas version 1.2.4


In [71]:
# Load the PsychonautWiki table, passing the first five columns (positions 0-4)
# through str() so they are read as text.
# NOTE(review): presumably this keeps blank class cells as "" instead of NaN - confirm.
psychonaut = pd.read_csv("psychonaut.csv", converters=dict.fromkeys(range(5), str))
psychonaut

Unnamed: 0,name,url,class0,class1,class2
0,"1,4-Butanediol","https://psychonautwiki.org/wiki/1,4-Butanediol",Depressant,,
1,1B-LSD,https://psychonautwiki.org/wiki/1B-LSD,Psychedelics,,
2,1P-LSD,https://psychonautwiki.org/wiki/1P-LSD,Psychedelics,,
3,1cP-LSD,https://psychonautwiki.org/wiki/1cP-LSD,Psychedelics,,
4,2-Aminoindane,https://psychonautwiki.org/wiki/2-Aminoindane,Stimulants,,
...,...,...,...,...,...
254,4-HO-DiPT,https://psychonautwiki.org/wiki/4-HO-DiPT,Psychedelics,,
255,2C-T-21,https://psychonautwiki.org/wiki/2C-T-21,Psychedelics,,
256,2C-T-7,https://psychonautwiki.org/wiki/2C-T-7,Psychedelics,,
257,DOI,https://psychonautwiki.org/wiki/DOI,Psychedelics,,


In [80]:
# Fetch the tab-separated name/SMILES list and keep one row per molecule name.
# header=0 together with names=[...] treats the file's first line as a header
# row and replaces it with the column names given here.
# drop_duplicates keeps the first occurrence of each name (pandas default).
wikipedia = pd.read_csv(
    "https://www.cheminfo.org/wikipedia/smiles.txt",
    sep="\t",
    header=0,
    names=["name", "smiles"],
).drop_duplicates(subset=["name"])
wikipedia

Unnamed: 0,name,smiles
0,Aspirin,O=C(C)Oc1ccccc1C(=O)O
1,Acetylene,C#C
2,Adenosine triphosphate,O=P(O)(O)OP(=O)(O)OP(=O)(O)OC[C@H]3O[C@@H](n2c...
3,Ampicillin,CC1(C(N2C(S1)C(C2=O)NC(=O)C(C3=CC=CC=C3)N)C(=O...
4,Chemistry of ascorbic acid,C([C@@H]([C@@H]1C(=C(C(=O)O1)O)O)O)O
...,...,...
18616,Copper oxalate,C(=O)(C(=O)O)O.[Cu+2]
18617,4-PrO-DMT,CCC(=O)OC1=CC=CC2=C1C(=CN2)CCN(C)C
18618,Phenylcobalt,C1=CC=[C-]C=C1.[Co]
18619,Praseodymium nitride,N#[Pr]


In [81]:
# Left-join on the "name" column: every PsychonautWiki row is kept, and the
# Wikipedia SMILES string is attached wherever the names are identical.
# indicator=True adds a "_merge" column recording whether each row found a match;
# validate="one_to_one" makes pandas raise if "name" is duplicated in either frame.
matched = psychonaut.merge(
    wikipedia,
    on="name",
    how="left",
    indicator=True,
    validate="one_to_one",
)
matched

Unnamed: 0,name,url,class0,class1,class2,smiles,_merge
0,"1,4-Butanediol","https://psychonautwiki.org/wiki/1,4-Butanediol",Depressant,,,C(CCO)CO,both
1,1B-LSD,https://psychonautwiki.org/wiki/1B-LSD,Psychedelics,,,CN1[C@](C2=C[C@@H](C(N(CC)CC)=O)C1)([H])CC3=CN...,both
2,1P-LSD,https://psychonautwiki.org/wiki/1P-LSD,Psychedelics,,,CCN(CC)C(=O)[C@H]1CN(C)[C@@H]2Cc3cn(C(=O)CC)c4...,both
3,1cP-LSD,https://psychonautwiki.org/wiki/1cP-LSD,Psychedelics,,,CCN(CC)C(=O)[C@@H]5C=C2[C@@H](Cc3cn(C(=O)C1CC1...,both
4,2-Aminoindane,https://psychonautwiki.org/wiki/2-Aminoindane,Stimulants,,,C1C(CC2=CC=CC=C21)N,both
...,...,...,...,...,...,...,...
254,4-HO-DiPT,https://psychonautwiki.org/wiki/4-HO-DiPT,Psychedelics,,,CC(C)N(CCc1c[nH]c2cccc(O)c12)C(C)C,both
255,2C-T-21,https://psychonautwiki.org/wiki/2C-T-21,Psychedelics,,,COc1cc(SCCF)c(cc1CCN)OC,both
256,2C-T-7,https://psychonautwiki.org/wiki/2C-T-7,Psychedelics,,,COc1cc(SCCC)c(cc1CCN)OC,both
257,DOI,https://psychonautwiki.org/wiki/DOI,Psychedelics,,,,left_only


In [84]:
# Rows flagged "left_only" by the merge indicator are the PsychonautWiki
# entries for which no Wikipedia SMILES string was found.
unmatched = matched.loc[matched["_merge"].eq("left_only")]
unmatched

Unnamed: 0,name,url,class0,class1,class2,smiles,_merge
5,2-FA,https://psychonautwiki.org/wiki/2-FA,Stimulants,,,,left_only
6,2-FEA,https://psychonautwiki.org/wiki/2-FEA,Stimulants,Entactogens,,,left_only
7,2-FMA,https://psychonautwiki.org/wiki/2-FMA,Stimulants,,,,left_only
25,2M2B,https://psychonautwiki.org/wiki/2M2B,Depressant,,,,left_only
26,3-FA,https://psychonautwiki.org/wiki/3-FA,Stimulants,,,,left_only
...,...,...,...,...,...,...,...
248,MiPLA,https://psychonautwiki.org/wiki/MIPLA,Psychedelics,,,,left_only
252,MET,https://psychonautwiki.org/wiki/MET,Psychedelics,,,,left_only
253,EPT,https://psychonautwiki.org/wiki/EPT,Psychedelics,,,,left_only
257,DOI,https://psychonautwiki.org/wiki/DOI,Psychedelics,,,,left_only


In [85]:
 # Write the merged table (the "_merge" indicator column included) to disk,
 # omitting the DataFrame's row index.
 # NOTE(review): this cell is indented by one space; IPython tolerates that,
 # but the line would need dedenting if exported to a plain .py script.
 matched.to_csv(path_or_buf="index.csv", index=False)