In [None]:
#@title 🏃‍♀️ Install Ersilia on Colab


%%capture
%env MINICONDA_INSTALLER_SCRIPT=Miniconda3-py37_4.12.0-Linux-x86_64.sh
%env MINICONDA_PREFIX=/usr/local
%env PYTHONPATH= "$PYTHONPATH:/usr/local/lib/python3.7/site-packages"
%env PIP_ROOT_USER_ACTION=ignore

!wget https://repo.anaconda.com/miniconda/$MINICONDA_INSTALLER_SCRIPT
!chmod +x $MINICONDA_INSTALLER_SCRIPT
!./$MINICONDA_INSTALLER_SCRIPT -b -f -p $MINICONDA_PREFIX

!python -m pip install git+https://github.com/ersilia-os/ersilia.git
!python -m pip install requests --upgrade
import sys
_ = (sys.path.append("/usr/local/lib/python3.7/site-packages"))

In [None]:
#@title 🔗 Connect Colab to your Google Drive

# Mount Google Drive at /content/drive so the notebook can read the input file
# and write results there.
from google.colab import drive
drive.mount('/content/drive')

# After running this cell, click the folder icon in the left sidebar
# and check that your Drive shows up under /content/drive.

Mounted at /content/drive


<div class="markdown-google-sans">

### 📩 Specify your file paths and the input .csv file
</div>

<div>

* Make sure you have a .csv file containing a list of molecules represented as SMILES strings
* Specify the exact name of the SMILES column in the .csv file
* Specify the path to your file in Google Drive
* Specify the path where you want to store the results in Google Drive

</div> 

In [None]:
# Folder in Google Drive that contains the input .csv file
# (no trailing slash needed; the file name is appended with "/").

input_folder="/content/drive/MyDrive/Ersilia" #@param {type:"string"}

In [None]:
# Folder in Google Drive where you want the results to be stored.

output_folder="/content/drive/MyDrive/Ersilia" #@param {type:"string"}

In [None]:
# Name of the input csv file WITHOUT the ".csv" extension
# (the extension is appended when the path is built).

input_filename ="eml_canonical" #@param {type:"string"}

In [None]:
# Exact header of the column in the input csv that holds the SMILES strings.
smiles_column = "can_smiles" #@param {type:"string"}




In [None]:
#@title 🔤 Extract SMILES to a list
import pandas as pd

# Build the full path of the input file; ".csv" is appended to the file name.
path = "%s/%s.csv" % (input_folder,input_filename)
smi_col = str(smiles_column)
df = pd.read_csv(path)

# Fail early with an actionable message when the column name is wrong,
# instead of a bare KeyError showing only the missing key.
if smi_col not in df.columns:
    raise KeyError(
        "Column '%s' not found in %s. Available columns: %s"
        % (smi_col, path, ", ".join(map(str, df.columns)))
    )

# Blank cells are read as NaN; skip them so that only real SMILES strings
# are sent to the model, and tell the user when that happens.
n_missing = int(df[smi_col].isna().sum())
if n_missing:
    print("Skipping " + str(n_missing) + " rows with a missing SMILES value")
smiles = df[smi_col].dropna().tolist()

print("My dataset contains"+" "+ str(len(smiles))+" "+ "SMILES")

My dataset contains 442 SMILES


<div class="markdown-google-sans">

#### 🚀 Use the Ersilia Model Hub to run predictions for your molecules. Check all available models [here](https://ersilia.io/model-hub)
</div>

1. Enter the model name
2. Fetch models
3. Make predictions or calculations
4. Save the result to a .csv file in your Google Drive.

In [None]:
# Identifier of the Ersilia Model Hub model to fetch, serve and run.
# "eos4q1a" is just an example; replace it with the identifier of your model.

model_name = "eos4q1a" #@param {type:"string"}

In [None]:
#@title 📥 Fetch the model

# Runs the Ersilia CLI `fetch` command for the selected model.
# -v is passed to print the detailed log; `$model_name` is filled in from the
# Python variable set in the form above.
!ersilia -v fetch $model_name

[34m⬇️  Fetching model eos4q1a: crem-structure-generation[0m
10:11:52 | INFO     | GitHub CLI is not installed. Ersilia can work without it, but we highy recommend that you install this tool.
10:11:52 | DEBUG    | Git LFS is installed
Error: Failed to call git rev-parse --git-dir --show-toplevel: "fatal: not a git repository (or any of the parent directories): .git\n"
Git LFS initialized.
10:11:52 | DEBUG    | Git LFS has been activated
10:11:52 | DEBUG    | Connected to the internet
10:11:52 | DEBUG    | Conda is installed
10:11:52 | DEBUG    | EOS Home path exists
10:11:52 | INFO     | Starting delete of model eos4q1a
10:11:54 | INFO     | Deleting conda environment eos4q1a
10:11:55 | DEBUG    | Deleting /root/eos/isaura/lake/eos4q1a_local.h5
10:11:55 | DEBUG    | Deleting /root/eos/isaura/lake/eos4q1a_public.h5
10:11:55 | DEBUG    | Model entry eos4q1a was not available in the fetched models registry
10:11:55 | SUCCESS  | Model eos4q1a deleted successfully
10:11:55 | DEBUG    | Cl

In [None]:
#@title ⚙️ Serve the model

# Starts serving the model and prints the APIs it exposes
# (for example `generate`, `calculate` or `predict`, depending on the model).
# Use one of the listed names in the "Run predictions" form.

!ersilia serve $model_name

[32m🚀 Serving model eos4q1a: crem-structure-generation[0m
[0m
[33m   URL: http://127.0.0.1:38523[0m
[33m   PID: 3803[0m
[33m   SRV: conda[0m
[0m
[34m👉 Available APIs:[0m
[34m   - generate[0m


In [None]:
#@title ⚡ Run predictions
api = "generate" #@param {type:"string"}

from ersilia import ErsiliaModel
import time

# NOTE(review): no model.serve() is called here; this presumably relies on the
# session started by the `ersilia serve` cell - confirm.
model = ErsiliaModel(model_name)

# Run the API selected in the form on the whole SMILES list. An empty form
# field falls back to the model's default API. The model is closed in
# `finally` so it is released even when the run fails.
try:
    begin = time.time()
    output = model.api(api_name=api or None, input=smiles, output="pandas")
    end = time.time()
finally:
    model.close()

print('Successful 👍! Time taken:', round((end - begin),2),'seconds')

Successful 👍! Time taken: 7496.55 seconds


  self[col] = igetitem(value, i)


In [None]:
#@title ✔️ Check your results
from IPython.display import display

# Show the first rows as a rendered table; plain print() wraps wide frames
# across several text blocks, which is hard to read for long SMILES columns.
display(output.head())

# (rows, columns) of the full result.
print(output.shape)

                           key  \
0  MCGSCOLBFJQGHM-SCZZXKLOSA-N   
1  GZOSMCIZMLWJML-VJLLXTKPSA-N   
2  BZKPWHYZMXOIDC-UHFFFAOYSA-N   
3  QTBSBXVTEAMEQO-UHFFFAOYSA-N   
4  PWKSKIMOESPYIA-BYPYZUCNSA-N   

                                               input  \
0      Nc1nc(NC2CC2)c2ncn([C@H]3C=C[C@@H](CO)C3)c2n1   
1  C[C@]12CC[C@H]3[C@@H](CC=C4C[C@@H](O)CC[C@@]43...   
2                         CC(=O)Nc1nnc(S(N)(=O)=O)s1   
3                                            CC(=O)O   
4                            CC(=O)N[C@@H](CS)C(=O)O   

                                           gen_mol_0  \
0      CC[C@@H]1C=C[C@H](n2cnc3c(NC4CC4)nc(N)nc32)C1   
1  COc1ccc(C2=CC[C@H]3[C@@H]4CC=C5C[C@@H](O)CC[C@...   
2                              CC(=O)CC(=O)Nc1ccccc1   
3                                            CC=CC=O   
4                                Nc1ccc(C(=O)O)cc1Br   

                                           gen_mol_1  \
0   Nc1nc(NC2CC2)c2ncn([C@H]3C=C[C@@H](CON=O)C3)c2n1   
1  C[C@]1