# How to run REINVENT

Clone the reinvent-benchmarking GitHub repository. More details can be found there.

In [None]:
# IPython shell escape: fetch the REINVENT benchmarking code from GitHub.
!git clone https://github.com/gkwt/reinvent-benchmarking.git

## Define fitness function from tartarus

Add the fitness function to the `custom.py` file of the repo. The default is the logP fitness function.

In [None]:
import sys, os
# Parent of the current working directory; also used later to locate the
# `datasets` folder.
ROOT_DIR = '..'
# Extend the import search path before the project imports below are executed.
# Presumably ROOT_DIR holds the `tartarus` package and 'reinvent-benchmarking'
# is the cloned repo providing data_structs / train_prior / train_agent
# -- TODO confirm against the repository layout.
sys.path.append(ROOT_DIR)
sys.path.append('reinvent-benchmarking')


from tartarus import pce

def fitness_function(smi: str):
    """Return the fitness of a molecule as ``pce_1 - sas``.

    Args:
        smi: SMILES string handed to ``pce.get_properties``.

    Returns:
        The difference between the ``pce_1`` and ``sas`` values reported by
        ``pce.get_properties`` for ``smi``.
    """
    properties = pce.get_properties(smi)
    # get_properties yields seven values; only pce_1 and sas enter the score.
    # The underscore-prefixed names are unpacked solely to keep the positions.
    _dipole, _hl_gap, _lumo, _obj, pce_1, _pce_2, sas = properties
    return pce_1 - sas

## Get the imports

In [None]:
import os, sys
import pandas as pd

from data_structs import canonicalize_smiles_from_file, construct_vocabulary, write_smiles_to_file
from train_prior import pretrain
from train_agent import train_agent

## Prepare the dataset and vocabulary

In [None]:

# Location and layout of the source CSV.
data_path = os.path.join(ROOT_DIR, 'datasets')
filename = 'hce.csv'
sep = ','
header = 'infer'
smile_name = 'smiles'  # CSV column that holds the SMILES strings

# Plain-text SMILES file, one molecule per line. Defined once so the file
# written below is exactly the file that is read back for canonicalization.
smiles_file = 'data/data.smi'

# dataset load
fname = os.path.join(data_path, filename)
data = pd.read_csv(fname, sep=sep, header=header)
smiles = data[smile_name]

# Create the working directory; exist_ok=True replaces the separate
# "check, then create" steps with a single call.
os.makedirs(os.path.dirname(smiles_file), exist_ok=True)

# create smi file
with open(smiles_file, 'w') as f:
    f.writelines(smi + '\n' for smi in smiles)

print("Reading smiles...")
smiles_list = canonicalize_smiles_from_file(smiles_file)
print("Constructing vocabulary...")
# voc_chars is not used further in this cell; the call is kept because
# construct_vocabulary may persist the vocabulary as a side effect
# -- TODO confirm in data_structs.
voc_chars = construct_vocabulary(smiles_list)
write_smiles_to_file(smiles_list, "data/mols_filtered.smi")

## Pretrain the network

In [None]:
# Settings forwarded to pretrain(); kept as notebook-level names so they can
# be tweaked in one place.
train_ratio = 0.8   # presumably the fraction of the data used for training -- TODO confirm
verbose = False
num_epochs = 100

pretrain(
    num_epochs=num_epochs,
    verbose=verbose,
    train_ratio=train_ratio,
)

## Start climbing algorithm for REINVENT

In [None]:
# Launch agent training.
# 'custom_score' presumably selects the scoring function defined in the
# repo's custom.py -- TODO confirm against train_agent.
train_agent(scoring_function='custom_score', batch_size=500, n_steps=10)