# Example on how to use the code for fine-tuning your model


## Preliminary steps

### Paths and imports

Adds necessary repo folders to the path

In [7]:
import sys

# Make the repo's sibling folders importable. Every entry is inserted at
# position 1, so entries listed later end up ahead of the earlier ones.
for repo_dir in (
    '../preprocessing/',  # preprocessing code
    '../experiments/',    # folder with configuration files
    '../data/',           # data code
    '../model/',          # model code
):
    sys.path.insert(1, repo_dir)

Imports

In [8]:
from main_preprocessor import preprocess_data #for data preprocessing
import configparser # to automatically change the .ini file
from sample import Sampler

## Model and data

Model settings

In [20]:
# Type of method to use.
model = 'BIMODAL'
# Position of the starting token: 'fixed' or 'random'.
start = 'random'
# Augmentation level; 1 means no augmentation.
aug_level = 1
# Total number of hidden units.
net_size = 512

Fine tuning settings

In [10]:
# Name and/or path of the dataset used for fine-tuning (csv or tar.xz).
fine_tuning = 'fine_tuning'
# Number of fine-tuning epochs: the higher the number, the closer you get
# to the fine-tuning compounds.
epochs = 10

Sampling settings

In [11]:
# Temperature for sampling.
T_sampling = 0.7
# Number of SMILES to sample.
n_sampling = 10
# Number of CV folds to use for fine-tuning.
n_folds = 1
# Flags handed to the sampler as its `valid`, `novel` and `unique` options.
sample_valid = True
sample_novel = True
sample_unique = True

## Preprocessing the fine tuning library

In [13]:
# Preprocess the fine-tuning library. The dataset name and the augmentation
# level are taken from the settings cells, so the file that gets processed
# always matches the file name reported below.
preprocess_data(
    filename_in=fine_tuning,
    model_type=model,
    starting_point=start,
    augmentation=aug_level,
    max_len=74,  # length limits handed to the preprocessor as-is
    min_len=34,
)

# Name of the preprocessed file; reused later as the data set in the config.
fine_tuning_preprocessed = fine_tuning + '_' + model + '_' + start
print('The new file can be found in ../data with the following name: "' + fine_tuning_preprocessed + '.csv".')

Pre-processing of "fine_tuning" started.
 invalid SMILES - removed.
 duplicate SMILES - removed.
 salts - removed.
 stereochemistry - removed.
 canonicalized SMILES.
(22,)
Data processed saved
The new file can be found in ../data with the following name: "fine_tuning_BIMODAL_random.csv".


## Fine tuning the model

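Creates the configuration file and saves it in the [experiments folder](../experiments/) as `<model>_<start>_<net_size>_FineTuning.ini` (with the settings above: `BIMODAL_random_512_FineTuning.ini`), based on the user-defined settings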

In [30]:
# Common prefix shared by the pre-trained model folder and the experiment name.
reference_name = model + '_' + start + '_' + str(net_size)

# name of the configuration file to write
exp_name = reference_name + '_FineTuning'
template_name = 'BIMODAL_random_512_FineTuning_template.ini'  # file to use as template

# Read the template. Opening the file explicitly makes a missing template fail
# right here with FileNotFoundError; ConfigParser.read() would skip it silently
# and the cell would only fail later with a confusing KeyError.
config = configparser.ConfigParser()
with open('../experiments/' + template_name) as template_file:
    config.read_file(template_file)

# Override the template fields with the user-defined settings.
config['MODEL']['model'] = model
# Compare by value: `is` tests object identity, which is not guaranteed for
# equal strings (and triggers a SyntaxWarning on Python >= 3.8).
if model == 'BIMODAL':
    config['MODEL']['hidden_units'] = str(net_size // 4)
else:
    config['MODEL']['hidden_units'] = str(net_size // 2)

config['DATA']['data'] = fine_tuning_preprocessed
config['TRAINING']['epochs'] = str(epochs)

# Picks one of the pre-trained models that are provided in the repo.
# If the SMILES preprocessing changes, the pre-training has to be performed again.
config['FINETUNING']['start_model'] = '../evaluation/' + reference_name + '/models/model_fold_1_epochs_9'

# Write the resulting configuration back to the experiments folder.
with open('../experiments/' + exp_name + '.ini', 'w') as configfile:
    config.write(configfile)


Fine tune the model

In [31]:
from fine_tuner import FineTuner

In [32]:
# Build the fine-tuner for the experiment whose .ini file was written above.
t = FineTuner(experiment_name=exp_name)

GPU available
(22, 1)


In [17]:
# Run the fine-tuning with results stored under '../evaluation/' and no restart.
# NOTE(review): the saved output of this cell ends in
# "RuntimeError: Expected object of scalar type Long but got scalar type Int
# for argument #2 'target'", i.e. the recorded run did not complete training.
# Presumably the target tensor has to be cast to int64 inside the model
# code, which is not part of this notebook -- confirm before relying on it.
t.fine_tuning(stor_dir='../evaluation/', restart=False)



Fold: 1
Epoch: 0


RuntimeError: Expected object of scalar type Long but got scalar type Int for argument #2 'target'

Sample from the fine-tuned model

In [None]:
# Sample from the fine-tuned model. The sampler is created for `exp_name`,
# the experiment name defined in the configuration cell.
s = Sampler(exp_name)
# NOTE(review): `fold` receives the number of folds and `epoch` the number of
# epochs. The pre-trained checkpoint used above is named
# "model_fold_1_epochs_9", which suggests 0-based epoch indices -- confirm
# that epoch=epochs selects an existing checkpoint and that Sampler.sample
# accepts plain integers for both arguments.
s.sample(
    N=n_sampling,
    stor_dir='../evaluation',
    T=T_sampling,
    fold=n_folds,
    epoch=epochs,
    valid=sample_valid,
    novel=sample_novel,
    unique=sample_unique,
    write_csv=True,
)