## Define task

In [12]:
import os
import pandas as pd

# Benchmark dataset this notebook run operates on.
task = 'freesolv'
supported_tasks = ['delaney', 'lipo', 'freesolv', 'bace', 'bbbp', 'qm9']
# Validate with an explicit exception rather than `assert`: assertions are
# removed when Python runs with -O, which would let a typo slip through and
# fail later with a much less helpful error.
if task not in supported_tasks:
    raise ValueError(
        'Unknown task {!r}; expected one of {}'.format(task, supported_tasks)
    )

# define arguments
# Root data directory; the raw files are read from `<folder>/raw_files`.
folder = './data/'
# qm9 ships as an SDF file, every other task as `<task>.csv`.
raw_dataset_name = 'gdb9.sdf' if task == 'qm9' else task + '.csv'
# File names handed to DataGenerating for the feature dictionary and dataset.
feature_dict_name = 'feature_dict_' + task + '.npy'
dataset_name = task + '.pt'

# load labels and task_name
if task == 'qm9':
    # qm9: the labels are read from the CSV stored next to the SDF file;
    # columns 4..15 (by position) hold the regression targets.
    raw_file = pd.read_csv(os.path.join(folder, 'raw_files', 'gdb9.sdf.csv'))
    target = raw_file.iloc[:, 4:16]
    names = raw_file.mol_id
    # Map each molecule id to its row of target values.
    target_dict = {
        mol_id: target.iloc[row, :] for row, mol_id in enumerate(names)
    }
    task_name = target.columns.to_list()
else:
    # Single-target datasets: the label is the second column of the CSV and
    # its header doubles as the task name.
    raw_data_path = os.path.join(folder, 'raw_files', raw_dataset_name)
    raw_file = pd.read_csv(raw_data_path)
    task_name = [raw_file.columns[1]]
    target = raw_file.iloc[:, 1]

## Data preparation

In [None]:
from data_pipeline.data_generating import DataGenerating

# feature generation
# Build the project's DataGenerating helper from the data folder and the three
# file names defined in the task cell, then run its feature-generation step.
# NOTE(review): presumably this writes the feature dictionary named by
# `feature_dict_name` somewhere under `folder` — confirm in
# data_pipeline.data_generating.
data_generate = DataGenerating(folder, raw_dataset_name, feature_dict_name, dataset_name)
data_generate.features_generating()

In [None]:
# dataset creation
# Flag the feature dictionary as already available before building the dataset.
# NOTE(review): presumably this makes dataset_creating reuse the features
# produced by the feature-generation cell instead of recomputing them — confirm.
data_generate.features_dict_exist = True
# `target` is a single column (Series) for the single-target tasks and a
# multi-column DataFrame for qm9.
# NOTE(review): the `target_dict` built for qm9 in the label-loading cell is
# never passed here — confirm which object dataset_creating expects for qm9.
data_generate.dataset_creating(target_name=target)

## Training

In [16]:
# define train_args
from train_evaluate.train_utils import TrainArgs

# Task-family flags, computed once and reused for every task-dependent setting.
is_qm9 = task == 'qm9'
is_binary = task in ['bace', 'bbbp']

# Evaluation metric: MAE for qm9, ROC-AUC for the binary classification sets,
# RMSE for the single-target regression sets.
if is_qm9:
    metrics = 'MAE'
elif is_binary:
    metrics = 'ROC-AUC'
elif task in ['delaney', 'lipo', 'freesolv']:
    metrics = 'RMSE'

train_args = TrainArgs(
    lr=0.002,
    batch_size=128,
    patience=500,
    task='binary' if is_binary else 'regression',
    num_tasks=12 if is_qm9 else 1,  # qm9 has 12 target columns
    normalize=is_qm9,  # normalization is enabled for qm9 only
    interval=10,  # log interval
    task_name=task_name,
    metrics=metrics,
    save=True,  # save results as csv files
    logs=False,  # print model hyperparameters
)

In [None]:
from train_evaluate.train_graph import evaluate
from models.FFiNet_model import FFiNetModel
import torch.nn as nn


# Model hyperparameters, forwarded to evaluate() as `model_args`.
# NOTE(review): `activation` is one nn.PReLU instance, and PReLU carries a
# learnable weight. If FFiNetModel reuses this object in several layers, or
# evaluate() builds several models from the same `params`, that weight is
# shared between them — confirm this is intended.
params = {
    'hidden_dim': 16,
    'hidden_layers': 2,
    'num_heads': 8,
    'activation': nn.PReLU(),
    'dropout': 0.2,
    'prediction_layers': 1,
    'prediction_dropout': 0.1,
    'prediction_hidden_dim': 256,
}

# Read the dataset from the same `folder` that was handed to DataGenerating
# instead of a second hard-coded './data', so the two locations cannot drift
# apart when `folder` is changed.
# NOTE(review): the first positional argument (3) is presumably the number of
# repeated runs — confirm against train_evaluate.train_graph.evaluate.
evaluate(
    3,
    data_path=os.path.join(folder, dataset_name),
    model_class=FFiNetModel,
    model_args=params,
    train_args=train_args,
)