In [None]:
import os

import numpy as np

from MolRep import MolRep
from MolRep.Utils.logger import Logger
from MolRep.Utils.config_from_dict import Config
from MolRep.Experiments.experiments import EndToEndExperiment

In [None]:
# --- What to run -----------------------------------------------------------
# Model and dataset identifiers handed to MolRep.construct_dataset.
MODEL_NAME = 'CMPNN'
DATASET_NAME = 'BBBP'

# Number of inner folds; passed as `inner_k` and used as the fold-loop bound.
_FOLDS = 5

# --- Where results go ------------------------------------------------------
# Root output directory passed to MolRep.construct_dataset.
OUTPUT_DIR = 'Outputs/'
# Prefix of the per-configuration folder name (suffixed with config_id + 1).
_CONFIG_BASE = 'config_'
# File name joined onto the per-configuration folder to form config_filename.
_CONFIG_FILENAME = 'config_results.json'

In [None]:
# Ask MolRep for everything the run needs for the chosen dataset/model pair.
# The call returns five values, unpacked here in the order it yields them.
(
    dataset_config,
    dataset,
    model_configurations,
    model_selector,
    exp_path,
) = MolRep.construct_dataset(
    dataset_name=DATASET_NAME,
    model_name=MODEL_NAME,
    inner_k=_FOLDS,
    output_dir=OUTPUT_DIR,
)

In [None]:
# Index into model_configurations of the configuration to evaluate.
config_id = 0

# Directory layout: <exp_path>/<_FOLDS>_FOLD_MS/config_<config_id + 1>/
KFOLD_FOLDER = os.path.join(exp_path, str(_FOLDS) + '_FOLD_MS')
exp_config_name = os.path.join(KFOLD_FOLDER, _CONFIG_BASE + str(config_id + 1))

# NOTE(review): config_filename is computed but not used in the visible
# cells — confirm whether k_fold_dict is meant to be saved to it.
config_filename = os.path.join(exp_config_name, _CONFIG_FILENAME)

# exist_ok=True creates the folder (and any missing parents) without the
# check-then-create race of testing os.path.exists first, and keeps the cell
# safe to re-run.
os.makedirs(exp_config_name, exist_ok=True)

In [None]:
# Pick the configuration under evaluation.
config = model_configurations[config_id]

# The log file lives inside this configuration's folder. It is opened with
# mode='a' (presumably append — confirm against MolRep's Logger).
experiment_log_path = os.path.join(exp_config_name, 'experiment.log')
logger = Logger(str(experiment_log_path), mode='a')

# Record which configuration this log belongs to.
logger.log('Configuration: ' + str(config))

In [None]:
# Result record for this configuration: the configuration itself, one
# (initially empty) dict per fold, and aggregate statistics that start at zero
# and are filled in after all folds have run.
k_fold_dict = {'config': config}
k_fold_dict['folds'] = [dict() for _ in range(_FOLDS)]
k_fold_dict.update({
    'avg_TR_score': 0.0,
    'avg_VL_score': 0.0,
    'std_TR_score': 0.0,
    'std_VL_score': 0.0,
})

In [None]:
# Run one train/validate experiment per inner fold for the selected
# configuration, store each fold's scores in k_fold_dict, then aggregate.
dataset_getter = MolRep.construct_dataloader(dataset)
for k in range(_FOLDS):
    # Select the k-th inner split on the data provider before running.
    dataset_getter.set_inner_k(k)

    # Each fold gets its own output folder: <exp_config_name>/FOLD_<k + 1>
    fold_exp_folder = os.path.join(exp_config_name, 'FOLD_' + str(k + 1))
    # Create the experiment object which will be responsible for running a specific experiment
    experiment = EndToEndExperiment(config, dataset_config, fold_exp_folder)

    training_score, validation_score = experiment.run_valid(dataset_getter, logger)

    print('training_score:', training_score, 'validation_score:', validation_score)
    logger.log(str(k + 1) + ' split, TR Score: ' + str(training_score) +
               ' VL Score: ' + str(validation_score))

    k_fold_dict['folds'][k]['TR_score'] = training_score
    k_fold_dict['folds'][k]['VL_score'] = validation_score

# Aggregate across folds. Requires numpy as `np` (imported in the import cell).
# Iterating the fold records directly avoids re-indexing by range(_FOLDS).
tr_scores = np.array([fold['TR_score'] for fold in k_fold_dict['folds']])
vl_scores = np.array([fold['VL_score'] for fold in k_fold_dict['folds']])

# ndarray.std() uses numpy's default ddof=0, i.e. the population standard
# deviation over the folds.
k_fold_dict['avg_TR_score'] = tr_scores.mean()
k_fold_dict['std_TR_score'] = tr_scores.std()
k_fold_dict['avg_VL_score'] = vl_scores.mean()
k_fold_dict['std_VL_score'] = vl_scores.std()

logger.log('TR avg is ' + str(k_fold_dict['avg_TR_score']) + ' std is ' + str(k_fold_dict['std_TR_score']) +
           ' VL avg is ' + str(k_fold_dict['avg_VL_score']) + ' std is ' + str(k_fold_dict['std_VL_score']))