In [1]:
import os

# FILE_DIR = os.path.dirname(__file__)
FILE_DIR = os.path.abspath(".")
PROJECT_DIR = os.path.abspath(f"{FILE_DIR}/..")
PREPROCESSED_DATA_PATH = f"{PROJECT_DIR}/data/preprocessed"

# TODO: Install modified srlearn and BoostSRL as dependencies of this project. After that, I need to remove the following two lines and to change how srlearn modules are being imported in the third line below.
import json
import logging
import shutil
import sys
import numpy as np
import pandas as pd
sys.path.append(f"{PROJECT_DIR}/../srlearn")
sys.path.append(PROJECT_DIR)
from tqdm import tqdm
from srlearn.database import Database
from srlearn.background import Background
from srlearn.rdn import RDNBoost, RDNBoostTransferLearning, TreeBoostler
from srlearn.weight import WeightFactory
from utils.experiment import getLogger, loadDatabase
from glob import glob
from copy import copy
from typing import Optional, Union, Literal
from IPython.display import clear_output

from concurrent.futures import ProcessPoolExecutor
import multiprocessing.managers

import warnings
warnings.filterwarnings("ignore")

sys.path.append(PROJECT_DIR)

DATA_PATH = "../data/preprocessed"

# **Useful Functions**

## **General**

In [2]:
# Learning from scratch using the original RDN-Boost
def runSingleExperiment_OriginalRDNBoost(
    experimentPath: str = ".",
    databaseTrain: Database = None,
    databaseTest: Database = None,
    nEstimators: int = 10,
    nodeSize: int = 2,
    maxTreeDepth: int = 3,
    negPosRatio: int = 2,
    numberOfClauses: int = 8,
    numberOfCycles: int = 100,
    ignoreSTDOUT: bool = True,
    logger: logging.Logger = None
) -> dict:
    """Fit an `RDNBoost` model on `databaseTrain` and run inference on `databaseTest`.

    The model is created with `path = <experimentPath>/originalRDNBoost`. Both
    databases are mandatory (enforced with assertions). When no logger is given,
    one named "Original RDN-B" is requested from `getLogger` at DEBUG level.

    Returns:
        dict: shallow copies of the fitted model ("model"), of its `_dotimages`
        attribute ("treeImages") and of its `_prediction_metrics` attribute
        ("metrics").
    """
    assert databaseTrain is not None
    assert databaseTest is not None

    logger = logger or getLogger("Original RDN-B", level = logging.DEBUG)

    outputPath = os.path.join(experimentPath, "originalRDNBoost")
    logger.info("RUNNING ORIGINAL RDN-B...")
    logger.info(f"Progress will be store at {outputPath}")

    # Gather the hyper-parameters first so the constructor call stays short.
    hyperParameters = dict(
        n_estimators = nEstimators,
        node_size = nodeSize,
        max_tree_depth = maxTreeDepth,
        neg_pos_ratio = negPosRatio,
        number_of_clauses = numberOfClauses,
        number_of_cycles = numberOfCycles,
        path = outputPath,
    )
    booster = RDNBoost(**hyperParameters)

    logger.info(f"Training the model on the training set...")
    booster.fit(databaseTrain, ignoreSTDOUT = ignoreSTDOUT)

    logger.info(f"Evaluating the model on the test set...")
    booster._run_inference(databaseTest, ignoreSTDOUT = ignoreSTDOUT)

    # The tree images are only read after they have been generated explicitly.
    booster._generate_dotimages()
    treeImages = booster._dotimages
    predictionMetrics = booster._prediction_metrics

    return {
        "model": copy(booster),
        "treeImages": copy(treeImages),
        "metrics": copy(predictionMetrics),
    }

In [3]:
# Learning from scratch using our implementation analogous to original RDN-Boost
def runSingleExperiment_AnalogousToRDNBoost(
    experimentPath: str = ".",
    databaseTrain: Database = None,
    databaseTest: Database = None,
    nEstimators: int = 10,
    nodeSize: int = 2,
    maxTreeDepth: int = 3,
    negPosRatio: int = 2,
    numberOfClauses: int = 8,
    numberOfCycles: int = 100,
    ignoreSTDOUT: bool = True,
    logger: logging.Logger = None
) -> dict:
    """Learn from scratch with `RDNBoostTransferLearning`, using a placeholder source database.

    A freshly constructed `Database()` stands in for the source domain: its
    `getTargetRelation` is overridden to return the target relation of
    `databaseTrain`, and its `modes` are taken from `databaseTrain`. It is combined
    with `databaseTrain` through `Database.prepareTransferLearningDatabase` using
    the "scalar" weight strategy with `weight = 1`. Both utility alphas are set to 1.
    The model is created with `path = <experimentPath>/analogousToOriginalRDNBoost`.

    Both databases are mandatory (enforced with assertions). When no logger is
    given, one named "Analogous to RDN-B" is requested from `getLogger` at DEBUG level.

    Returns:
        dict: shallow copies of the fitted model ("model"), of its `_dotimages`
        attribute ("treeImages") and of its `_prediction_metrics` attribute
        ("metrics").
    """
    assert databaseTrain is not None
    assert databaseTest is not None

    logger = logger or getLogger("Analogous to RDN-B", level = logging.DEBUG)

    outputPath = os.path.join(experimentPath, "analogousToOriginalRDNBoost")
    logger.info("RUNNING OUR APPROACH ANALOGOUS TO THE ORIGINAL RDN-B...")
    logger.info(f"Progress will be store at {outputPath}")

    # Placeholder source database that mirrors the target relation and modes of the training data.
    targetRelation = databaseTrain.getTargetRelation()
    placeholderSource = Database()
    placeholderSource.getTargetRelation = lambda: targetRelation
    placeholderSource.modes = databaseTrain.modes

    unitWeights = WeightFactory().getWeightStrategy("scalar", weight = 1)

    combinedDatabase = Database.prepareTransferLearningDatabase(
        placeholderSource,
        databaseTrain,
        weightStrategy = unitWeights
    )

    hyperParameters = dict(
        n_estimators = nEstimators,
        node_size = nodeSize,
        max_tree_depth = maxTreeDepth,
        neg_pos_ratio = negPosRatio,
        number_of_clauses = numberOfClauses,
        number_of_cycles = numberOfCycles,
        source_utility_alpha = 1,
        target_utility_alpha = 1,
        path = outputPath,
    )
    booster = RDNBoostTransferLearning(**hyperParameters)

    logger.info(f"Training the model on the training set...")
    booster.fit(combinedDatabase, ignoreSTDOUT = ignoreSTDOUT)

    logger.info(f"Evaluating the model on the test set...")
    booster._run_inference(databaseTest, ignoreSTDOUT = ignoreSTDOUT)

    # The tree images are only read after they have been generated explicitly.
    booster._generate_dotimages()
    treeImages = booster._dotimages
    predictionMetrics = booster._prediction_metrics

    return {
        "model": copy(booster),
        "treeImages": copy(treeImages),
        "metrics": copy(predictionMetrics),
    }

In [4]:
# Our Transfer Learning implementation
def runSingleExperiment_TransferLearning(
    experimentPath: str = ".", 
    sourceDatabase: Database = None,
    targetDatabaseTrain: Database = None,
    targetDatabaseTest: Database = None,
    nEstimators: int = 10,
    nodeSize: int = 2,
    maxTreeDepth: int = 3,
    negPosRatio: int = 2,
    numberOfClauses: int = 8,
    numberOfCycles: int = 100,
    ignoreSTDOUT: bool = True,
    useRecursion: bool = False,
    randomSeed: int = 10,
    maxFailedNegSamplingRetries: int = 50,
    weightStrategy: WeightFactory = None,
    sourceUtilityAlpha: float = 1,
    targetUtilityAlpha: float = 1,
    utilityAlphaSetIter: int = 1,
    relationMapping: dict = None,
    termTypeMapping: dict = None,
    logger: logging.Logger = None,
) -> dict:
    """Train `RDNBoostTransferLearning` on mapped source data plus target data, then evaluate it.

    Steps performed:
      1. Find the source relation that `relationMapping` maps to the target relation of
         `targetDatabaseTrain` and make it the target predicate of `sourceDatabase`.
      2. Apply `relationMapping` / `termTypeMapping` to the resulting source database.
      3. Combine it with `targetDatabaseTrain` via `Database.prepareTransferLearningDatabase`
         using `weightStrategy`.
      4. Fit the model (stored under `<experimentPath>/transferLearning`) and run inference
         on `targetDatabaseTest`.

    The three databases, `weightStrategy`, `relationMapping` and `termTypeMapping` are
    mandatory, and both utility alphas must be non-negative (all enforced with assertions).
    `randomSeed` is accepted for interface compatibility but is not referenced in this
    function's body. When no logger is given, one named "Transfer Learning RDN-B" is
    requested from `getLogger` at DEBUG level.

    Returns:
        dict: shallow copies of the fitted model ("model"), of its `_dotimages`
        attribute ("treeImages") and of its `_prediction_metrics` attribute ("metrics").

    Raises:
        AssertionError: if a mandatory argument is missing or an alpha is negative.
        IndexError: if no entry of `relationMapping` maps to the target relation.
    """
    assert sourceDatabase is not None
    assert targetDatabaseTrain is not None
    assert targetDatabaseTest is not None
    assert weightStrategy is not None
    assert sourceUtilityAlpha >= 0
    assert targetUtilityAlpha >= 0
    assert relationMapping is not None
    assert termTypeMapping is not None

    if not logger:
        logger = getLogger("Transfer Learning RDN-B", level = logging.DEBUG)

    targetDomainTargetRelation = targetDatabaseTrain.getTargetRelation()

    path = os.path.join(experimentPath, "transferLearning")
    logger.info("RUNNING TRANSFER LEARNING...")
    logger.info(f"Progress will be store at {path}")

    logger.info("Mapping source domain to the target domain...")

    # Source relations whose image under the mapping is the target relation we want to learn.
    matchingSourceRelations = [
        sourceRelation
        for sourceRelation, mappedRelation in relationMapping.items()
        if mappedRelation == targetDomainTargetRelation
    ]
    if not matchingSourceRelations:
        # Same exception type as the former bare `[...][0]`, but with an actionable message.
        raise IndexError(
            f"No source relation in `relationMapping` is mapped to the target relation "
            f"{targetDomainTargetRelation!r}. Relation mapping: {relationMapping}"
        )
    # When several source relations map to the target relation, the first one (dict order) is used.
    sourceTargetRelation = matchingSourceRelations[0]
    
    logger.debug(f"Relation mapping: {relationMapping}")
    logger.debug(f"Term type mapping: {termTypeMapping}")

    mappedSourceDatabase = sourceDatabase.setTargetPredicate(
        sourceTargetRelation, 
        useRecursion = useRecursion,
        negPosRatio = negPosRatio,
        maxFailedNegSamplingRetries = maxFailedNegSamplingRetries
    )
    mappedSourceDatabase = mappedSourceDatabase.applyMapping(relationMapping, termTypeMapping, "source")

    logger.info("Combining source and target databases...")

    database = Database.prepareTransferLearningDatabase(
        mappedSourceDatabase, 
        targetDatabaseTrain, 
        weightStrategy = weightStrategy
    )

    model = RDNBoostTransferLearning(
        n_estimators = nEstimators,
        node_size = nodeSize,
        max_tree_depth = maxTreeDepth,
        neg_pos_ratio = negPosRatio,
        number_of_clauses = numberOfClauses,
        number_of_cycles = numberOfCycles,
        source_utility_alpha = sourceUtilityAlpha,
        target_utility_alpha = targetUtilityAlpha,
        utility_alpha_set_iter = utilityAlphaSetIter,
        path = path
    )

    logger.info(f"Training the model on the training set...")
    model.fit(database, ignoreSTDOUT = ignoreSTDOUT)

    logger.info(f"Evaluating the model on the test set...")
    model._run_inference(targetDatabaseTest, ignoreSTDOUT = ignoreSTDOUT)

    # The tree images are only read after they have been generated explicitly.
    model._generate_dotimages()
    dotImages = model._dotimages
    metrics = model._prediction_metrics

    result = {"model": copy(model), "treeImages": copy(dotImages), "metrics": copy(metrics)}
    return result

In [5]:
# Transfer learning using TreeBoostler
def runSingleExperiment_TreeBoostler(
    experimentPath: str = ".",
    sourceDatabase: Database = None,
    targetDatabaseTrain: Database = None,
    targetDatabaseTest: Database = None,
    nEstimators: int = 10,
    nodeSize: int = 2,
    maxTreeDepth: int = 3,
    negPosRatio: int = 2,
    numberOfClauses: int = 8,
    numberOfCycles: int = 100,
    ignoreSTDOUT: bool = True,
    searchArgPermutation: bool = True,
    allowSameTargetMap: bool = False,
    refine: bool = True,
    maxRevisionIterations: int = 1,
    logger: logging.Logger = None,
) -> dict:
    """Fit a `TreeBoostler` model on the source and target training databases and evaluate it.

    The model is created with `path = <experimentPath>/treeBoostler`, fitted with
    `fit(sourceDatabase, targetDatabaseTrain)` and then run on `targetDatabaseTest`.
    The three databases are mandatory (enforced with assertions). When no logger is
    given, one named "TreeBoostler" is requested from `getLogger` at DEBUG level.

    Returns:
        dict: shallow copies of the fitted model ("model"), of its `_dotimages`
        attribute ("treeImages") and of its `_prediction_metrics` attribute
        ("metrics").
    """
    assert sourceDatabase is not None
    assert targetDatabaseTrain is not None
    assert targetDatabaseTest is not None

    logger = logger or getLogger("TreeBoostler", level = logging.DEBUG)

    outputPath = os.path.join(experimentPath, "treeBoostler")
    logger.info("RUNNING TREEBOOSTLER...")
    logger.info(f"Progress will be store at {outputPath}")

    # TreeBoostler-specific options first, then the shared boosting hyper-parameters.
    settings = dict(
        searchArgPermutation = searchArgPermutation,
        allowSameTargetMap = allowSameTargetMap,
        refine = refine,
        maxRevisionIterations = maxRevisionIterations,
        n_estimators = nEstimators,
        node_size = nodeSize,
        max_tree_depth = maxTreeDepth,
        neg_pos_ratio = negPosRatio,
        number_of_clauses = numberOfClauses,
        number_of_cycles = numberOfCycles,
        path = outputPath,
    )
    booster = TreeBoostler(**settings)

    logger.info(f"Training the model on the training set...")
    booster.fit(sourceDatabase, targetDatabaseTrain, ignoreSTDOUT = ignoreSTDOUT)

    logger.info(f"Evaluating the model on the test set...")
    booster._run_inference(targetDatabaseTest, ignoreSTDOUT = ignoreSTDOUT)

    # The tree images are only read after they have been generated explicitly.
    booster._generate_dotimages()
    treeImages = booster._dotimages
    predictionMetrics = booster._prediction_metrics

    return {
        "model": copy(booster),
        "treeImages": copy(treeImages),
        "metrics": copy(predictionMetrics),
    }

In [6]:
def experimentResultSummarization(experimentResult: dict, logger: logging.Logger = None, ):
    """Log the mean and standard deviation of every metric of every model.

    `experimentResult` is read as `{model: {trainFold: {"metrics": {name: value}}}}`;
    every metric value is converted with `float`. For each model and metric, one INFO
    line of the form `<model>: <metric> = <mean> +- <std>` is emitted, computed over
    the folds in which the metric appears. Nothing is returned.

    When no logger is given, one named "Result summarization" is requested from `getLogger`.
    """
    if not logger:
        logger = getLogger("Result summarization")

    logger.info("Extracting performance metrics from experiment results:")

    # Collect the per-fold values of each metric, grouped by model.
    valuesByModel = {}
    for modelName, resultsPerFold in experimentResult.items():
        valuesByMetric = valuesByModel.setdefault(modelName, {})
        for foldOutcome in resultsPerFold.values():
            for name, value in foldOutcome["metrics"].items():
                valuesByMetric.setdefault(name, []).append(float(value))

    # Report the aggregate statistics in insertion order.
    for modelName, valuesByMetric in valuesByModel.items():
        for name, values in valuesByMetric.items():
            samples = np.array(values)
            average = samples.mean()
            deviation = samples.std()
            logger.info(f"{modelName}: {name} = {average:.4f} +- {deviation:.4f}")

In [7]:
def runSingleExperimentFromExperimentDict(experimentDict: dict, logger = None):
    """Run one transfer-learning experiment with a dedicated test fold.

    All `fold*` directories under the target database path except
    `experimentDict["testFold"]` are used for training; the test fold alone is used for
    evaluation. Only `runSingleExperiment_TransferLearning` is run.

    Side effects: creates `experimentDict["path"]`, writes the experiment settings to
    `setting.json` and the resulting metrics to `metrics.json` in that directory.

    Args:
        experimentDict: experiment settings. Requires the keys "id", "path", "testFold",
            "sourceDatabase", "targetDatabase", "weight" and "mapping"; hyper-parameters
            are optional and fall back to the defaults below.
        logger: logger to use; when omitted, one named after the experiment id is
            requested from `getLogger` at DEBUG level.

    Raises:
        KeyError: if a required key (including "testFold") is missing.
    """
    experiment = experimentDict
    experimentID = experiment["id"]
    experimentBasePath = os.path.join(experiment["path"])

    os.makedirs(experimentBasePath, exist_ok = True)

    with open(os.path.join(experimentBasePath, "setting.json"), "w") as f:
        json.dump(experiment, f)

    if not logger:
        logger = getLogger(experimentID, level = logging.DEBUG)

    logger.info("Parsing experiment parameters...")

    # The test fold is mandatory in this setting; read it once, up front, so that a missing
    # key fails before any database is loaded.
    targetDatabaseTestFold = experiment["testFold"]

    useRecursion = experiment.get("useRecursion", False)
    negPosRatio = experiment.get("negPosRatio", 1)
    randomSeed = experiment.get("randomSeed", 10)
    maxFailedNegSamplingRetries = experiment.get("maxFailedNegSamplingRetries", 50)
    numberOfClauses = experiment.get("numberOfClauses", 8)
    numberOfCycles = experiment.get("numberOfCycles", 100)
    maxTreeDepth = experiment.get("maxTreeDepth", 3)
    nEstimators = experiment.get("nEstimators", 10)
    nodeSize = experiment.get("nodeSize", 2)
    sourceUtilityAlpha = experiment.get("sourceUtilityAlpha", 1)
    targetUtilityAlpha = experiment.get("targetUtilityAlpha", 1)
    utilityAlphaSetIter = experiment.get("utilityAlphaSetIter", 1)
    ignoreSTDOUT = experiment.get("ignoreSTDOUT", False)

    sourceDatabase = loadDatabase(
        folds = None, 
        useRecursion = useRecursion, 
        logger = logger,
        negPosRatio = negPosRatio,
        maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
        **experiment["sourceDatabase"]
    )

    weightFactory = WeightFactory()
    weightStrategy = weightFactory.getWeightStrategy(
        experiment["weight"]["strategy"], 
        **experiment["weight"]["parameters"]
    )

    relationMapping = experiment["mapping"]["relationMapping"]
    termTypeMapping = experiment["mapping"]["termTypeMapping"]

    targetDatabasePath = experiment["targetDatabase"]["path"]
    availableFolds = {os.path.basename(path) for path in glob(f"{targetDatabasePath}/fold*")}
    # Sorted so that the training folds are always passed in the same order.
    targetDatabaseTrainFolds = sorted(availableFolds - {targetDatabaseTestFold})

    logger.info(f"RUNNING EXPERIMENTS USING {targetDatabaseTestFold.upper()} AS TEST FOLD...")

    logger.info("Loading target database for training...")
    logger.debug(f"Train folds: {targetDatabaseTrainFolds}")

    targetDatabaseTrain = loadDatabase(
        folds = targetDatabaseTrainFolds, 
        useRecursion = useRecursion, 
        logger = logger,
        negPosRatio = negPosRatio,
        maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
        **experiment["targetDatabase"]
    )

    logger.info("Loading target database for testing...")
    logger.debug(f"Test fold: {targetDatabaseTestFold}")

    targetDatabaseTest = loadDatabase(
        folds = [targetDatabaseTestFold], 
        useRecursion = useRecursion, 
        logger = logger,
        negPosRatio = negPosRatio,
        maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
        **experiment["targetDatabase"]
    )

    targetDomainTargetRelation = targetDatabaseTrain.getTargetRelation()

    logger.debug(f"Target relation for target database: {targetDomainTargetRelation}")

    result = runSingleExperiment_TransferLearning(
        experimentPath = experimentBasePath,
        sourceDatabase = sourceDatabase,
        targetDatabaseTrain = targetDatabaseTrain,
        targetDatabaseTest = targetDatabaseTest,
        nEstimators = nEstimators,
        nodeSize = nodeSize,
        maxTreeDepth = maxTreeDepth,
        negPosRatio = negPosRatio,
        numberOfClauses = numberOfClauses,
        numberOfCycles = numberOfCycles,
        ignoreSTDOUT = ignoreSTDOUT,
        useRecursion = useRecursion,
        randomSeed = randomSeed,
        maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
        weightStrategy = weightStrategy,
        sourceUtilityAlpha = sourceUtilityAlpha,
        targetUtilityAlpha = targetUtilityAlpha,
        utilityAlphaSetIter = utilityAlphaSetIter,
        relationMapping = relationMapping,
        termTypeMapping = termTypeMapping,
        logger = logger,
    )

    metricsJSONPath = os.path.join(experimentBasePath, "metrics.json")
    logger.info(f"Storing performance metrics at {metricsJSONPath}.")

    allMetrics = result["metrics"]

    with open(metricsJSONPath, "w") as f:
        json.dump(allMetrics, f)

    logger.info("Experiment has been finished.")

# **Experiments**

## **Cross-Validation for Transfer Setting**

It assumes that there is not enough target data for learning and resorts to a related (source) domain to augment the training data. To simulate low target-data availability, each iteration of our cross-validation for transfer settings selects one fold for training and the remaining folds for testing. This is the opposite of traditional cross-validation.

In our transfer experiments, we also consider learning from scratch. In this case, the learning only relies on the limited target data, as simulated by our cross validation procedure.

In [8]:
def runAllFolds(experimentDict: dict, logger = None) -> dict:
    """Run the transfer-setting cross-validation described by `experimentDict`.

    The procedure is similar to k-fold cross-validation, except that at each iteration
    a single fold is used for training while the remaining folds are used for testing.
    This is because we simulate a scenario where only a small amount of data is
    available on the target domain. When `experimentDict` has a truthy "testFold",
    that fold is left out of both the training and the test folds.

    Side effects: creates `<path>/<id>/` (plus one sub-directory per training fold) and
    writes the experiment settings to `setting.json` and the collected metrics to
    `metrics.json` inside `<path>/<id>/`.

    Args:
        experimentDict: experiment settings. Requires "id", "path" and "targetDatabase";
            "sourceDatabase" is required when `runTransferLearning` or `runTreeBoostler`
            is set, and "weight" / "mapping" when `runTransferLearning` is set.
        logger: logger to use; when omitted, one named after the experiment id is
            requested from `getLogger` at DEBUG level.

    Returns:
        dict: `result[modelName][trainFold]` holds the dict returned by the matching
        `runSingleExperiment_*` function.

    Raises:
        AssertionError: if none of the four `run*` flags is set.
    """

    experiment = experimentDict
    experimentID = experiment["id"]
    experimentBasePath = os.path.join(experiment["path"], experimentID)

    os.makedirs(experimentBasePath, exist_ok = True)

    with open(os.path.join(experimentBasePath, "setting.json"), "w") as f:
        json.dump(experiment, f)

    if not logger:
        logger = getLogger(experimentID, level = logging.DEBUG)

    logger.info("Parsing experiment parameters...")

    useRecursion = experiment.get("useRecursion", False)
    negPosRatio = experiment.get("negPosRatio", 1)
    randomSeed = experiment.get("randomSeed", 10)
    maxFailedNegSamplingRetries = experiment.get("maxFailedNegSamplingRetries", 50)
    numberOfClauses = experiment.get("numberOfClauses", 8)
    numberOfCycles = experiment.get("numberOfCycles", 100)
    maxTreeDepth = experiment.get("maxTreeDepth", 3)
    nEstimators = experiment.get("nEstimators", 10)
    nodeSize = experiment.get("nodeSize", 2)
    sourceUtilityAlpha = experiment.get("sourceUtilityAlpha", 1)
    targetUtilityAlpha = experiment.get("targetUtilityAlpha", 1)
    utilityAlphaSetIter = experiment.get("utilityAlphaSetIter", 1)
    runOriginalRDNBoost = experiment.get("runOriginalRDNBoost", False)
    runTransferLearning = experiment.get("runTransferLearning", False)
    runAnalogousToOriginalRDNBoost = experiment.get("runAnalogousToOriginalRDNBoost", False)
    runTreeBoostler = experiment.get("runTreeBoostler", False)
    ignoreSTDOUT = experiment.get("ignoreSTDOUT", False)
    searchArgPermutation = experiment.get("searchArgPermutation", True)
    allowSameTargetMap = experiment.get("allowSameTargetMap", False)
    refine = experiment.get("refine", True)
    maxRevisionIterations = experiment.get("maxRevisionIterations", 1)
    testFold = experiment.get("testFold", None)

    anyModelIsSet = runOriginalRDNBoost or runTransferLearning or runAnalogousToOriginalRDNBoost or runTreeBoostler
    assert anyModelIsSet, "No model to run. `runOriginalRDNBoost`, `runTransferLearning`, `runAnalogousToOriginalRDNBoost`, and `runTreeBoostler` can not be set to False simultaneously."

    # The source database is only needed by the two approaches that transfer from a source domain.
    if runTransferLearning or runTreeBoostler:
        logger.info("Loading source database...")
        
        sourceDatabase = loadDatabase(
            folds = None, 
            useRecursion = useRecursion, 
            logger = logger,
            negPosRatio = negPosRatio,
            maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
            **experiment["sourceDatabase"]
        )

        if runTransferLearning:
            weightFactory = WeightFactory()
            weightStrategy = weightFactory.getWeightStrategy(experiment["weight"]["strategy"], **experiment["weight"]["parameters"])
        
            relationMapping = experiment["mapping"]["relationMapping"]
            termTypeMapping = experiment["mapping"]["termTypeMapping"]

    targetDatabasePath = experiment["targetDatabase"]["path"]
    availableFolds = {os.path.basename(path) for path in glob(f"{targetDatabasePath}/fold*")}
    if testFold:
        availableFolds -= {testFold}
    # Sorted so that folds are processed (and passed to `loadDatabase`) in the same order on every run.
    allTargetFoldsExceptTest = sorted(availableFolds)

    if not allTargetFoldsExceptTest:
        # Without folds the loop below does nothing and an empty `metrics.json` is written.
        logger.warning(f"No folds matching 'fold*' were found at {targetDatabasePath}. No model will be trained.")

    result = {}

    for fold in allTargetFoldsExceptTest:
        experimentFoldPath = os.path.join(experimentBasePath, fold)
        os.makedirs(experimentFoldPath, exist_ok = True)

        logger.info(f"RUNNING EXPERIMENTS USING {fold.upper()} AS TRAINING FOLD...")

        targetDatabaseTrainFold = fold
        targetDatabaseTestFolds = [otherFold for otherFold in allTargetFoldsExceptTest if otherFold != targetDatabaseTrainFold]

        logger.info("Loading target database for training...")
        logger.debug(f"Train fold: {targetDatabaseTrainFold}")

        targetDatabaseTrain = loadDatabase(
            folds = [targetDatabaseTrainFold], 
            useRecursion = useRecursion, 
            logger = logger,
            negPosRatio = negPosRatio,
            maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
            **experiment["targetDatabase"]
        )

        logger.info("Loading target database for testing...")
        logger.debug(f"Test folds: {targetDatabaseTestFolds}")

        targetDatabaseTest = loadDatabase(
            folds = targetDatabaseTestFolds, 
            useRecursion = useRecursion, 
            logger = logger,
            negPosRatio = negPosRatio,
            maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
            **experiment["targetDatabase"]
        )

        targetDomainTargetRelation = targetDatabaseTrain.getTargetRelation()

        logger.debug(f"Target relation for target database: {targetDomainTargetRelation}")

        if runOriginalRDNBoost:
            result.setdefault("originalRDNBoost", {})[targetDatabaseTrainFold] = runSingleExperiment_OriginalRDNBoost(
                experimentPath = experimentFoldPath, 
                databaseTrain = targetDatabaseTrain,
                databaseTest = targetDatabaseTest,
                nEstimators = nEstimators,
                nodeSize = nodeSize,
                maxTreeDepth = maxTreeDepth,
                negPosRatio = negPosRatio,
                numberOfClauses = numberOfClauses,
                numberOfCycles = numberOfCycles,
                ignoreSTDOUT = ignoreSTDOUT,
                logger = logger        
            )

        if runAnalogousToOriginalRDNBoost:
            result.setdefault("analogousToOriginalRDNBoost", {})[targetDatabaseTrainFold] = runSingleExperiment_AnalogousToRDNBoost(
                experimentPath = experimentFoldPath, 
                databaseTrain = targetDatabaseTrain,
                databaseTest = targetDatabaseTest,
                nEstimators = nEstimators,
                nodeSize = nodeSize,
                maxTreeDepth = maxTreeDepth,
                negPosRatio = negPosRatio,
                numberOfClauses = numberOfClauses,
                numberOfCycles = numberOfCycles,
                ignoreSTDOUT = ignoreSTDOUT,
                logger = logger        
            )

        if runTransferLearning:
            result.setdefault("transferLearning", {})[targetDatabaseTrainFold] = runSingleExperiment_TransferLearning(
                experimentPath = experimentFoldPath,
                sourceDatabase = sourceDatabase,
                targetDatabaseTrain = targetDatabaseTrain,
                targetDatabaseTest = targetDatabaseTest,
                nEstimators = nEstimators,
                nodeSize = nodeSize,
                maxTreeDepth = maxTreeDepth,
                negPosRatio = negPosRatio,
                numberOfClauses = numberOfClauses,
                numberOfCycles = numberOfCycles,
                ignoreSTDOUT = ignoreSTDOUT,
                useRecursion = useRecursion,
                randomSeed = randomSeed,
                maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
                weightStrategy = weightStrategy,
                sourceUtilityAlpha = sourceUtilityAlpha,
                targetUtilityAlpha = targetUtilityAlpha,
                utilityAlphaSetIter = utilityAlphaSetIter,
                relationMapping = relationMapping,
                termTypeMapping = termTypeMapping,
                logger = logger,
            )

        if runTreeBoostler:
            result.setdefault("treeBoostler", {})[targetDatabaseTrainFold] = runSingleExperiment_TreeBoostler(
                experimentPath = experimentFoldPath,
                sourceDatabase = sourceDatabase,
                targetDatabaseTrain = targetDatabaseTrain,
                targetDatabaseTest = targetDatabaseTest,
                nEstimators = nEstimators,
                nodeSize = nodeSize,
                maxTreeDepth = maxTreeDepth,
                negPosRatio = negPosRatio,
                numberOfClauses = numberOfClauses,
                numberOfCycles = numberOfCycles,
                ignoreSTDOUT = ignoreSTDOUT,
                searchArgPermutation = searchArgPermutation,
                allowSameTargetMap = allowSameTargetMap,
                refine = refine,
                maxRevisionIterations = maxRevisionIterations,
                logger = logger,
            )

    metricsJSONPath = os.path.join(experimentBasePath, "metrics.json")
    logger.info(f"Storing performance metrics at {metricsJSONPath}.")

    # Only the metrics are serialized; models and tree images stay in the returned dict.
    allMetrics = {}
    for model, folds in result.items():
        allMetrics[model] = {fold: foldResults["metrics"] for fold, foldResults in folds.items()}

    with open(metricsJSONPath, "w") as f:
        json.dump(allMetrics, f)

    logger.info("Experiment has been finished.")
        
    return result

### **Without a Separated Test Fold**

In [9]:
# Load every experiment setting of the transfer cross-validation and count them.
experiments = {}
with open("experiments-transferCrossValidation.json") as experimentsFile:
    experiments = json.loads(experimentsFile.read())
totalExperiments = len(experiments)
totalExperiments

4032

In [22]:
# Group the experiments by the model flag present in their settings
# (only the presence of the key is checked, not its value).
transferLearningExperiments = [
    candidate for candidate in experiments
    if "runTransferLearning" in candidate
]
originalRDNBoostExperiments = [
    candidate for candidate in experiments
    if "runOriginalRDNBoost" in candidate
]
analogousToOriginalRDNBoostExperiments = [
    candidate for candidate in experiments
    if "runAnalogousToOriginalRDNBoost" in candidate
]
treeBoostlerExperiments = [
    candidate for candidate in experiments
    if "runTreeBoostler" in candidate
]

print("Total Transfer Learning Experiments:", len(transferLearningExperiments))
print("Total Original RDN-Boost Experiments:", len(originalRDNBoostExperiments))
print("Total Analogous to Original RDN-Boost Experiments:", len(analogousToOriginalRDNBoostExperiments))
print("Total TreeBoostler Experiments:", len(treeBoostlerExperiments))

Total Transfer Learning Experiments: 4000
Total Original RDN-Boost Experiments: 8
Total Analogous to Original RDN-Boost Experiments: 8
Total TreeBoostler Experiments: 16


In [35]:
# This function can be leveraged to prioritize experiments.
def skipExperiment(experimentDict, requiredSourceDomain: str = "imdb", requiredTargetDomain: str = "uwcse"):
    """Decide whether an experiment should be skipped.

    An experiment is skipped when:
      * `<path>/<id>/metrics.json` already exists (it has already been carried out);
      * its settings do not contain the "runTransferLearning" key;
      * the base names of its source / target database paths are not
        `requiredSourceDomain` / `requiredTargetDomain`.

    Args:
        experimentDict: experiment settings with the keys "id", "path",
            "sourceDatabase" and "targetDatabase" (the latter two holding a "path").
        requiredSourceDomain: source domain the experiment must transfer from.
        requiredTargetDomain: target domain the experiment must transfer to.

    Returns:
        tuple: `(shouldSkip, reason)`; `reason` is an empty string when the experiment
        should run.
    """
    # The experiment has already been carried out.
    experimentID = experimentDict["id"]
    experimentPath = experimentDict['path']
    if os.path.exists(f"{experimentPath}/{experimentID}/metrics.json"):
        return True, "The experiment has already been carried out."

    # Model filtering [Uncomment the models whose experiments you would like to run]
    # =============================================================================================

    # 1) It is not an experiment from our transfer learning approach.
    if "runTransferLearning" not in experimentDict:
        return True, "It is not an experiment from our transfer learning approach."

    # # 2) It is not an experiment from our approach equivalent to original RDN-Boost.
    # if not "runAnalogousToOriginalRDNBoost" in experimentDict:
    #     return True, "It is not an experiment from our approach equivalent to original RDN-Boost."

    # # 3) It is not an experiment from original RDN-Boost.
    # if not "runOriginalRDNBoost" in experimentDict:
    #     return True, "It is not an experiment from original RDN-Boost."

    # # 4) It is not an experiment from TreeBoostler.
    # if not "runTreeBoostler" in experimentDict:
    #     return True, "It is not an experiment from TreeBoostler"

    # ============================================================================================

    # Run only transfer experiments from `requiredSourceDomain` to `requiredTargetDomain`.
    sourceDomain = os.path.basename(experimentDict["sourceDatabase"]["path"])
    targetDomain = os.path.basename(experimentDict["targetDatabase"]["path"])

    if sourceDomain != requiredSourceDomain or targetDomain != requiredTargetDomain:
        # The reason names the domains that are actually checked.
        return True, f"It is not a transfer from {requiredSourceDomain.upper()} to {requiredTargetDomain.upper()}"

    # # Run only transfer with utilityAlphaSetIter != 1 (we already have results for this setting)
    # if experimentDict["utilityAlphaSetIter"] == 1:
    #     return True, "UtilityAlphaSetIter is equal to 1."

    # =============================================================================================
    
    # # It is a trivial experiment, i.e., it usually achieves very good performances in related work.
    # sourceDomain = os.path.basename(experimentDict["sourceDatabase"]["path"])
    # targetDomain = os.path.basename(experimentDict["targetDatabase"]["path"])

    # if sourceDomain == "cora" and targetDomain == "imdb":
    #     return True, "Cora to IMDB transferring usually achieves very good performances in related work."

    # if sourceDomain == "yeast" and targetDomain == "twitter":
    #     return True, "Yeast to Twitter transferring usually achieves very good performances in related work."

    # if sourceDomain == "uwcse" and targetDomain == "imdb":
    #     return True, "USCSE to IMDB transferring usually achieves very good performances in related work."

    # if sourceDomain == "nell_finances" and targetDomain == "nell_sports":
    #     return True, "NELL Finances to NELL Sports transferring usually achieves very good performances in related work."

    return False, ""

In [36]:
# Select the experiments to run, starting at position `start` (1-based) of `experiments`.
start = 1 # An int greater or equal to 1
skippedExperiments = []
experimentsToRun = []
numProcesses = 3

for i, experimentDict in enumerate(experiments[start-1:], start = start):
    # TODO: I am temporarily ignoring other models. I need to further run the other experiments
    shouldSkipExperiment, skipMessage = skipExperiment(experimentDict)
    experimentID = experimentDict["id"]
    if shouldSkipExperiment:
        skippedExperiments.append((experimentID, skipMessage))
    else:
        # Discard leftovers of an unfinished run. `shutil.rmtree` avoids spawning a shell with
        # interpolated paths; `ignore_errors` keeps the "nothing to delete" case silent.
        shutil.rmtree(f"{experimentDict['path']}/{experimentID}", ignore_errors = True)
        experimentsToRun.append(experimentDict)

len(experimentsToRun)

0

In [33]:
# Execution mode checked by the execution cell that follows; only the "parallel" branch is visible in this part of the notebook.
experimentsRunningMode = "parallel" # Either "parallel" or "sequential"

In [34]:
# Parallel execution of the experiments.
# TODO: We get a kernel crash that occurs only when the models from srlearn.rdn are imported under Python 3.8.10. In our tests, the problem disappears under Python 3.10.5, but the reason why it does not work under Python 3.8.10 is still unknown.

if experimentsRunningMode == "parallel":
    def safePrint(message, consoleOutputLock: multiprocessing.managers.AcquirerProxy):
        """Print `message` while holding `consoleOutputLock`, which is shared by all workers."""
        # The context manager releases the lock even if `print` raises.
        with consoleOutputLock:
            print(message)

    def experimentWorker(
        experimentDict: dict,
        consoleOutputLock: multiprocessing.managers.AcquirerProxy,
        experimentNumber: Optional[int] = None,
        numberOfExperiments: Optional[int] = None,
    ):
        """Run all folds of one experiment and log a summary of its results.

        Progress goes to `<path>/<id>/experiment.log`; start, success and failure
        notices are printed to the console under `consoleOutputLock`.
        `experimentNumber` / `numberOfExperiments` are only used for the progress
        line written to the log. Any exception is logged (when the experiment
        logger could be created) and re-raised.
        """
        experimentID = experimentDict["id"]
        safePrint(f"Starting experiment {experimentID}...", consoleOutputLock)
        logger = None
        try:
            experimentPath = f"{experimentDict['path']}/{experimentID}"
            os.makedirs(experimentPath, exist_ok = True)    
            logger = getLogger(experimentID, logFile = f"{experimentPath}/experiment.log", consoleOutput = False)
            # The position is passed in explicitly instead of being read from a notebook global.
            if experimentNumber is None or numberOfExperiments is None:
                logger.info(f"RUNNING EXPERIMENT {experimentID}...")
            else:
                logger.info(f"RUNNING EXPERIMENT {experimentNumber}/{numberOfExperiments}...")
            experimentResult = runAllFolds(experimentDict, logger = logger)
            experimentResultSummarization(experimentResult, logger = logger)
            safePrint(f"Experiment finished successfully: {experimentID}...", consoleOutputLock)
        except Exception as e:
            if logger is not None:
                # Makes the console hint below true: the traceback ends up in the experiment log.
                logger.exception(f"Experiment {experimentID} failed.")
            safePrint(f"The following exception was raised while running the experiment {experimentID}: {e}. Check the logs in the experiment directory for more details.", consoleOutputLock)
            raise

    # NOTE(review): the pool size is hard-coded here while `numProcesses` is defined in an earlier cell; confirm which value is intended.
    with ProcessPoolExecutor(max_workers = 8) as p:
        with multiprocessing.Manager() as manager:
            consoleOutputLock = manager.Lock()
            numberOfExperimentsToRun = len(experimentsToRun)
            futures = p.map(
                experimentWorker,
                experimentsToRun,
                [consoleOutputLock] * numberOfExperimentsToRun,
                range(1, numberOfExperimentsToRun + 1),
                [numberOfExperimentsToRun] * numberOfExperimentsToRun,
            )
            # Consuming the iterator waits for the workers and re-raises the first failure.
            for result in futures:
                pass

Starting experiment 2b679b4698cbc49ea7cb74ba976208d4869c19eb7fd756fa1ecf4f64e2b568b4...
Starting experiment 1c0d36fc64e2507a4d9c187f5c220e1a6fcd30a03bf87b3eb020f0fa29b3cbaa...
Starting experiment af5270bc9334600ae7ba34f8d1bc9f8318f6e69cab92101fd470b0abd54fc4e8...
Starting experiment ec008509c3aba5e2d2648d44ef72526927fd719d8c7770ce01996eb97acd071d...


Starting experiment ff22846a48d94db7addc85cfc3527f8e39d23989678a6f9f11e762d591267925...
Starting experiment 107d99fc81f6624e775a345ec457327e6faf8a326f5073c7db26d1b0329c0a26...
Starting experiment 4a3e5204395480bd1d0411fe7961ea2bbc379bc28518d8ce2a88e6bd4bf2b452...
Starting experiment 4c15fef6886ffa04f1d7322fd9fb74e7916f4c542f24b354a79113abc9a04e04...
Experiment finished successfully: ff22846a48d94db7addc85cfc3527f8e39d23989678a6f9f11e762d591267925...
Starting experiment 7d69218902a372cedefca661b119cd56b89f3b3b2bba15386cba690ec0efa3fe...
Experiment finished successfully: 107d99fc81f6624e775a345ec457327e6faf8a326f5073c7db26d1b0329c0a26...
Experiment finished successfully: 7d69218902a372cedefca661b119cd56b89f3b3b2bba15386cba690ec0efa3fe...
Experiment finished successfully: 4c15fef6886ffa04f1d7322fd9fb74e7916f4c542f24b354a79113abc9a04e04...
Experiment finished successfully: ec008509c3aba5e2d2648d44ef72526927fd719d8c7770ce01996eb97acd071d...
Experiment finished successfully: 4a3e5204395480b

In [None]:
# Sequential execution of the experiments: one experiment at a time, in the notebook process.
if experimentsRunningMode == "sequential":
    totalExperimentsToRun = len(experimentsToRun)
    for experimentNumber, experimentDict in enumerate(experimentsToRun, start = 1):
        experimentID = experimentDict["id"]
        experimentPath = f"{experimentDict['path']}/{experimentID}"
        logFilePath = f"{experimentPath}/experiment.log"

        # The experiment directory must exist before the log file is created inside it.
        os.makedirs(experimentPath, exist_ok = True)
        logger = getLogger(experimentID, logFile = logFilePath)
        logger.info(f"RUNNING EXPERIMENT {experimentNumber}/{totalExperimentsToRun}...")

        experimentResultSummarization(
            runAllFolds(experimentDict, logger = logger),
            logger = logger
        )

        # Keep only the output of the most recent experiment in the cell.
        clear_output(wait = True)

In [None]:
# Show every (experiment ID, skip reason) pair collected by the selection cell.
print(skippedExperiments)

### **With a Separated Test Fold (Nested CV)**

#### **Inner Loop of the Nested CV**

In [10]:
# `experiments` is emptied first, so it stays empty if the file below cannot be opened.
experiments = {}
with open("experiments-transferCrossValidationWithTestFold.json") as experimentsFile:
    experiments = json.loads(experimentsFile.read())
totalExperiments = len(experiments)
totalExperiments

16128

In [11]:
# Group the experiments by the model flag present in their setting.
transferLearningExperiments = [setting for setting in experiments if "runTransferLearning" in setting]
originalRDNBoostExperiments = [setting for setting in experiments if "runOriginalRDNBoost" in setting]
analogousToOriginalRDNBoostExperiments = [setting for setting in experiments if "runAnalogousToOriginalRDNBoost" in setting]
treeBoostlerExperiments = [setting for setting in experiments if "runTreeBoostler" in setting]

# Report how many experiments fall in each group.
for caption, selection in (
    ("Total Transfer Learning Experiments:", transferLearningExperiments),
    ("Total Original RDN-Boost Experiments:", originalRDNBoostExperiments),
    ("Total Analogous to Original RDN-Boost Experiments:", analogousToOriginalRDNBoostExperiments),
    ("Total TreeBoostler Experiments:", treeBoostlerExperiments),
):
    print(caption, len(selection))

Total Transfer Learning Experiments: 16000
Total Original RDN-Boost Experiments: 32
Total Analogous to Original RDN-Boost Experiments: 32
Total TreeBoostler Experiments: 64


In [14]:
# This function can be leveraged to prioritize experiments.
def skipExperiment(experimentDict):
    """Decide whether an experiment must be left out of the current run.

    The filters are applied in order and the first one that matches wins: (1) the experiment already has a metrics.json, (2) it is not a transfer learning experiment, (3) it is not a transfer from NELL Sports to NELL Finances, (4) its test fold is not the fold of interest. The commented-out blocks are alternative filters that can be enabled by hand.

    Args:
        experimentDict: Experiment setting. Reads "id", "path", "testFold", the "path" entry of "sourceDatabase" and "targetDatabase", and checks whether the "runTransferLearning" key is present.

    Returns:
        A tuple (shouldSkip, reason); reason is an empty string when the experiment must be run.
    """
    # The experiment has already been carried out.
    experimentID = experimentDict["id"]
    experimentPath = experimentDict['path']
    if os.path.exists(f"{experimentPath}/{experimentID}/metrics.json"):
        return True, "The experiment has already been carried out."

    # =============================================================================================
    # Model filtering [Uncomment the models whose experiments you would like to run]

    # 1) It is not an experiment from our transfer learning approach.
    if "runTransferLearning" not in experimentDict:
        return True, "It is not an experiment from our transfer learning approach."

    # # 2) It is not an experiment from our approach equivalent to original RDN-Boost.
    # if "runAnalogousToOriginalRDNBoost" not in experimentDict:
    #     return True, "It is not an experiment from our approach equivalent to original RDN-Boost."

    # # 3) It is not an experiment from original RDN-Boost.
    # if "runOriginalRDNBoost" not in experimentDict:
    #     return True, "It is not an experiment from original RDN-Boost."

    # # 4) It is not an experiment from TreeBoostler.
    # if "runTreeBoostler" not in experimentDict:
    #     return True, "It is not an experiment from TreeBoostler"

    # ============================================================================================

    # Run only transfer experiments from NELL Sports to NELL Finances.
    # The domain name is the last component of the database path.
    sourceDomain = os.path.basename(experimentDict["sourceDatabase"]["path"])
    targetDomain = os.path.basename(experimentDict["targetDatabase"]["path"])

    if sourceDomain != "nell_sports" or targetDomain != "nell_finances":
        return True, "It is not a transfer from NELL Sports to NELL Finances"

    # ============================================================================================

    # # Run only transfer with utilityAlphaSetIter != 1 (we already have results for this setting)
    # if experimentDict["utilityAlphaSetIter"] == 1:
    #     return True, "UtilityAlphaSetIter is equal to 1."

    # =============================================================================================

    # Run only experiments whose test set is built of a single given fold (e.g., fold00).
    testFold = experimentDict["testFold"]
    foldOfInterest = "fold00"
    if testFold != foldOfInterest:
        return True, f"We are considering only experiments in which the test set is built of only {foldOfInterest}"

    # ============================================================================================

    # # It is a trivial experiment, i.e., it usually achieves very good performances in related work.
    # # (sourceDomain and targetDomain are the ones computed above.)

    # if sourceDomain == "cora" and targetDomain == "imdb":
    #     return True, "Cora to IMDB transferring usually achieves very good performances in related work."

    # if sourceDomain == "yeast" and targetDomain == "twitter":
    #     return True, "Yeast to Twitter transferring usually achieves very good performances in related work."

    # if sourceDomain == "uwcse" and targetDomain == "imdb":
    #     return True, "UWCSE to IMDB transferring usually achieves very good performances in related work."

    # if sourceDomain == "nell_finances" and targetDomain == "nell_sports":
    #     return True, "NELL Finances to NELL Sports transferring usually achieves very good performances in related work."

    return False, ""

In [15]:
import shutil

start = 1 # An int greater or equal to 1
skippedExperiments = []
experimentsToRun = []
numProcesses = 3

# Split the experiments (from position `start` on) into the ones to run and the ones to skip.
for i, experimentDict in enumerate(experiments[start-1:], start = start):
    # TODO: I am temporarily ignoring other models. I  need to further run the other experiments
    shouldSkipExperiment, skipMessage = skipExperiment(experimentDict)
    experimentID = experimentDict["id"]
    if shouldSkipExperiment:
        skippedExperiments.append((experimentID, skipMessage))
    else:
        # Discard whatever is already stored in the experiment directory before re-running it.
        # shutil.rmtree avoids building a shell command from interpolated paths (which breaks
        # on spaces or shell metacharacters); a missing directory is silently ignored.
        shutil.rmtree(f"{experimentDict['path']}/{experimentID}", ignore_errors = True)
        experimentsToRun.append(experimentDict)

len(experimentsToRun)

500

In [16]:
# Only the "parallel" cell below is active in this section; the sequential one is commented out.
experimentsRunningMode = "parallel" # Either "parallel" or "sequential"

In [None]:
# Parallel execution of the experiments.
# TODO: We get a kernel crash that occurs only when the models are imported from srlearn.rdn under Python 3.8.10. In our tests, the problem is solved when running under Python 3.10.5, but the reason why it does not work under Python 3.8.10 is still unknown.

if experimentsRunningMode == "parallel":
    def safePrint(message, consoleOutputLock: multiprocessing.managers.AcquirerProxy):
        """Print `message` while holding the console lock shared by all workers."""
        # The context manager releases the lock even if print() raises.
        with consoleOutputLock:
            print(message)

    def experimentWorker(experimentDict: dict, consoleOutputLock: multiprocessing.managers.AcquirerProxy, progressLabel: str = "?/?"):
        """Run every fold of one experiment and summarize its results; executed by a pool worker.

        `progressLabel` ("<position>/<total>") is received explicitly: the worker must not read the notebook-level loop variable `i`, which only holds whatever value the last executed loop left behind. Any exception is written to the experiment log (when the logger could be created), reported on the console and re-raised so that the parent process sees it.
        """
        experimentID = experimentDict["id"]
        safePrint(f"Starting experiment {experimentID}...", consoleOutputLock)
        logger = None
        try:
            experimentPath = f"{experimentDict['path']}/{experimentID}"
            os.makedirs(experimentPath, exist_ok = True)
            logger = getLogger(experimentID, logFile = f"{experimentPath}/experiment.log", consoleOutput = False)
            logger.info(f"RUNNING EXPERIMENT {progressLabel}...")
            experimentResult = runAllFolds(experimentDict, logger = logger)
            experimentResultSummarization(experimentResult, logger = logger)
            safePrint(f"Experiment finished successfully: {experimentID}...", consoleOutputLock)
        except Exception as e:
            if logger is not None:
                # The console message below points the reader to the log, so the traceback must be there.
                logger.exception(f"Experiment {experimentID} failed.")
            safePrint(f"The following exception was raised while running the experiment {experimentID}: {e}. Check the logs in the experiment directory for more details.", consoleOutputLock)
            raise

    totalExperimentsToRun = len(experimentsToRun)
    # The manager is entered first and therefore shut down last: if one experiment fails, the pool
    # still waits for the experiments that are already running, and they can keep using the lock.
    with multiprocessing.Manager() as manager:
        # NOTE: the number of workers is hard-coded; `numProcesses` from the selection cell is not used here.
        with ProcessPoolExecutor(max_workers = 8) as p:
            consoleOutputLock = manager.Lock()
            futures = p.map(
                experimentWorker,
                experimentsToRun,
                [consoleOutputLock] * totalExperimentsToRun,
                [f"{position}/{totalExperimentsToRun}" for position in range(1, totalExperimentsToRun + 1)]
            )
            # Draining the iterator re-raises the first exception raised by a worker, if any.
            for workerResult in futures:
                pass

Starting experiment 9f68476e61fb7ba124ff1c8df32f385da746f67a3687a1fef949633487814cf2...
Starting experiment 39f07fa5339a538b5d697c6a25b9d9e2518e980f4c5de2ee76f84ea4f59c3cc4...
Starting experiment 26d6bb44309cd978cbbc049cf6dd0c1cfa7122011e8ac14982e179f765592413...
Starting experiment 0b0110f06e93495fad76f2401ec6af29a9c5d37213e169661afb869011fb8281...
Starting experiment c138235ae7e9e0ace10ad1b9b8f36b58e389fc38f654d5d99af312c582a65947...
Starting experiment 91a9babfeaaa168f97da212af7cd7f606e25d393bd8b389948e03bd6a6916cc6...
Starting experiment c6a9365adb87bf0f25449b7c0d63f8352ab9304614da1fbd59bb84c75553733e...
Starting experiment abccab8bbb98024c84968666b5d89c9a88b4b07130dac2aa3ac2f9e449e7914f...
Experiment finished successfully: 0b0110f06e93495fad76f2401ec6af29a9c5d37213e169661afb869011fb8281...
Starting experiment 7fac00866083a8f1c435dcf008d7b66efbf55123718d36b3f3c4fe7cb71a7c40...
Experiment finished successfully: 9f68476e61fb7ba124ff1c8df32f385da746f67a3687a1fef949633487814cf2...
Star

In [None]:
# # Sequential execution of the experiments. 
# if experimentsRunningMode == "sequential":
#     totalExperimentsToRun = len(experimentsToRun)
#     for i, experimentDict in enumerate(experimentsToRun, start = 1):
#         experimentID = experimentDict["id"]
#         experimentPath = f"{experimentDict['path']}/{experimentID}"
#         os.makedirs(experimentPath, exist_ok = True)    
#         logger = getLogger(experimentID, logFile = f"{experimentPath}/experiment.log")
#         logger.info(f"RUNNING EXPERIMENT {i}/{totalExperimentsToRun}...")
#         experimentResult = runAllFolds(experimentDict, logger = logger)
#         experimentResultSummarization(experimentResult, logger = logger)
#         clear_output(wait = True)

In [None]:
# Show every (experiment ID, skip reason) pair collected by the selection cell.
print(skippedExperiments)

#### **Outer Loop of the Nested CV**

In [9]:
# Finding best hyperparameters
experimentsBasePath = "./experiments/crossValidationWithTestSet"

# Only experiment directories that already contain a metrics.json are taken into account.
experimentsPathList = list(map(os.path.dirname, glob(f"{experimentsBasePath}/*/metrics.json")))

# Collect, for every experiment, its transfer learning metrics and its setting (same position in both lists).
resultsJSON = {"metrics": [], "settings": []}
for experimentPath in tqdm(experimentsPathList):
    with open(f"{experimentPath}/metrics.json") as metricsFile, open(f"{experimentPath}/setting.json") as settingFile:
        metricsJSON = json.load(metricsFile)["transferLearning"]
        settingJSON = json.load(settingFile)
    resultsJSON["metrics"].append(metricsJSON)
    resultsJSON["settings"].append(settingJSON)

100%|██████████| 1964/1964 [00:10<00:00, 183.74it/s]


In [10]:
# Flatten the nested metrics into one row per experiment and locate the AUC columns.
flatMetricsDF = pd.json_normalize(resultsJSON["metrics"]).astype(float)
columnsAUCROC = [column for column in flatMetricsDF.columns if "aucROC" in column]
columnsAUCPR = [column for column in flatMetricsDF.columns if "aucPR" in column]

# Row-wise summary of the AUC columns; cvIterations counts the non-missing AUC ROC values.
aucPRValues = flatMetricsDF[columnsAUCPR]
aucROCValues = flatMetricsDF[columnsAUCROC]
metricsDF = pd.DataFrame({
    "aucPR_mean": aucPRValues.mean(axis = 1),
    "aucPR_std": aucPRValues.std(axis = 1),
    "aucROC_mean": aucROCValues.mean(axis = 1),
    "aucROC_std": aucROCValues.std(axis = 1),
    "cvIterations": aucROCValues.notna().sum(axis = 1),
})

# In the settings, keep only the last path component of each database as its name.
settingsDF = pd.DataFrame(data = resultsJSON["settings"])
for databaseColumn in ("sourceDatabase", "targetDatabase"):
    settingsDF[databaseColumn] = settingsDF[databaseColumn].apply(lambda data: os.path.basename(data["path"]))

# Metrics and settings share the same row order, so they are joined side by side.
resultsDF = pd.concat([metricsDF, settingsDF], axis = 1)

In [11]:
# For every (source, target, test fold) group keep the setting with the highest mean AUC PR:
# after the descending sort, the first row of each group is the best one.
groupColumns = ["sourceDatabase", "targetDatabase", "testFold"]
reportedColumns = ["id", "aucPR_mean", "aucPR_std", "aucROC_mean", "aucROC_std", "cvIterations"]

bestResultsDF = (
    resultsDF
    .sort_values(groupColumns + ["aucPR_mean"], ascending = False)
    .drop_duplicates(subset = groupColumns, keep = "first")
    .set_index(groupColumns)[reportedColumns]
    .reset_index()
)
bestResultsDF

Unnamed: 0,sourceDatabase,targetDatabase,testFold,id,aucPR_mean,aucPR_std,aucROC_mean,aucROC_std,cvIterations
0,twitter,yeast,fold01,5d77f1710666c44a3281162db769ad09afca80574c7e0f...,0.995216,0.00098,0.997916,0.000428,3
1,twitter,yeast,fold00,a24bcce11e24344ed269fbdc0702dee2fd8a7a42c0a490...,0.994411,0.003716,0.997633,0.001563,3
2,nell_sports,nell_finances,fold00,27670968c9e86828e00b0ce19af63400999ef2715cea64...,0.743474,0.018761,0.8133,0.016007,2
3,imdb,cora,fold00,0171e9674931f58ecfa2d7ace13956976ee38adb25c4ac...,0.80437,0.014185,0.924848,0.010727,4


In [12]:
# Reload the setting of every best experiment and redirect its output to
# <original path>/outerLoopNestedCV/<source>_<target>/<test fold>.
experimentsToRun = []
for i, row in bestResultsDF.iterrows():
    experimentId = row["id"]
    transferName = f"{row['sourceDatabase']}_{row['targetDatabase']}"
    with open(f"{experimentsBasePath}/{experimentId}/setting.json") as f:
        experimentDict = json.load(f)
    experimentDict["path"] = os.path.join(
        experimentDict["path"], "outerLoopNestedCV", transferName, row["testFold"]
    )
    experimentsToRun.append(experimentDict)
totalExperiments = len(experimentsToRun)
totalExperiments

4

In [14]:
# Only "parallel" is handled by the execution cell below.
experimentsRunningMode = "parallel"

In [15]:
if experimentsRunningMode == "parallel":
    def safePrint(message, consoleOutputLock: multiprocessing.managers.AcquirerProxy):
        """Print `message` while holding the console lock shared by all workers."""
        # The context manager releases the lock even if print() raises.
        with consoleOutputLock:
            print(message)

    def experimentWorker(experimentDict: dict, consoleOutputLock: multiprocessing.managers.AcquirerProxy, progressLabel: str = "?/?"):
        """Run one outer-loop experiment directly in its "path" directory; executed by a pool worker.

        `progressLabel` ("<position>/<total>") is received explicitly: the worker must not read the notebook-level loop variable `i`, which only holds whatever value the last executed loop left behind. Any exception is written to the experiment log (when the logger could be created), reported on the console and re-raised so that the parent process sees it.
        """
        experimentID = experimentDict["id"]
        safePrint(f"Starting experiment {experimentID}...", consoleOutputLock)
        logger = None
        try:
            experimentPath = f"{experimentDict['path']}"
            os.makedirs(experimentPath, exist_ok = True)
            logger = getLogger(experimentID, logFile = f"{experimentPath}/experiment.log", consoleOutput = False)
            logger.info(f"RUNNING EXPERIMENT {progressLabel}...")
            # The returned result is not used here.
            runSingleExperimentFromExperimentDict(experimentDict, logger = logger)
            safePrint(f"Experiment finished successfully: {experimentID}...", consoleOutputLock)
        except Exception as e:
            if logger is not None:
                # The console message below points the reader to the log, so the traceback must be there.
                logger.exception(f"Experiment {experimentID} failed.")
            safePrint(f"The following exception was raised while running the experiment {experimentID}: {e}. Check the logs in the experiment directory for more details.", consoleOutputLock)
            raise

    totalExperimentsToRun = len(experimentsToRun)
    # The manager is entered first and therefore shut down last: if one experiment fails, the pool
    # still waits for the experiments that are already running, and they can keep using the lock.
    with multiprocessing.Manager() as manager:
        with ProcessPoolExecutor(max_workers = 8) as p:
            consoleOutputLock = manager.Lock()
            futures = p.map(
                experimentWorker,
                experimentsToRun,
                [consoleOutputLock] * totalExperimentsToRun,
                [f"{position}/{totalExperimentsToRun}" for position in range(1, totalExperimentsToRun + 1)]
            )
            # Draining the iterator re-raises the first exception raised by a worker, if any.
            for workerResult in futures:
                pass

Starting experiment 5d77f1710666c44a3281162db769ad09afca80574c7e0fac7d469aa962102a5a...
Starting experiment a24bcce11e24344ed269fbdc0702dee2fd8a7a42c0a49018a55990a446096e51...
Starting experiment 0171e9674931f58ecfa2d7ace13956976ee38adb25c4ac170a31aeade600b838...
Starting experiment 27670968c9e86828e00b0ce19af63400999ef2715cea64eae4b9881dae0d28d2...
Experiment finished successfully: a24bcce11e24344ed269fbdc0702dee2fd8a7a42c0a49018a55990a446096e51...
Experiment finished successfully: 27670968c9e86828e00b0ce19af63400999ef2715cea64eae4b9881dae0d28d2...
Experiment finished successfully: 5d77f1710666c44a3281162db769ad09afca80574c7e0fac7d469aa962102a5a...
Experiment finished successfully: 0171e9674931f58ecfa2d7ace13956976ee38adb25c4ac170a31aeade600b838...


## **Traditional Cross-Validation (no transfer)**

This experiment performs a traditional k-fold cross-validation on the target data only. In other words, the model is learned from scratch, assuming that enough target data is available for training.

In [59]:
def runTraditionalCrossValidation(experimentDict: dict, logger = None) -> dict:
    """Run a k-fold cross-validation on the target domain only, learning from scratch with the original RDN-Boost model.

    Every entry matching `fold*` under `experimentDict["databasePath"]` is used once as the test fold while all the remaining folds form the training set. No scenario of scarce target data is simulated: all training folds are used. The setting is stored at `<path>/<id>/setting.json` and the metrics of every test fold at `<path>/<id>/metrics.json`. Folds are processed in sorted order so that runs are repeatable.

    Args:
        experimentDict: Experiment setup. "id", "path" and "databasePath" are required; the hyperparameters are optional and fall back to the defaults used below. A "randomSeed" entry, if present, is not used by this function.
        logger: Logger for progress messages. When omitted, a DEBUG-level logger named after the experiment ID is created with `getLogger`.

    Returns:
        A dict `{"RDNBoost": {<test fold>: <value returned by runSingleExperiment_OriginalRDNBoost>}}`.
    """

    experimentID = experimentDict["id"]
    experimentBasePath = os.path.join(experimentDict["path"], experimentID)

    os.makedirs(experimentBasePath, exist_ok = True)

    with open(os.path.join(experimentBasePath, "setting.json"), "w") as f:
        json.dump(experimentDict, f)

    if not logger:
        logger = getLogger(experimentID, level = logging.DEBUG)

    logger.info("Parsing experiment parameters...")

    negPosRatio = experimentDict.get("negPosRatio", 1)
    databasePath = experimentDict["databasePath"]

    # Options shared by the loading of the train and of the test database.
    databaseOptions = dict(
        path = databasePath,
        useRecursion = experimentDict.get("useRecursion", False),
        logger = logger,
        negPosRatio = negPosRatio,
        maxFailedNegSamplingRetries = experimentDict.get("maxFailedNegSamplingRetries", 50),
        resetTargetPredicate = experimentDict.get("resetTargetPredicate", False),
        targetPredicate = experimentDict.get("targetPredicate", None)
    )

    # Hyperparameters forwarded to the model on every fold.
    modelOptions = dict(
        nEstimators = experimentDict.get("nEstimators", 10),
        nodeSize = experimentDict.get("nodeSize", 2),
        maxTreeDepth = experimentDict.get("maxTreeDepth", 3),
        negPosRatio = negPosRatio,
        numberOfClauses = experimentDict.get("numberOfClauses", 8),
        numberOfCycles = experimentDict.get("numberOfCycles", 100),
        ignoreSTDOUT = experimentDict.get("ignoreSTDOUT", False)
    )

    # glob() gives no ordering guarantee, so the fold names are sorted explicitly.
    allFolds = sorted(os.path.basename(path) for path in glob(f"{databasePath}/fold*"))

    result = {}

    for databaseTestFold in allFolds:
        experimentFoldPath = os.path.join(experimentBasePath, databaseTestFold)
        os.makedirs(experimentFoldPath, exist_ok = True)

        logger.info(f"RUNNING EXPERIMENTS USING {databaseTestFold.upper()} AS TEST FOLD...")

        # Every fold but the test one, keeping the sorted order.
        databaseTrainFolds = [fold for fold in allFolds if fold != databaseTestFold]

        logger.info("Loading database for training...")
        logger.debug(f"Train folds: {databaseTrainFolds}")

        databaseTrain = loadDatabase(folds = databaseTrainFolds, **databaseOptions)

        logger.info("Loading database for testing...")
        logger.debug(f"Test fold: {databaseTestFold}")

        databaseTest = loadDatabase(folds = [databaseTestFold], **databaseOptions)

        targetRelation = databaseTrain.getTargetRelation()

        logger.debug(f"Target relation for database: {targetRelation}")

        result[databaseTestFold] = runSingleExperiment_OriginalRDNBoost(
            experimentPath = experimentFoldPath,
            databaseTrain = databaseTrain,
            databaseTest = databaseTest,
            logger = logger,
            **modelOptions
        )

    metricsJSONPath = os.path.join(experimentBasePath, "metrics.json")
    logger.info(f"Storing performance metrics at {metricsJSONPath}.")

    # One entry per test fold. The key must be the fold being iterated here, not the variable
    # left over from the loop above, otherwise only a single fold ends up in metrics.json.
    allMetrics = {testFold: foldResults["metrics"] for testFold, foldResults in result.items()}

    with open(metricsJSONPath, "w") as f:
        json.dump(allMetrics, f)

    logger.info("Experiment has been finished.")

    return {"RDNBoost": result}

In [60]:
# `experiments` is emptied first, so it stays empty if the file below cannot be opened.
experiments = {}
with open("experiments-noTransferCrossValidation.json") as experimentsFile:
    experiments = json.loads(experimentsFile.read())
totalExperiments = len(experiments)
totalExperiments

7

In [61]:
# This function can be leveraged to prioritize experiments.
def skipExperiment(experimentDict):
    """Tell whether an experiment must be skipped, together with the reason.

    An experiment is skipped only when `<path>/<id>/metrics.json` already exists, i.e., when it has already been carried out. Returns (True, reason) in that case and (False, "") otherwise.
    """
    experimentID = experimentDict["id"]
    metricsPath = "{}/{}/metrics.json".format(experimentDict["path"], experimentID)
    alreadyCarriedOut = os.path.exists(metricsPath)
    return (True, "The experiment has already been carried out.") if alreadyCarriedOut else (False, "")

In [62]:
start = 1 # An int greater or equal to 1
skippedExperiments, experimentsToRun = [], []
numProcesses = 3

# Split the experiments (from position `start` on) into the ones to run and the ones to skip.
for i, experimentDict in enumerate(experiments[start-1:], start = start):
    # TODO: I am temporarily ignoring other models. I  need to further run the other experiments
    shouldSkipExperiment, skipMessage = skipExperiment(experimentDict)
    experimentID = experimentDict["id"]
    if not shouldSkipExperiment:
        experimentsToRun.append(experimentDict)
        continue
    # Keep the reason next to the ID so that it can be inspected afterwards.
    skippedExperiments.append((experimentID, skipMessage))

len(experimentsToRun)

6

In [63]:
# Selects which of the two execution cells below runs its body.
experimentsRunningMode = "parallel" # Either "parallel" or "sequential"

In [None]:
# Parallel execution of the experiments.
# TODO: We get a kernel crash that occurs only when the models are imported from srlearn.rdn under Python 3.8.10. In our tests, the problem is solved when running under Python 3.10.5, but the reason why it does not work under Python 3.8.10 is still unknown.

if experimentsRunningMode == "parallel":
    def safePrint(message, consoleOutputLock: multiprocessing.managers.AcquirerProxy):
        """Print `message` while holding the console lock shared by all workers."""
        # The context manager releases the lock even if print() raises.
        with consoleOutputLock:
            print(message)

    def experimentWorker(experimentDict: dict, consoleOutputLock: multiprocessing.managers.AcquirerProxy, progressLabel: str = "?/?"):
        """Run the traditional cross-validation of one experiment and summarize it; executed by a pool worker.

        `progressLabel` ("<position>/<total>") is received explicitly: the worker must not read the notebook-level loop variable `i`, which only holds whatever value the last executed loop left behind. Any exception is written to the experiment log (when the logger could be created), reported on the console and re-raised so that the parent process sees it.
        """
        experimentID = experimentDict["id"]
        safePrint(f"Starting experiment {experimentID}...", consoleOutputLock)
        logger = None
        try:
            experimentPath = f"{experimentDict['path']}/{experimentID}"
            os.makedirs(experimentPath, exist_ok = True)
            logger = getLogger(experimentID, logFile = f"{experimentPath}/experiment.log", consoleOutput = False)
            logger.info(f"RUNNING EXPERIMENT {progressLabel}...")
            experimentResult = runTraditionalCrossValidation(experimentDict, logger = logger)
            experimentResultSummarization(experimentResult, logger = logger)
            safePrint(f"Experiment finished successfully: {experimentID}...", consoleOutputLock)
        except Exception as e:
            if logger is not None:
                # The console message below points the reader to the log, so the traceback must be there.
                logger.exception(f"Experiment {experimentID} failed.")
            safePrint(f"The following exception was raised while running the experiment {experimentID}: {e}. Check the logs in the experiment directory for more details.", consoleOutputLock)
            raise

    totalExperimentsToRun = len(experimentsToRun)
    # The manager is entered first and therefore shut down last: if one experiment fails, the pool
    # still waits for the experiments that are already running, and they can keep using the lock.
    with multiprocessing.Manager() as manager:
        # NOTE: the number of workers is hard-coded; `numProcesses` from the selection cell is not used here.
        with ProcessPoolExecutor(max_workers = 8) as p:
            consoleOutputLock = manager.Lock()
            futures = p.map(
                experimentWorker,
                experimentsToRun,
                [consoleOutputLock] * totalExperimentsToRun,
                [f"{position}/{totalExperimentsToRun}" for position in range(1, totalExperimentsToRun + 1)]
            )
            # Draining the iterator re-raises the first exception raised by a worker, if any.
            for workerResult in futures:
                pass

In [None]:
# Sequential execution of the experiments: one experiment at a time, in the notebook process.
if experimentsRunningMode == "sequential":
    totalExperimentsToRun = len(experimentsToRun)
    for experimentNumber, experimentDict in enumerate(experimentsToRun, start = 1):
        experimentID = experimentDict["id"]
        experimentPath = f"{experimentDict['path']}/{experimentID}"
        logFilePath = f"{experimentPath}/experiment.log"

        # The experiment directory must exist before the log file is created inside it.
        os.makedirs(experimentPath, exist_ok = True)
        logger = getLogger(experimentID, logFile = logFilePath)
        logger.info(f"RUNNING EXPERIMENT {experimentNumber}/{totalExperimentsToRun}...")

        experimentResultSummarization(
            runTraditionalCrossValidation(experimentDict, logger = logger),
            logger = logger
        )

        # Keep only the output of the most recent experiment in the cell.
        clear_output(wait = True)

## **Progressive Target Data Availability**

It evaluates how target data availability impacts the performance of our instance-based transfer learning model. We only consider the best settings for each pair of source and target domains, according to the results from the cross validation for transfer settings. In particular, we define the best setting based on the AUC PR.

In [7]:
def experimentResultSummarization(experimentResult: dict, logger: logging.Logger = None):
    """Log the mean and the standard deviation, across folds, of every metric found in an experiment result.

    `experimentResult` is traversed as model -> proportion -> fold -> results, where each results dict holds a "metrics" mapping from metric name to a value convertible to float. One line is logged per (model, proportion, metric) combination. When no logger is given, one named "Result summarization" is created with `getLogger`.
    """
    logger = logger or getLogger("Result summarization")

    logger.info("Extracting performance metrics from experiment results:")

    # Long format: one row per (model, proportion, fold, metric).
    metricsColumns = ["model", "proportion", "fold", "metric", "value"]
    metricsData = [
        [model, proportionStr, fold, metricName, float(metricValue)]
        for model, proportionResults in experimentResult.items()
        for proportionStr, foldResults in proportionResults.items()
        for fold, resultsDict in foldResults.items()
        for metricName, metricValue in resultsDict["metrics"].items()
    ]
    metricsDF = pd.DataFrame(data = metricsData, columns = metricsColumns)

    # Aggregate over the folds of each (model, proportion, metric) group.
    valuesByGroup = metricsDF.groupby(["model", "proportion", "metric"])["value"]
    summaryDF = valuesByGroup.agg(["mean", "std"]).reset_index()

    for model, proportion, metric, mean, std in summaryDF.itertuples(index = False, name = None):
        logger.info(f"{model} | {proportion}: {metric} = {mean:.4f} +- {std:.4f}")

In [8]:
def runLearningCurve(experimentDict: dict, logger: Optional[logging.Logger] = None) -> dict:
    """Run a k-fold cross validation that also varies the amount of target training data.

    At each iteration k, the k-th fold of the target domain is used to evaluate the models and the
    remaining folds are used for training. The train set from the target domain is shuffled and
    divided into `numTrainSplits` subgroups (5 by default). We progressively add each of them to the
    final train set, so generating a learning curve on the amount of target data available for
    training. This experiment is similar to that described in page 73 from Rodrigo's work [1].

    [1] https://cos.ufrj.br/uploadfile/publicacao/2903.pdf

    Parameters
    ----------
    experimentDict : dict
        Experiment setting. Required keys: "id", "path" and "targetDatabase" (a dict of
        `loadDatabase` keyword arguments that includes "path"; fold directories are discovered
        with the pattern `<path>/fold*`). "sourceDatabase" is required when `runTransferLearning`
        or `runTreeBoostler` is set; "weight" ("strategy", "parameters") and "mapping"
        ("relationMapping", "termTypeMapping") are required when `runTransferLearning` is set.
        Every other key is optional and falls back to the default shown in the body.
    logger : logging.Logger, optional
        Logger to use. When omitted, one is created through `getLogger` with DEBUG level.

    Returns
    -------
    dict
        `result[model][trainProportionStr][fold]` holds whatever the corresponding
        `runSingleExperiment_*` function returned.

    Raises
    ------
    AssertionError
        If none of the `run*` flags is enabled.
    FileNotFoundError
        If no fold directory is found for the target database.

    Side effects
    ------------
    Writes `setting.json` (before running) and `metrics.json` (after all folds finished) under
    `<path>/<id>`, and creates one directory per fold and train proportion.
    """

    experiment = experimentDict
    experimentID = experiment["id"]
    experimentBasePath = os.path.join(experiment["path"], experimentID)

    os.makedirs(experimentBasePath, exist_ok = True)

    with open(os.path.join(experimentBasePath, "setting.json"), "w") as f:
        json.dump(experiment, f)

    if not logger:
        logger = getLogger(experimentID, level = logging.DEBUG)

    logger.info("Parsing experiment parameters...")

    useRecursion = experiment.get("useRecursion", False)
    negPosRatio = experiment.get("negPosRatio", 1)
    randomSeed = experiment.get("randomSeed", 10)
    maxFailedNegSamplingRetries = experiment.get("maxFailedNegSamplingRetries", 50)
    numberOfClauses = experiment.get("numberOfClauses", 8)
    numberOfCycles = experiment.get("numberOfCycles", 100)
    maxTreeDepth = experiment.get("maxTreeDepth", 3)
    nEstimators = experiment.get("nEstimators", 10)
    nodeSize = experiment.get("nodeSize", 2)
    sourceUtilityAlpha = experiment.get("sourceUtilityAlpha", 1)
    targetUtilityAlpha = experiment.get("targetUtilityAlpha", 1)
    utilityAlphaSetIter = experiment.get("utilityAlphaSetIter", 1)
    runOriginalRDNBoost = experiment.get("runOriginalRDNBoost", False)
    runTransferLearning = experiment.get("runTransferLearning", False)
    runAnalogousToOriginalRDNBoost = experiment.get("runAnalogousToOriginalRDNBoost", False)
    runTreeBoostler = experiment.get("runTreeBoostler", False)
    ignoreSTDOUT = experiment.get("ignoreSTDOUT", False)
    searchArgPermutation = experiment.get("searchArgPermutation", True)
    allowSameTargetMap = experiment.get("allowSameTargetMap", False)
    refine = experiment.get("refine", True)
    maxRevisionIterations = experiment.get("maxRevisionIterations", 1)
    # Number of points of the learning curve; 5 reproduces the previously hard-coded value.
    numTrainSplits = experiment.get("numTrainSplits", 5)

    anyModelIsSet = runOriginalRDNBoost or runTransferLearning or runAnalogousToOriginalRDNBoost or runTreeBoostler
    assert anyModelIsSet, "No model to run. `runOriginalRDNBoost`, `runTransferLearning`, `runAnalogousToOriginalRDNBoost`, and `runTreeBoostler` can not be set to False simultaneously."

    # Keyword arguments shared by every `loadDatabase` call.
    databaseArgs = dict(
        useRecursion = useRecursion,
        logger = logger,
        negPosRatio = negPosRatio,
        maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
    )

    # Keyword arguments shared by every `runSingleExperiment_*` call.
    learnerArgs = dict(
        nEstimators = nEstimators,
        nodeSize = nodeSize,
        maxTreeDepth = maxTreeDepth,
        negPosRatio = negPosRatio,
        numberOfClauses = numberOfClauses,
        numberOfCycles = numberOfCycles,
        ignoreSTDOUT = ignoreSTDOUT,
        logger = logger,
    )

    if runTransferLearning or runTreeBoostler:
        logger.info("Loading source database...")

        # The source domain is loaded once (folds = None) and reused for every target fold.
        sourceDatabase = loadDatabase(folds = None, **databaseArgs, **experiment["sourceDatabase"])

        if runTransferLearning:
            weightFactory = WeightFactory()
            weightStrategy = weightFactory.getWeightStrategy(experiment["weight"]["strategy"], **experiment["weight"]["parameters"])

            relationMapping = experiment["mapping"]["relationMapping"]
            termTypeMapping = experiment["mapping"]["termTypeMapping"]

    targetDatabasePath = experiment["targetDatabase"]["path"]
    # glob gives no ordering guarantee; sorting makes the fold sequence (and the logs) reproducible.
    allTargetFolds = sorted(os.path.basename(path) for path in glob(f"{targetDatabasePath}/fold*"))

    if not allTargetFolds:
        # Without this check an empty metrics.json would be written below and the experiment
        # would later be treated as already carried out.
        raise FileNotFoundError(f"No fold was found for the target database at {targetDatabasePath}/fold*.")

    result = {}

    for fold in allTargetFolds:
        logger.info(f"RUNNING EXPERIMENTS USING {fold.upper()} AS TESTING FOLD...")

        targetDatabaseTestFold = fold
        # Order-preserving replacement for a set difference, whose order varies between runs.
        targetDatabaseTrainFolds = [trainFold for trainFold in allTargetFolds if trainFold != targetDatabaseTestFold]

        logger.info("Loading target database for testing...")
        logger.debug(f"Test fold: {targetDatabaseTestFold}")

        targetDatabaseTest = loadDatabase(folds = [targetDatabaseTestFold], **databaseArgs, **experiment["targetDatabase"])

        logger.info("Loading target database for training...")
        logger.debug(f"Train folds: {targetDatabaseTrainFolds}")

        targetDatabaseTrain = loadDatabase(folds = targetDatabaseTrainFolds, **databaseArgs, **experiment["targetDatabase"])

        targetDomainTargetRelation = targetDatabaseTrain.getTargetRelation()

        logger.debug(f"Target relation for target database: {targetDomainTargetRelation}")

        # TODO: Implement random seed to guarantee reproducibility.
        targetDatabaseTrainSplits = iter(Database.getKFolds(
            targetDatabaseTrain,
            numFolds = numTrainSplits,
            shuffle = True
        ))

        # Growing train set: it starts with the first split and absorbs one more split per iteration.
        targetTrainSubset = next(targetDatabaseTrainSplits)

        for trainSplit in range(1, numTrainSplits + 1):
            # Expression kept as is so that the directory names match those of previous runs.
            targetTrainProportion = (1/numTrainSplits)*trainSplit

            logger.info(f"Target train set proportion: {targetTrainProportion:.2f}")

            trainProportionStr = f"trainProportion-{targetTrainProportion:.2f}"
            experimentFoldPath = os.path.join(experimentBasePath, fold, trainProportionStr)
            os.makedirs(experimentFoldPath, exist_ok = True)

            if runOriginalRDNBoost:
                foldResults = result.setdefault("originalRDNBoost", {}).setdefault(trainProportionStr, {})
                foldResults[fold] = runSingleExperiment_OriginalRDNBoost(
                    experimentPath = experimentFoldPath,
                    databaseTrain = targetTrainSubset,
                    databaseTest = targetDatabaseTest,
                    **learnerArgs
                )

            if runAnalogousToOriginalRDNBoost:
                foldResults = result.setdefault("analogousToOriginalRDNBoost", {}).setdefault(trainProportionStr, {})
                foldResults[fold] = runSingleExperiment_AnalogousToRDNBoost(
                    experimentPath = experimentFoldPath,
                    databaseTrain = targetTrainSubset,
                    databaseTest = targetDatabaseTest,
                    **learnerArgs
                )

            if runTransferLearning:
                foldResults = result.setdefault("transferLearning", {}).setdefault(trainProportionStr, {})
                foldResults[fold] = runSingleExperiment_TransferLearning(
                    experimentPath = experimentFoldPath,
                    sourceDatabase = sourceDatabase,
                    targetDatabaseTrain = targetTrainSubset,
                    targetDatabaseTest = targetDatabaseTest,
                    useRecursion = useRecursion,
                    randomSeed = randomSeed,
                    maxFailedNegSamplingRetries = maxFailedNegSamplingRetries,
                    weightStrategy = weightStrategy,
                    sourceUtilityAlpha = sourceUtilityAlpha,
                    targetUtilityAlpha = targetUtilityAlpha,
                    utilityAlphaSetIter = utilityAlphaSetIter,
                    relationMapping = relationMapping,
                    termTypeMapping = termTypeMapping,
                    **learnerArgs
                )

            if runTreeBoostler:
                foldResults = result.setdefault("treeBoostler", {}).setdefault(trainProportionStr, {})
                foldResults[fold] = runSingleExperiment_TreeBoostler(
                    experimentPath = experimentFoldPath,
                    sourceDatabase = sourceDatabase,
                    targetDatabaseTrain = targetTrainSubset,
                    targetDatabaseTest = targetDatabaseTest,
                    searchArgPermutation = searchArgPermutation,
                    allowSameTargetMap = allowSameTargetMap,
                    refine = refine,
                    maxRevisionIterations = maxRevisionIterations,
                    **learnerArgs
                )

            # Every iteration but the last one enlarges the train set with the next split.
            if trainSplit < numTrainSplits:
                targetTrainSubset = targetTrainSubset.merge(next(targetDatabaseTrainSplits))

    metricsJSONPath = os.path.join(experimentBasePath, "metrics.json")
    logger.info(f"Storing performance metrics at {metricsJSONPath}.")

    # Same nesting as `result` (model -> train proportion -> fold), keeping only the metrics.
    allMetrics = {
        model: {
            trainProportion: {
                foldName: resultsDict["metrics"]
                for foldName, resultsDict in foldsResults.items()
            }
            for trainProportion, foldsResults in trainProportionsResults.items()
        }
        for model, trainProportionsResults in result.items()
    }

    with open(metricsJSONPath, "w") as f:
        json.dump(allMetrics, f)

    logger.info("Experiment has been finished.")

    return result

In [9]:
# Settings of every learning-curve experiment. The loaded value is sliced like a list by the
# selection cell below, so no dict placeholder is created beforehand.
# The encoding is pinned so the file is decoded the same way regardless of the machine's locale.
with open("experiments-learningCurve.json", encoding = "utf-8") as f:
    experimentsLearningCurve = json.load(f)
totalExperimentsLearningCurve = len(experimentsLearningCurve)
totalExperimentsLearningCurve

21

In [10]:
# This function can be leveraged to prioritize experiments.
def skipExperiment(experimentDict):
    """Tell whether an experiment should be skipped, and why.

    An experiment counts as already carried out when the file
    `<path>/<id>/metrics.json` exists. Returns a `(skip, reason)` tuple; the
    reason is an empty string when the experiment must still be run.
    """
    identifier = experimentDict["id"]
    baseDirectory = experimentDict['path']
    metricsFile = f"{baseDirectory}/{identifier}/metrics.json"

    if not os.path.exists(metricsFile):
        return False, ""

    return True, "The experiment has already been carried out."

In [11]:
# Selection of the experiments that still have to be executed.
start = 1 # An int greater or equal to 1
numProcesses = 3
skippedExperiments, experimentsToRun = [], []

# TODO: I am temporarily ignoring other models. I  need to further run the other experiments
# `i` is the 1-based position of the experiment inside `experimentsLearningCurve`.
for i, experimentDict in enumerate(experimentsLearningCurve[start-1:], start = start):
    shouldSkipExperiment, skipMessage = skipExperiment(experimentDict)
    experimentID = experimentDict["id"]
    if not shouldSkipExperiment:
        experimentsToRun.append(experimentDict)
        continue
    # Keep the reason next to the ID so skipped experiments can be inspected afterwards.
    skippedExperiments.append((experimentID, skipMessage))

totalExperimentsToRun = len(experimentsToRun)
totalExperimentsToRun

1

In [12]:
# Selects which of the two execution cells below does the work:
# "parallel" enables the process-pool cell and "sequential" enables the plain loop cell.
experimentsRunningMode = "parallel"

In [13]:
# Parallel execution of the experiments.
# TODO: We get a Kernel Crash and it occurs only when we import the models from srlearn.rdn under Python 3.8.10. In our tests, this problem is solved when running over Python 3.10.5, but the reason why it does not work under Python 3.8.10 is still unknown.

if experimentsRunningMode == "parallel":
    def safePrint(message, consoleOutputLock: multiprocessing.managers.AcquirerProxy):
        """Print `message` while holding the console lock shared by the worker processes."""
        # The context manager releases the lock even when print() raises.
        with consoleOutputLock:
            print(message)

    def experimentWorker(experimentDict: dict, consoleOutputLock: multiprocessing.managers.AcquirerProxy, experimentIndex: Optional[int] = None):
        """Run one learning-curve experiment inside a worker process.

        Progress goes to `<path>/<id>/experiment.log`; only the start, success and failure
        notices are printed to the console. `experimentIndex` is the 1-based position of the
        experiment in `experimentsToRun` and is only used in the log header. Any exception is
        reported and then re-raised.
        """
        experimentID = experimentDict["id"]
        safePrint(f"Starting experiment {experimentID}...", consoleOutputLock)
        logger = None
        try:
            experimentPath = f"{experimentDict['path']}/{experimentID}"
            os.makedirs(experimentPath, exist_ok = True)
            logger = getLogger(experimentID, logFile = f"{experimentPath}/experiment.log", consoleOutput = False)
            # The position is received explicitly: a global loop counter would hold the same
            # stale value for every experiment.
            position = "?" if experimentIndex is None else experimentIndex
            logger.info(f"RUNNING EXPERIMENT {position}/{totalExperimentsToRun}...")
            experimentResult = runLearningCurve(experimentDict, logger = logger)
            experimentResultSummarization(experimentResult, logger = logger)
            safePrint(f"Experiment finished successfully: {experimentID}...", consoleOutputLock)
        except Exception as e:
            # The console message points to the experiment log, so the traceback is stored there.
            if logger is not None:
                logger.exception(f"Experiment {experimentID} failed.")
            safePrint(f"The following exception was raised while running the experiment {experimentID}: {e}. Check the logs in the experiment directory for more details.", consoleOutputLock)
            raise

    # The manager is the outer context so that the lock proxy stays valid until the pool
    # has shut down, even when one of the experiments raises.
    with multiprocessing.Manager() as manager:
        consoleOutputLock = manager.Lock()
        # The pool size comes from the `numProcesses` setting defined with the experiment selection.
        with ProcessPoolExecutor(max_workers = numProcesses) as p:
            futures = p.map(
                experimentWorker,
                experimentsToRun,
                [consoleOutputLock for experiment in experimentsToRun],
                range(1, len(experimentsToRun) + 1),
            )
            # Consuming the iterator waits for the workers and re-raises their exceptions.
            for result in futures:
                pass

Starting experiment 816f88e322ad9332974b410b99743c591cfb28291770844b23d5ab2e50cf7967...


Experiment finished successfully: 816f88e322ad9332974b410b99743c591cfb28291770844b23d5ab2e50cf7967...


In [None]:
# Sequential execution of the experiments. 
# Runs the pending experiments one after another in the notebook process. Nothing happens
# unless `experimentsRunningMode` is "sequential".
if experimentsRunningMode == "sequential":
    totalExperimentsToRun = len(experimentsToRun)
    for i, experimentDict in enumerate(experimentsToRun, start = 1):
        experimentID = experimentDict["id"]
        # Every experiment gets its own directory, `<path>/<id>`, which also holds its log file.
        experimentPath = f"{experimentDict['path']}/{experimentID}"
        os.makedirs(experimentPath, exist_ok = True)    
        logger = getLogger(experimentID, logFile = f"{experimentPath}/experiment.log")
        logger.info(f"RUNNING EXPERIMENT {i}/{totalExperimentsToRun}...")
        experimentResult = runLearningCurve(experimentDict, logger = logger)
        experimentResultSummarization(experimentResult, logger = logger)
        # Drop the output of the finished experiment; with wait = True the clearing is
        # deferred until new output is available to replace it.
        clear_output(wait = True)