In [6]:
from deepmol.pipeline import Pipeline

# Restore a previously saved DeepMol pipeline from the 'mhfp/trial_38' path
# (presumably trial 38 of an MHFP-based optimisation run -- confirm).
pipeline = Pipeline.load('mhfp/trial_38')

In [7]:
from deepmol.loaders import CSVLoader
import numpy as np


# Label columns read from every split; kept in one place so the three
# splits cannot drift apart.
_LABEL_FIELDS = (
    'C00073', 'C00078', 'C00079', 'C00082', 'C00235', 'C00341', 'C00353',
    'C00448', 'C01789', 'C03506', 'C00047', 'C00108', 'C00187', 'C00148',
    'C00041', 'C00129', 'C00062', 'C01852', 'C00049', 'C00135', 'C00223',
    'C00509', 'C00540', 'C01477', 'C05903', 'C05904', 'C05905', 'C05908',
    'C09762',
)


def _load_split(path, labels_fields):
    """Load one CSV split into a dataset using the shared column layout."""
    return CSVLoader(path,
                     labels_fields=list(labels_fields),
                     id_field="ids", smiles_field="smiles").create_dataset()


def _per_task_std(score_fn, y_true, y_pred):
    """Return the standard deviation of ``score_fn`` evaluated column by column."""
    scores = [score_fn(y_true[:, i], y_pred[:, i])
              for i in range(y_pred.shape[1])]
    return np.array(scores).std()


def fit_and_evaluate(pipeline, train_path="train.csv", valid_path="valid.csv",
                     test_path="test.csv", labels_fields=None):
    """Fit ``pipeline`` on train+valid and evaluate it on the test split.

    The train and validation CSV files are loaded, merged and used to fit the
    pipeline. The test CSV is then scored with macro-averaged F1, precision
    and recall, and the standard deviation of the per-task F1, recall and
    precision is added to the first element of the evaluation result.

    Args:
        pipeline: Object exposing ``fit``, ``evaluate``, ``predict`` and
            ``transform`` (a DeepMol ``Pipeline`` in this notebook).
        train_path: CSV file with the training split.
        valid_path: CSV file with the validation split.
        test_path: CSV file with the test split.
        labels_fields: Label column names; defaults to the 29 compound
            identifiers in ``_LABEL_FIELDS``.

    Returns:
        Whatever ``pipeline.evaluate`` returns, with the keys
        ``'f1_scores_std'``, ``'recall_scores_std'`` and
        ``'precision_scores_std'`` added to its element ``[0]``.
    """
    # Kept function-local, as in the original cell, so the notebook's
    # top-level imports stay untouched.
    from sklearn.metrics import f1_score, precision_score, recall_score
    from deepmol.metrics import Metric

    if labels_fields is None:
        labels_fields = _LABEL_FIELDS

    train = _load_split(train_path, labels_fields)
    valid = _load_split(valid_path, labels_fields)
    test = _load_split(test_path, labels_fields)

    # The validation data is folded into the training set before fitting.
    train_valid = train.merge([valid])
    pipeline.fit(train_valid)

    # NOTE(review): these wrappers keep their original names on purpose --
    # Metric presumably derives the result key from the function name; confirm
    # before renaming or replacing them with functools.partial.
    def macro_f1_score(y_true, y_pred):
        return f1_score(y_true, y_pred, average='macro')

    def macro_precision_score(y_true, y_pred):
        return precision_score(y_true, y_pred, average='macro')

    def macro_recall_score(y_true, y_pred):
        return recall_score(y_true, y_pred, average='macro')

    results_test = pipeline.evaluate(
        test,
        metrics=[Metric(macro_f1_score),
                 Metric(macro_precision_score),
                 Metric(macro_recall_score)],
        per_task_metrics=False,
    )
    predictions = pipeline.predict(test)

    # Labels are read from the transformed dataset rather than the raw one --
    # presumably so its rows line up with the predictions; confirm.
    test = pipeline.transform(test)
    y_true = test.y

    summary = results_test[0]
    summary['f1_scores_std'] = _per_task_std(f1_score, y_true, predictions)
    summary['recall_scores_std'] = _per_task_std(recall_score, y_true, predictions)
    summary['precision_scores_std'] = _per_task_std(precision_score, y_true, predictions)

    return results_test

In [None]:
# Fit the loaded pipeline on train+valid and show its test-set metrics
# (the returned value is the cell output).
fit_and_evaluate(pipeline=pipeline)