In [3]:
"""
Script that trains sklearn models on HOPV dataset.
"""
from __future__ import print_function
from __future__ import division
from __future__ import unicode_literals

from contextlib import contextmanager
import sys, os

@contextmanager
def suppress_stderr():
    """Context manager that temporarily points ``sys.stderr`` at ``os.devnull``.

    While the managed block runs, anything written through ``sys.stderr`` is
    discarded. The previous ``sys.stderr`` object is put back when the block
    exits, whether it finishes normally or raises.
    """
    previous_stream = sys.stderr
    sink = open(os.devnull, "w")
    sys.stderr = sink
    try:
        yield
    finally:
        # Restore first, then release the sink, so sys.stderr never refers
        # to a closed file.
        sys.stderr = previous_stream
        sink.close()

# Third-party imports run while sys.stderr is redirected to os.devnull, so
# anything these imports write to sys.stderr at import time is discarded.
# NOTE(review): `pd` is not referenced in the cells visible here; it is kept
# in case other cells rely on it.
with suppress_stderr():            
    import numpy as np
    import deepchem as dc
    import pandas as pd
    from deepchem.molnet import load_hopv
    from sklearn.ensemble import RandomForestRegressor

In [4]:
# Load the HOPV dataset with ECFP featurization. No splitter is handed to the
# loader; the split is done explicitly below.
hopv_tasks, hopv_datasets, transformers = load_hopv(
    featurizer='ECFP',
    splitter=None,
)

# Split the first returned dataset into train / valid / test by scaffold.
# NOTE(review): presumably hopv_datasets[0] is the full, unsplit dataset when
# splitter=None -- confirm against the installed DeepChem version.
splitter = dc.splits.ScaffoldSplitter()
train_dataset, valid_dataset, test_dataset = splitter.train_valid_test_split(
    hopv_datasets[0]
)

In [5]:
def model_builder(model_dir, n_estimators=500, random_state=0):
  """Build one random-forest regressor wrapped as a DeepChem SklearnModel.

  Passed as the per-task factory to ``dc.models.SingletaskToMultitask`` below.

  Args:
    model_dir: Directory handed to ``dc.models.SklearnModel`` for this model.
    n_estimators: Number of trees in the forest (default 500, as before).
    random_state: Seed forwarded to ``RandomForestRegressor``. The forest was
      previously unseeded, so scores changed on every run; a fixed default
      makes a fresh "Restart & Run All" reproducible. Pass ``None`` to get
      the old non-deterministic behaviour.

  Returns:
    A ``dc.models.SklearnModel`` wrapping the configured regressor.
  """
  sklearn_model = RandomForestRegressor(
      n_estimators=n_estimators, random_state=random_state)
  return dc.models.SklearnModel(sklearn_model, model_dir)


# One random forest is built per HOPV task via model_builder.
model = dc.models.SingletaskToMultitask(hopv_tasks, model_builder)

In [6]:
# Fit the multitask model on the training split, save it, then predict on
# the validation split. `predicted` is reused by the per-task scoring loop
# in the evaluation cell.
print("About to fit model")
model.fit(train_dataset)
model.save()
# predict() is called without `transformers`, unlike the model.evaluate()
# calls in the evaluation cell.
# NOTE(review): presumably this keeps predictions in the same (transformed)
# space as valid_dataset.y -- confirm.
predicted = model.predict(valid_dataset)


About to fit model


In [None]:
# Average score: Pearson r^2 as reported by model.evaluate() for the
# training and validation splits.
metric = dc.metrics.Metric(dc.metrics.pearson_r2_score, mode="regression")
print("training score:",model.evaluate(train_dataset, metric, transformers))
print("valid score:",model.evaluate(valid_dataset, metric, transformers))

# Per-task Pearson r^2 on the validation split, using `predicted` from the
# fit cell. Iterating over valid_dataset.tasks replaces the hard-coded count
# of 8, so the loop follows the dataset's task list.
# The label array is fetched once instead of on every iteration.
valid_y = valid_dataset.y
for i, task in enumerate(valid_dataset.tasks):
    # Column i holds the values for task i; plain integer indexing yields the
    # same 1-D vector the previous concatenate-based extraction produced.
    y_true = valid_y[:, i]
    y_pred = predicted[:, i]
    print("pearson r2 valid score for:",task,"is",dc.metrics.pearson_r2_score(y_true,y_pred))

training score: {'pearson_r2_score': 0.40445718254258284}
valid score: {'pearson_r2_score': 0.2238404376377698}
pearson r2 valid score for: HOMO is 0.015196617078727126
pearson r2 valid score for: LUMO is 0.03757244597222908
pearson r2 valid score for: electrochemical_gap is 0.0022112574045139934
pearson r2 valid score for: optical_gap is 0.0035849562834221064
pearson r2 valid score for: PCE is 0.5906037603201933
pearson r2 valid score for: V_OC is 0.2107968830893046
pearson r2 valid score for: J_SC is 0.5045504078503797
pearson r2 valid score for: fill_factor is 0.42620717310338896
