Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1965 from deepchem/transformer_docs
Assorted Fixes
- Loading branch information
Showing 7 changed files with 279 additions and 246 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,122 +0,0 @@ | ||
""" | ||
General API for testing dataset objects | ||
""" | ||
__author__ = "Bharath Ramsundar" | ||
__copyright__ = "Copyright 2016, Stanford University" | ||
__license__ = "MIT" | ||
|
||
import unittest | ||
import tempfile | ||
import os | ||
import shutil | ||
import numpy as np | ||
import deepchem as dc | ||
|
||
|
||
def load_solubility_data():
    """Load the example solubility (regression) dataset.

    Featurizes ``../../models/tests/example.csv``, resolved relative to the
    directory containing this file, with a ``CircularFingerprint`` of size
    1024. SMILES strings are read from the ``smiles`` column and the single
    task is ``log-solubility``.

    Returns
    -------
    The object returned by ``dc.data.CSVLoader.featurize`` for that file.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    featurizer = dc.feat.CircularFingerprint(size=1024)
    tasks = ["log-solubility"]
    input_file = os.path.join(current_dir, "../../models/tests/example.csv")
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)
    return loader.featurize(input_file)
|
||
|
||
def load_butina_data():
    """Load the Butina example dataset.

    Featurizes ``../../models/tests/butina_example.csv``, resolved relative
    to the directory containing this file, with a ``CircularFingerprint`` of
    size 1024. SMILES strings are read from the ``smiles`` column and the
    single task column is ``task``.

    Returns
    -------
    The object returned by ``dc.data.CSVLoader.featurize`` for that file.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    featurizer = dc.feat.CircularFingerprint(size=1024)
    tasks = ["task"]
    input_file = os.path.join(current_dir,
                              "../../models/tests/butina_example.csv")
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)
    return loader.featurize(input_file)
|
||
|
||
def load_multitask_data():
    """Load the example multitask dataset (tasks ``task0`` .. ``task16``).

    Reads ``../../models/tests/multitask_example.csv`` relative to this
    file's directory, taking SMILES strings from the ``smiles`` column and
    featurizing them with a ``CircularFingerprint`` of size 1024.

    Returns
    -------
    The object returned by ``dc.data.CSVLoader.featurize`` for that file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(here, "../../models/tests/multitask_example.csv")
    # Seventeen task columns: task0 through task16.
    task_names = ["task%d" % idx for idx in range(17)]
    fingerprint = dc.feat.CircularFingerprint(size=1024)
    csv_loader = dc.data.CSVLoader(
        tasks=task_names, smiles_field="smiles", featurizer=fingerprint)
    return csv_loader.featurize(csv_path)
|
||
|
||
def load_classification_data():
    """Load the example classification dataset.

    Featurizes ``../../models/tests/example_classification.csv``, resolved
    relative to the directory containing this file, with a
    ``CircularFingerprint`` of size 1024. SMILES strings are read from the
    ``smiles`` column and the single task is ``outcome``.

    Returns
    -------
    The object returned by ``dc.data.CSVLoader.featurize`` for that file.
    """
    current_dir = os.path.dirname(os.path.abspath(__file__))
    featurizer = dc.feat.CircularFingerprint(size=1024)
    tasks = ["outcome"]
    input_file = os.path.join(current_dir,
                              "../../models/tests/example_classification.csv")
    loader = dc.data.CSVLoader(
        tasks=tasks, smiles_field="smiles", featurizer=featurizer)
    return loader.featurize(input_file)
|
||
|
||
def load_sparse_multitask_dataset():
    """Load the sparse tox multitask sample dataset (``task1`` .. ``task9``).

    Reads ``../../models/tests/sparse_multitask_example.csv`` relative to
    this file's directory, taking SMILES strings from the ``smiles`` column
    and featurizing them with a ``CircularFingerprint`` of size 1024.

    Returns
    -------
    The object returned by ``dc.data.CSVLoader.featurize`` for that file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(
        here, "../../models/tests/sparse_multitask_example.csv")
    # Nine task columns, numbered from 1 (there is no task0 here).
    task_names = ["task%d" % idx for idx in range(1, 10)]
    fingerprint = dc.feat.CircularFingerprint(size=1024)
    csv_loader = dc.data.CSVLoader(
        tasks=task_names, smiles_field="smiles", featurizer=fingerprint)
    return csv_loader.featurize(csv_path)
|
||
|
||
def load_feat_multitask_data():
    """Load the example dataset with numerical features and multiple tasks.

    Reads ``../../models/tests/feat_multitask_example.csv`` relative to this
    file's directory. Feature names ``feat0`` .. ``feat5`` are handed to
    ``UserDefinedFeaturizer``, the tasks are ``task0`` .. ``task5``, and the
    loader is told to use ``id`` as its id field.

    Returns
    -------
    The object returned by ``dc.data.UserCSVLoader.featurize`` for that file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(
        here, "../../models/tests/feat_multitask_example.csv")
    # Six feature names and six task names share the same 0..5 numbering.
    feature_names = ["feat%d" % idx for idx in range(6)]
    task_names = ["task%d" % idx for idx in range(6)]
    user_featurizer = dc.feat.UserDefinedFeaturizer(feature_names)
    csv_loader = dc.data.UserCSVLoader(
        tasks=task_names, featurizer=user_featurizer, id_field="id")
    return csv_loader.featurize(csv_path)
|
||
|
||
def load_gaussian_cdf_data():
    """Load the example dataset of standard-normal samples.

    Per the original description, every feature and task column holds
    values sampled from a normal distribution with mean 0 and standard
    deviation 1. Reads ``../../models/tests/gaussian_cdf_example.csv``
    relative to this file's directory, with features ``feat0``/``feat1``,
    tasks ``task0``/``task1`` and ``id`` as the id field.

    Returns
    -------
    The object returned by ``dc.data.UserCSVLoader.featurize`` for that file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(
        here, "../../models/tests/gaussian_cdf_example.csv")
    feature_names = ["feat%d" % idx for idx in range(2)]
    task_names = ["task%d" % idx for idx in range(2)]
    user_featurizer = dc.feat.UserDefinedFeaturizer(feature_names)
    csv_loader = dc.data.UserCSVLoader(
        tasks=task_names, featurizer=user_featurizer, id_field="id")
    return csv_loader.featurize(csv_path)
|
||
|
||
def load_unlabelled_data():
    """Load the example dataset that has no task (label) columns.

    Reads ``../../data/tests/no_labels.csv`` relative to this file's
    directory with an empty task list, taking SMILES strings from the
    ``smiles`` column and featurizing them with a ``CircularFingerprint`` of
    size 1024.

    Returns
    -------
    The object returned by ``dc.data.CSVLoader.featurize`` for that file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(here, "../../data/tests/no_labels.csv")
    fingerprint = dc.feat.CircularFingerprint(size=1024)
    # An empty task list: the file carries no labels to load.
    csv_loader = dc.data.CSVLoader(
        tasks=[], smiles_field="smiles", featurizer=fingerprint)
    return csv_loader.featurize(csv_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.