In [1]:
import datetime
import logging
import json
import typing
logging.basicConfig(level=logging.INFO)

# Load primitives

In [2]:
# Measure how long importing the primitive modules takes.
# A timezone-aware "now" is used instead of datetime.utcnow(), which is
# deprecated since Python 3.12 and returns a naive datetime.
begin = datetime.datetime.now(datetime.timezone.utc)
import d3m.primitives
import d3m.primitives.data_transformation.dataset_to_dataframe
import d3m.primitives.data_transformation.column_parser
import d3m.primitives.data_transformation.extract_columns_by_semantic_types
import d3m.primitives.data_transformation.cast_to_type
import d3m.primitives.data_cleaning.imputer
import d3m.primitives.classification.random_forest
import d3m.primitives.data_transformation.construct_predictions
import d3m.primitives.data_transformation.denormalize
# Elapsed import time in seconds (displayed as the cell output).
(datetime.datetime.now(datetime.timezone.utc) - begin).total_seconds()

1.341315

In [3]:
# d3m.index is imported here, right before its only use in this notebook.
import d3m.index
# Display what the index search returns; the recorded output is a long list
# of 'd3m.primitives...' path strings.
d3m.index.search()

['d3m.primitives.bbn.sklearn_wrap.BBNMLPClassifier',
 'd3m.primitives.bbn.time_series.AudioReader',
 'd3m.primitives.bbn.time_series.BBNTfidfTransformer',
 'd3m.primitives.bbn.time_series.ChannelAverager',
 'd3m.primitives.bbn.time_series.ClusterCurveFittingKMeans',
 'd3m.primitives.bbn.time_series.IVectorExtractor',
 'd3m.primitives.bbn.time_series.SegmentCurveFitter',
 'd3m.primitives.bbn.time_series.SequenceToBagOfTokens',
 'd3m.primitives.bbn.time_series.SignalDither',
 'd3m.primitives.bbn.time_series.SignalFramer',
 'd3m.primitives.bbn.time_series.SignalMFCC',
 'd3m.primitives.bbn.time_series.TargetsReader',
 'd3m.primitives.bbn.time_series.UniformSegmentation',
 'd3m.primitives.classification.bagging.SKlearn',
 'd3m.primitives.classification.bayesian_logistic_regression.Common',
 'd3m.primitives.classification.bernoulli_naive_bayes.SKlearn',
 'd3m.primitives.classification.decision_tree.SKlearn',
 'd3m.primitives.classification.dummy.SKlearn',
 'd3m.primitives.classification.extr

# Load data

In [4]:
# Read the raw dataset description (datasetDoc.json) into a plain dict.
# The encoding is given explicitly so decoding does not depend on the
# platform's locale default; JSON documents are expected to be UTF-8.
with open('/d3m/data/seed_datasets_current/uu4_SPECT/TRAIN/dataset_TRAIN/datasetDoc.json', encoding='utf-8') as fp:
    doc = json.load(fp)
# Display the parsed document.
doc

{'about': {'datasetID': 'uu4_SPECT_dataset_TRAIN',
  'datasetName': 'NULL',
  'license': 'CC-BY license',
  'datasetSchemaVersion': '3.2.0',
  'redacted': True,
  'datasetVersion': '2.0',
  'digest': 'aa1e58249560fe2abaa24793273accc0bc81af95ebf5e38e2cee03de3b0daa5c'},
 'dataResources': [{'resID': 'learningData',
   'resPath': 'tables/learningData.csv',
   'resType': 'table',
   'resFormat': ['text/csv'],
   'isCollection': False,
   'columns': [{'colIndex': 0,
     'colName': 'd3mIndex',
     'colType': 'integer',
     'role': ['index']},
    {'colIndex': 1,
     'colName': 'OVERALL_DIAGNOSIS',
     'colType': 'categorical',
     'role': ['suggestedTarget']},
    {'colIndex': 2,
     'colName': 'F1R',
     'colType': 'integer',
     'role': ['attribute']},
    {'colIndex': 3,
     'colName': 'F1S',
     'colType': 'integer',
     'role': ['attribute']},
    {'colIndex': 4,
     'colName': 'F2R',
     'colType': 'integer',
     'role': ['attribute']},
    {'colIndex': 5,
     'colName':

In [5]:
# Pair every column's declared role list with its name, taken from the
# first data resource of the dataset description.
[(column['role'], column['colName'])
 for column in doc['dataResources'][0]['columns']]

[(['index'], 'd3mIndex'),
 (['suggestedTarget'], 'OVERALL_DIAGNOSIS'),
 (['attribute'], 'F1R'),
 (['attribute'], 'F1S'),
 (['attribute'], 'F2R'),
 (['attribute'], 'F2S'),
 (['attribute'], 'F3R'),
 (['attribute'], 'F3S'),
 (['attribute'], 'F4R'),
 (['attribute'], 'F4S'),
 (['attribute'], 'F5R'),
 (['attribute'], 'F5S'),
 (['attribute'], 'F6R'),
 (['attribute'], 'F6S'),
 (['attribute'], 'F7R'),
 (['attribute'], 'F7S'),
 (['attribute'], 'F8R'),
 (['attribute'], 'F8S'),
 (['attribute'], 'F9R'),
 (['attribute'], 'F9S'),
 (['attribute'], 'F10R'),
 (['attribute'], 'F10S'),
 (['attribute'], 'F11R'),
 (['attribute'], 'F11S'),
 (['attribute'], 'F12R'),
 (['attribute'], 'F12S'),
 (['attribute'], 'F13R'),
 (['attribute'], 'F13S'),
 (['attribute'], 'F14R'),
 (['attribute'], 'F14S'),
 (['attribute'], 'F15R'),
 (['attribute'], 'F15S'),
 (['attribute'], 'F16R'),
 (['attribute'], 'F16S'),
 (['attribute'], 'F17R'),
 (['attribute'], 'F17S'),
 (['attribute'], 'F18R'),
 (['attribute'], 'F18S'),
 (['attribu

In [6]:
from d3m.container import Dataset
from d3m.metadata.base import ALL_ELEMENTS

# Load the same datasetDoc.json as a d3m Dataset, addressed by a file:// URI.
dataset = Dataset.load('file:///d3m/data/seed_datasets_current/uu4_SPECT/TRAIN/dataset_TRAIN/datasetDoc.json')
# Display the Dataset's repr.
dataset

Dataset(id='uu4_SPECT_dataset_TRAIN', name='NULL', location_uris='('file:///d3m/data/seed_datasets_current/uu4_SPECT/TRAIN/dataset_TRAIN/datasetDoc.json',)')

In [7]:
# Iterating the dataset yields its resource IDs; the recorded output shows
# a single resource, 'learningData'.
list(dataset)

['learningData']

In [8]:
# Preview the first rows of the 'learningData' resource.
dataset['learningData'].head()

Unnamed: 0,d3mIndex,OVERALL_DIAGNOSIS,F1R,F1S,F2R,F2S,F3R,F3S,F4R,F4S,...,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22
0,2,1,71,62,70,64,67,64,79,65,...,1,0,0,0,0,0,0,0,0,0
1,3,1,69,71,70,78,61,63,67,65,...,0,0,0,0,0,0,0,1,1,1
2,4,1,70,66,61,66,61,58,69,69,...,1,0,1,1,0,0,0,0,0,0
3,5,1,57,69,68,75,69,74,73,71,...,1,1,0,1,0,0,0,1,0,1
4,7,1,61,60,60,62,64,72,68,67,...,1,0,0,0,0,0,0,0,0,1


In [9]:
# Semantic types recorded for column 0 of 'learningData', for all rows.
dataset.metadata.query(['learningData', ALL_ELEMENTS, 0])['semantic_types']

('http://schema.org/Integer',
 'https://metadata.datadrivendiscovery.org/types/PrimaryKey')

In [10]:
# Semantic types recorded for column 1 (the suggested target column).
# The query result is indexed directly, as in the neighbouring cells.
dataset.metadata.query(['learningData', ALL_ELEMENTS, 1])['semantic_types']

('https://metadata.datadrivendiscovery.org/types/CategoricalData',
 'https://metadata.datadrivendiscovery.org/types/SuggestedTarget')

In [11]:
# Semantic types recorded for column 3, one of the attribute columns.
dataset.metadata.query(['learningData', ALL_ELEMENTS, 3])['semantic_types']

('http://schema.org/Integer',
 'https://metadata.datadrivendiscovery.org/types/Attribute')

In [12]:
# Set target: column 1 (OVERALL_DIAGNOSIS)
# Add the "Target" semantic type to the types already recorded for the
# column. The existing order is kept and the type is appended only when it
# is missing, so the stored tuple is deterministic and re-running this cell
# leaves the metadata unchanged (a set round-trip would reorder the types).
target_types = list(dataset.metadata.query(['learningData', ALL_ELEMENTS, 1])['semantic_types'])
if 'https://metadata.datadrivendiscovery.org/types/Target' not in target_types:
    target_types.append('https://metadata.datadrivendiscovery.org/types/Target')
# update() returns new metadata, which is assigned back onto the dataset.
dataset.metadata = dataset.metadata.update(
    ['learningData', ALL_ELEMENTS, 1],
    {
        'semantic_types': tuple(target_types)
    }
)

In [13]:
# Re-read the semantic types of column 1 to confirm that the "Target" type
# is now present.
dataset.metadata.query(['learningData', ALL_ELEMENTS, 1])['semantic_types']

('https://metadata.datadrivendiscovery.org/types/SuggestedTarget',
 'https://metadata.datadrivendiscovery.org/types/Target',
 'https://metadata.datadrivendiscovery.org/types/CategoricalData')

In [14]:
# Show the base classes of the denormalize primitive, i.e. which primitive
# interface it derives from.
d3m.primitives.data_transformation.denormalize.Common.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [15]:
# Step 0: run the denormalize primitive on the loaded dataset.
# The Hyperparams class is looked up from the primitive's own metadata.
hyperparams_class = d3m.primitives.data_transformation.denormalize.Common.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
# Use the default value of every hyperparameter.
hyperparams = hyperparams_class.defaults()
step0 = d3m.primitives.data_transformation.denormalize.Common(hyperparams=hyperparams)
res = step0.produce(inputs=dataset)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step0_produce = res.value

In [16]:
# Show the base classes of the dataset_to_dataframe primitive.
d3m.primitives.data_transformation.dataset_to_dataframe.Common.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [17]:
# Step 1: feed the step 0 output through the dataset_to_dataframe primitive,
# using its default hyperparameters.
hyperparams_class = d3m.primitives.data_transformation.dataset_to_dataframe.Common.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
hyperparams = hyperparams_class.defaults()
step1 = d3m.primitives.data_transformation.dataset_to_dataframe.Common(hyperparams=hyperparams)
res = step1.produce(inputs=step0_produce)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step1_produce = res.value
# Preview the first rows of the result.
step1_produce.head()

Unnamed: 0,d3mIndex,OVERALL_DIAGNOSIS,F1R,F1S,F2R,F2S,F3R,F3S,F4R,F4S,...,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22
0,2,1,71,62,70,64,67,64,79,65,...,1,0,0,0,0,0,0,0,0,0
1,3,1,69,71,70,78,61,63,67,65,...,0,0,0,0,0,0,0,1,1,1
2,4,1,70,66,61,66,61,58,69,69,...,1,0,1,1,0,0,0,0,0,0
3,5,1,57,69,68,75,69,74,73,71,...,1,1,0,1,0,0,0,1,0,1
4,7,1,61,60,60,62,64,72,68,67,...,1,0,0,0,0,0,0,0,0,1


In [18]:
# Show the base classes of the column_parser primitive.
d3m.primitives.data_transformation.column_parser.DataFrameCommon.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [19]:
# Step 2: run the column_parser primitive on the step 1 dataframe, using its
# default hyperparameters.
hyperparams_class = d3m.primitives.data_transformation.column_parser.DataFrameCommon.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
hyperparams = hyperparams_class.defaults()
step2 = d3m.primitives.data_transformation.column_parser.DataFrameCommon(hyperparams=hyperparams)
res = step2.produce(inputs=step1_produce)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step2_produce = res.value
# NOTE(review): in the recorded output the 0/1 values of columns F13..F22
# appear as very large integers after this step -- presumably the parser
# hashes categorical values; confirm against the primitive's documentation.
step2_produce.head()

Unnamed: 0,d3mIndex,OVERALL_DIAGNOSIS,F1R,F1S,F2R,F2S,F3R,F3S,F4R,F4S,...,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22
0,2,1,71,62,70,64,67,64,79,65,...,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003
1,3,1,69,71,70,78,61,63,67,65,...,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985,13008020892406799985,13008020892406799985
2,4,1,70,66,61,66,61,58,69,69,...,13008020892406799985,2073085640542932003,13008020892406799985,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003
3,5,1,57,69,68,75,69,74,73,71,...,13008020892406799985,13008020892406799985,2073085640542932003,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985,2073085640542932003,13008020892406799985
4,7,1,61,60,60,62,64,72,68,67,...,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985


In [20]:
# Show the base classes of the extract_columns_by_semantic_types primitive.
d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [21]:
# Step 3: extract columns from the parsed dataframe (step 2 output).
hyperparams_class = d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
# Start from the defaults and override only `semantic_types`, here with the
# "Attribute" type.
hyperparams = hyperparams_class(hyperparams_class.defaults(),
                                semantic_types=['https://metadata.datadrivendiscovery.org/types/Attribute'])
step3 = d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon(hyperparams=hyperparams)
res = step3.produce(inputs=step2_produce)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step3_produce = res.value
# Preview the result; the recorded output no longer shows d3mIndex or
# OVERALL_DIAGNOSIS.
step3_produce.head()

Unnamed: 0,F1R,F1S,F2R,F2S,F3R,F3S,F4R,F4S,F5R,F5S,...,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22
0,71,62,70,64,67,64,79,65,70,69,...,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003
1,69,71,70,78,61,63,67,65,59,59,...,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985,13008020892406799985,13008020892406799985
2,70,66,61,66,61,58,69,69,72,68,...,13008020892406799985,2073085640542932003,13008020892406799985,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003
3,57,69,68,75,69,74,73,71,57,61,...,13008020892406799985,13008020892406799985,2073085640542932003,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985,2073085640542932003,13008020892406799985
4,61,60,60,62,64,72,68,67,74,68,...,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985


In [22]:
# Show the base classes of the cast_to_type primitive.
d3m.primitives.data_transformation.cast_to_type.Common.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [23]:
# Step 4: run the cast_to_type primitive on the step 3 output (the extracted
# attribute columns), using its default hyperparameters.
hyperparams_class = d3m.primitives.data_transformation.cast_to_type.Common.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
hyperparams = hyperparams_class.defaults()
step4 = d3m.primitives.data_transformation.cast_to_type.Common(hyperparams=hyperparams)
res = step4.produce(inputs=step3_produce)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step4_produce = res.value
# Preview the first rows of the result.
step4_produce.head()

Unnamed: 0,F1R,F1S,F2R,F2S,F3R,F3S,F4R,F4S,F5R,F5S,...,F13,F14,F15,F16,F17,F18,F19,F20,F21,F22
0,71,62,70,64,67,64,79,65,70,69,...,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003
1,69,71,70,78,61,63,67,65,59,59,...,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985,13008020892406799985,13008020892406799985
2,70,66,61,66,61,58,69,69,72,68,...,13008020892406799985,2073085640542932003,13008020892406799985,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003
3,57,69,68,75,69,74,73,71,57,61,...,13008020892406799985,13008020892406799985,2073085640542932003,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985,2073085640542932003,13008020892406799985
4,61,60,60,62,64,72,68,67,74,68,...,13008020892406799985,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,2073085640542932003,13008020892406799985


In [24]:
# Semantic types attached to the first column of the step 4 output.
step4_produce.metadata.query([ALL_ELEMENTS, 0])['semantic_types']

('http://schema.org/Integer',
 'https://metadata.datadrivendiscovery.org/types/Attribute')

In [25]:
# Show the base classes of the imputer primitive; unlike the previous steps,
# the recorded output lists an unsupervised-learner base, so it must be
# fitted before producing.
d3m.primitives.data_cleaning.imputer.SKlearn.__bases__

(d3m.primitive_interfaces.unsupervised_learning.UnsupervisedLearnerPrimitiveBase,)

In [26]:
# Step 5: fit the imputer primitive on the step 4 output and then apply it
# to that same data, using its default hyperparameters.
hyperparams_class = d3m.primitives.data_cleaning.imputer.SKlearn.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
hyperparams = hyperparams_class.defaults()
step5 = d3m.primitives.data_cleaning.imputer.SKlearn(hyperparams=hyperparams)
# Training data is supplied first; fit() itself takes no data arguments here.
step5.set_training_data(inputs=step4_produce)
res = step5.fit()
assert res.has_finished
# fit() is expected to carry no value in its result.
assert res.value is None
res = step5.produce(inputs=step4_produce)
assert res.has_finished
step5_produce = res.value
# NOTE(review): in the recorded output the column names are replaced by
# integer positions and all values are shown as floats -- confirm whether
# downstream steps rely on the original column names.
step5_produce.head()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,56,57,58,59,60,61,62,63,64,65
0,71.0,62.0,70.0,64.0,67.0,64.0,79.0,65.0,70.0,69.0,...,1.300802e+19,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18
1,69.0,71.0,70.0,78.0,61.0,63.0,67.0,65.0,59.0,59.0,...,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,1.300802e+19,1.300802e+19,1.300802e+19
2,70.0,66.0,61.0,66.0,61.0,58.0,69.0,69.0,72.0,68.0,...,1.300802e+19,2.073086e+18,1.300802e+19,1.300802e+19,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18
3,57.0,69.0,68.0,75.0,69.0,74.0,73.0,71.0,57.0,61.0,...,1.300802e+19,1.300802e+19,2.073086e+18,1.300802e+19,2.073086e+18,2.073086e+18,2.073086e+18,1.300802e+19,2.073086e+18,1.300802e+19
4,61.0,60.0,60.0,62.0,64.0,72.0,68.0,67.0,74.0,68.0,...,1.300802e+19,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,2.073086e+18,1.300802e+19


In [27]:
# Semantic types attached to the first column of the step 5 output.
step5_produce.metadata.query([ALL_ELEMENTS, 0])['semantic_types']

('http://schema.org/Integer',
 'https://metadata.datadrivendiscovery.org/types/Attribute')

In [28]:
# Show the base classes of the extract_columns_by_semantic_types primitive
# again, before it is reused to select the target column.
d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [29]:
# Step 6: extract columns from the parsed dataframe (step 2 output) again,
# this time selecting by the "Target" semantic type.
hyperparams_class = d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
# Start from the defaults and override only `semantic_types`.
hyperparams = hyperparams_class(hyperparams_class.defaults(),
                                semantic_types=['https://metadata.datadrivendiscovery.org/types/Target'])
step6 = d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon(hyperparams=hyperparams)
# The input is step2_produce (not the attribute branch, steps 3-5).
res = step6.produce(inputs=step2_produce)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step6_produce = res.value
# Preview the result; the recorded output shows only OVERALL_DIAGNOSIS.
step6_produce.head()

Unnamed: 0,OVERALL_DIAGNOSIS
0,1
1,1
2,1
3,1
4,1


In [30]:
# Show the base classes of the cast_to_type primitive again, before it is
# applied to the target column.
d3m.primitives.data_transformation.cast_to_type.Common.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [31]:
# Step 7: run the cast_to_type primitive on the step 6 output (the target
# column), using its default hyperparameters.
hyperparams_class = d3m.primitives.data_transformation.cast_to_type.Common.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
hyperparams = hyperparams_class.defaults()
step7 = d3m.primitives.data_transformation.cast_to_type.Common(hyperparams=hyperparams)
res = step7.produce(inputs=step6_produce)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step7_produce = res.value
# Preview the first rows of the result.
step7_produce.head()

Unnamed: 0,OVERALL_DIAGNOSIS
0,1
1,1
2,1
3,1
4,1


In [32]:
# Show the base classes of the random forest primitive; the recorded output
# lists a supervised-learner base, so it needs both inputs and outputs to fit.
d3m.primitives.classification.random_forest.SKlearn.__bases__

(d3m.primitive_interfaces.supervised_learning.SupervisedLearnerPrimitiveBase,
 d3m.primitive_interfaces.base.ProbabilisticCompositionalityMixin)

In [33]:
# Step 8: fit the random forest primitive with its default hyperparameters,
# using the step 5 output as inputs and the step 7 output as outputs.
hyperparams_class = d3m.primitives.classification.random_forest.SKlearn.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
hyperparams = hyperparams_class.defaults()
step8 = d3m.primitives.classification.random_forest.SKlearn(hyperparams=hyperparams)
step8.set_training_data(inputs=step5_produce, outputs=step7_produce)
res = step8.fit()
assert res.has_finished
# fit() is expected to carry no value in its result.
assert res.value is None
# Predictions are produced for the same rows that were used for fitting.
res = step8.produce(inputs=step5_produce)
assert res.has_finished
step8_produce = res.value
# NOTE(review): in the recorded output the prediction column is named 0
# rather than OVERALL_DIAGNOSIS -- likely the reason the final column-name
# check of this notebook fails; confirm.
step8_produce.head()

Unnamed: 0,0
0,1
1,1
2,1
3,1
4,1


In [34]:
# Show all metadata recorded for the first column of the predictions,
# converted to a plain dict for display.
dict(step8_produce.metadata.query([ALL_ELEMENTS, 0]))

{'structural_type': str,
 'semantic_types': ('https://metadata.datadrivendiscovery.org/types/Target',
  'https://metadata.datadrivendiscovery.org/types/PredictedTarget')}

In [35]:
# Show the base classes of the extract_columns_by_semantic_types primitive
# once more, before it is reused to build the reference frame.
d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [36]:
# Step 10: extract columns from the parsed dataframe (step 2 output),
# selecting by both the "Target" and "PrimaryKey" semantic types.
# It is numbered 10 but runs before step 9, which takes this output as its
# `reference` argument.
hyperparams_class = d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
# Start from the defaults and override only `semantic_types`.
hyperparams = hyperparams_class(hyperparams_class.defaults(),
                                semantic_types=['https://metadata.datadrivendiscovery.org/types/Target',
                                                'https://metadata.datadrivendiscovery.org/types/PrimaryKey'])
step10 = d3m.primitives.data_transformation.extract_columns_by_semantic_types.DataFrameCommon(hyperparams=hyperparams)
res = step10.produce(inputs=step2_produce)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step10_produce = res.value
# Preview the result; the recorded output shows d3mIndex and
# OVERALL_DIAGNOSIS.
step10_produce.head()

Unnamed: 0,d3mIndex,OVERALL_DIAGNOSIS
0,2,1
1,3,1
2,4,1
3,5,1
4,7,1


In [37]:
# Show the base classes of the construct_predictions primitive.
d3m.primitives.data_transformation.construct_predictions.DataFrameCommon.__bases__

(d3m.primitive_interfaces.transformer.TransformerPrimitiveBase,)

In [38]:
# Step 9: run the construct_predictions primitive with its default
# hyperparameters, passing the step 8 predictions as inputs and the step 10
# frame as `reference`.
hyperparams_class = d3m.primitives.data_transformation.construct_predictions.DataFrameCommon.metadata.query()['primitive_code']['class_type_arguments']['Hyperparams']
hyperparams = hyperparams_class.defaults()
step9 = d3m.primitives.data_transformation.construct_predictions.DataFrameCommon(hyperparams=hyperparams)
# fit() is called without setting training data and its result is not checked.
step9.fit()
res = step9.produce(inputs=step8_produce, reference=step10_produce)
# Only use the produced value once the call reports that it has finished.
assert res.has_finished
step9_produce = res.value
# Preview the result; the recorded output shows columns d3mIndex and 0.
step9_produce.head()

Unnamed: 0,d3mIndex,0
0,2,1
1,3,1
2,4,1
3,5,1
4,7,1


In [39]:
# Sanity check: the constructed predictions should have the same column
# names, in the same order, as the reference frame from step 10.
# The message lists both sides so a failure is diagnosable without
# re-running earlier cells.
assert list(step9_produce.columns) == list(step10_produce.columns), (
    'prediction columns {!r} do not match reference columns {!r}'.format(
        list(step9_produce.columns), list(step10_produce.columns)
    )
)

AssertionError: 