Merge Notebooks #43

Open
wants to merge 8 commits into base: master
Changes from all commits
20,000 changes: 20,000 additions & 0 deletions datasets/anomaly/raw_data/500_UCR_Anomaly_robotDOG1_10000_19280_19360.txt

Large diffs are not rendered by default.

2,438 changes: 2,438 additions & 0 deletions datasets/anomaly/raw_data/blockchain.csv

Large diffs are not rendered by default.

2,706 changes: 0 additions & 2,706 deletions examples/Demo Notebook/TODS Official Demo Notebook.ipynb

This file was deleted.

2,127 changes: 2,127 additions & 0 deletions examples/Demo Notebook/TODSBlockchainNotebook.ipynb

Large diffs are not rendered by default.

2,028 changes: 2,028 additions & 0 deletions examples/Demo Notebook/TODSOfficialNotebook.ipynb

Large diffs are not rendered by default.

@@ -0,0 +1,78 @@
from d3m import index
from d3m.metadata.base import ArgumentType
from d3m.metadata.pipeline import Pipeline, PrimitiveStep

# Pipeline: dataset_to_dataframe -> column_parser -> extract_columns_by_semantic_types(attributes) -> statistical_maximum -> sod_primitive -> construct_predictions
#           extract_columns_by_semantic_types(targets) -> sod_primitive (as 'outputs')

# Creating pipeline
pipeline_description = Pipeline()
pipeline_description.add_input(name='inputs')

# Step 0: dataset_to_dataframe
step_0 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.dataset_to_dataframe'))
step_0.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='inputs.0')
step_0.add_output('produce')
pipeline_description.add_step(step_0)

# Step 1: column_parser
step_1 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.column_parser'))
step_1.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_1.add_output('produce')
pipeline_description.add_step(step_1)

# Step 2: extract_columns_by_semantic_types(attributes)
step_2 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_2.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_2.add_output('produce')
step_2.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/Attribute'])
pipeline_description.add_step(step_2)

# Step 3: extract_columns_by_semantic_types(targets)
step_3 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.extract_columns_by_semantic_types'))
step_3.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.0.produce')
step_3.add_output('produce')
step_3.add_hyperparameter(name='semantic_types', argument_type=ArgumentType.VALUE,
                          data=['https://metadata.datadrivendiscovery.org/types/TrueTarget'])
pipeline_description.add_step(step_3)

attributes = 'steps.2.produce'
targets = 'steps.3.produce'

# Step 4: processing
#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.timeseries_processing.transformation.axiswise_scaler'))
step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_maximum'))
#step_4 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.feature_analysis.statistical_minimum'))
step_4.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference=attributes)
step_4.add_output('produce')
pipeline_description.add_step(step_4)

# Step 5: supervised outlier detection algorithm
step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.sod_primitive'))
step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
step_5.add_argument(name='outputs', argument_type=ArgumentType.CONTAINER, data_reference=targets)
step_5.add_output('produce')
pipeline_description.add_step(step_5)

# step_5 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.detection_algorithm.pyod_knn'))
# step_5.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.4.produce')
# step_5.add_output('produce')
# pipeline_description.add_step(step_5)

# Step 6: Predictions
step_6 = PrimitiveStep(primitive=index.get_primitive('d3m.primitives.tods.data_processing.construct_predictions'))
step_6.add_argument(name='inputs', argument_type=ArgumentType.CONTAINER, data_reference='steps.5.produce')
step_6.add_argument(name='reference', argument_type=ArgumentType.CONTAINER, data_reference='steps.1.produce')
step_6.add_output('produce')
pipeline_description.add_step(step_6)

# Final Output
pipeline_description.add_output(name='output predictions', data_reference='steps.6.produce')

# Output to json
data = pipeline_description.to_json()
with open('./examples/axolotl_interface/example_pipelines/sod_pipeline.json', 'w') as f:
    f.write(data)
print(data)
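
A quick round-trip check (sketch, not part of this PR's diff): the exported JSON can be parsed back into a Pipeline object, assuming d3m's Pipeline.from_json and the output path used above.

# Sketch: reload the exported pipeline description to verify it parses again.
from d3m.metadata.pipeline import Pipeline

with open('./examples/axolotl_interface/example_pipelines/sod_pipeline.json', 'r') as f:
    reloaded = Pipeline.from_json(f.read())
print(reloaded.id)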

86 changes: 86 additions & 0 deletions examples/sk_examples/AEAutokeras.py
@@ -0,0 +1,86 @@
from autokeras.engine.block import Block
import autokeras as ak
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.python.util import nest
from numpy import asarray
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# from sklearn.model_selection import train_test_split
from autokeras import StructuredDataClassifier
from autokeras import StructuredDataRegressor
from tods.sk_interface.detection_algorithm.AutoEncoder_skinterface import AutoEncoderSKI

# TODO: how should the Yahoo dataset be split?
# TODO: are y_true and y_pred assigned correctly?
# TODO: show the notebook error
# TODO: how to get Autokeras reports?

#dataset
dataset = pd.read_csv("./yahoo_sub_5.csv")
data = dataset.to_numpy()
labels = dataset.iloc[:,6]
value1 = dataset.iloc[:,2] # delete later
print(labels)
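
# Sketch (not part of the original script): one possible answer to the split question above.
# A chronological split avoids leaking future points into training for time series; the
# 0.7 ratio is an arbitrary assumption.
# split = int(0.7 * len(data))
# X_train, X_test = data[:split], data[split:]
# y_train, y_test = labels[:split], labels[split:]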

#tods primitive
transformer = AutoEncoderSKI()
transformer.fit(data)
tods_output = transformer.predict(data)
prediction_score = transformer.predict_score(data)
print('result from AE primitive: \n', tods_output) #sk report
print('score from AE: \n', prediction_score)

#sk report
y_true = labels
y_pred = tods_output

print('Accuracy Score: ', accuracy_score(y_true, y_pred))

print('confusion matrix: \n', confusion_matrix(y_true, y_pred))

print(classification_report(y_true, y_pred))

precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
f1_scores = 2*recall*precision/(recall+precision)

print('Best threshold: ', thresholds[np.argmax(f1_scores)])
print('Best F1-Score: ', np.max(f1_scores))
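
# Note (suggestion, not part of the original run): precision_recall_curve is usually more
# informative when given the continuous anomaly scores instead of the binary labels, e.g.
#   precision, recall, thresholds = precision_recall_curve(y_true, np.ravel(prediction_score))
# The selected threshold can then be applied to prediction_score directly.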


#classifier
print('Classifier Starts here:')
search = StructuredDataClassifier(max_trials=15)
# perform the search
search.fit(x=data, y=labels, verbose=0)  # y = data label column
# evaluate the model
loss, acc = search.evaluate(data, labels, verbose=0)
print('Accuracy: %.3f' % acc)
# use the model to make a prediction
# row = [0.0200,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,0.1609,0.1582,0.2238,0.0645,0.0660,0.2273,0.3100,0.2999,0.5078,0.4797,0.5783,0.5071,0.4328,0.5550,0.6711,0.6415,0.7104,0.8080,0.6791,0.3857,0.1307,0.2604,0.5121,0.7547,0.8537,0.8507,0.6692,0.6097,0.4943,0.2744,0.0510,0.2834,0.2825,0.4256,0.2641,0.1386,0.1051,0.1343,0.0383,0.0324,0.0232,0.0027,0.0065,0.0159,0.0072,0.0167,0.0180,0.0084,0.0090,0.0032]
# X_new = asarray([row]).astype('float32')
yhat = search.predict(data)
print('Predicted: %.3f' % yhat[0])
# get the best performing model
model = search.export_model()
# summarize the loaded model
model.summary()

#regressor
print('Regressor Starts here:')
search = StructuredDataRegressor(max_trials=15, loss='mean_absolute_error')
# perform the search
search.fit(x=data, y=labels, verbose=0) # y = data label
mae, _ = search.evaluate(data, labels, verbose=0)
print('MAE: %.3f' % mae)
# use the model to make a prediction
# X_new = asarray([[108]]).astype('float32')
yhat = search.predict(data)
print('Predicted: %.3f' % yhat[0])
# get the best performing model
model = search.export_model()
# summarize the loaded model
model.summary()
2 changes: 1 addition & 1 deletion examples/sk_examples/DeepLog_test.py
@@ -22,7 +22,7 @@
prediction_labels = transformer.predict(X_test)
prediction_score = transformer.predict_score(X_test)

print("Primitive: ", transformer.primitive)
# print("Primitive: ", transformer.primitive)
print("Prediction Labels\n", prediction_labels)
print("Prediction Score\n", prediction_score)

108 changes: 108 additions & 0 deletions examples/sk_examples/SubSeqAutokeras.py
@@ -0,0 +1,108 @@
import autokeras as ak
from autokeras.engine.block import Block
import tensorflow as tf
import numpy as np
import pandas as pd
from tensorflow.python.util import nest
from tods.sk_interface.timeseries_processing.SubsequenceSegmentation_skinterface import SubsequenceSegmentationSKI
# load dataset
dataset = pd.read_csv("./yahoo_sub_5.csv")
data = dataset.to_numpy()
labels = dataset.iloc[:,6]
print(labels)
transformer = SubsequenceSegmentationSKI()
tods_output = transformer.produce(data)
print('result from SubsequenceSegmentation primitive:', tods_output)
print(tods_output.shape)

#autoregression

class MLPInteraction(Block):
    """MLP block that can be configured with different numbers of layers, units, and
    other settings.

    # Attributes:
        units (int): The number of units in each layer of the MLP block.
        num_layers (int): The number of layers in the MLP block.
        use_batchnorm (bool): Whether to use batch normalization.
        dropout_rate (float): The dropout rate of the last MLP layer.
    """

    def __init__(self,
                 units=None,
                 num_layers=None,
                 use_batchnorm=None,
                 dropout_rate=None,
                 **kwargs):
        super().__init__(**kwargs)
        self.units = units
        self.num_layers = num_layers
        self.use_batchnorm = use_batchnorm
        self.dropout_rate = dropout_rate

    def get_state(self):
        state = super().get_state()
        state.update({
            'units': self.units,
            'num_layers': self.num_layers,
            'use_batchnorm': self.use_batchnorm,
            'dropout_rate': self.dropout_rate})
        return state

    def set_state(self, state):
        super().set_state(state)
        self.units = state['units']
        self.num_layers = state['num_layers']
        self.use_batchnorm = state['use_batchnorm']
        self.dropout_rate = state['dropout_rate']

    def build(self, hp, inputs=None):
        input_node = [tf.keras.layers.Flatten()(node) if len(node.shape) > 2 else node
                      for node in nest.flatten(inputs)]
        output_node = tf.concat(input_node, axis=1)
        num_layers = self.num_layers or hp.Choice('num_layers', [1, 2, 3], default=2)
        use_batchnorm = self.use_batchnorm
        if use_batchnorm is None:
            use_batchnorm = hp.Choice('use_batchnorm', [True, False], default=False)
        dropout_rate = self.dropout_rate or hp.Choice('dropout_rate',
                                                      [0.0, 0.25, 0.5],
                                                      default=0.0)

        for i in range(num_layers):
            units = self.units or hp.Choice(
                'units_{i}'.format(i=i),
                [16, 32, 64, 128, 256, 512, 1024],
                default=32)
            output_node = tf.keras.layers.Dense(units)(output_node)
            if use_batchnorm:
                output_node = tf.keras.layers.BatchNormalization()(output_node)
            output_node = tf.keras.layers.ReLU()(output_node)
            output_node = tf.keras.layers.Dropout(dropout_rate)(output_node)
        return output_node
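
# Note (assumption): the ak.Input shape below has to match the second dimension of
# tods_output.shape printed above; if the segmentation settings or the dataset change,
# update shape=[7,] accordingly.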
inputs = ak.Input(shape=[7,]) #important!!! depends on shape above
print(inputs.shape)
print(inputs.dtype)
mlp_input = MLPInteraction()([inputs])
mlp_output = MLPInteraction()([mlp_input])

# Step 2.3: Setup optimizer to handle the target task
output = ak.RegressionHead()(mlp_output)

# Step 3: Build the searcher, which provides search algorithm
auto_model = ak.AutoModel(inputs=inputs,  # produce
                          outputs=output,  # final MLP output
                          objective='val_mean_squared_error',
                          max_trials=5)
# Step 4: Use the searcher to search the recommender
auto_model.fit(x=[tods_output],
               y=tods_output,  # TODO: build a label column from the Yahoo dataset (first element of the next segment)
               batch_size=32,
               epochs=5)

accuracy = auto_model.evaluate(x=[tods_output],
                               y=labels)

print(accuracy)
# logger.info('Validation Accuracy (mse): {}'.format(auto_model.evaluate(x=[val_X_categorical],
# y=val_y)))
# # Step 5: Evaluate the searched model
# logger.info('Test Accuracy (mse): {}'.format(auto_model.evaluate(x=[tods_output],
# y=labels)))
5 changes: 3 additions & 2 deletions examples/sk_examples/Telemanom_test.py
@@ -1,5 +1,6 @@
import numpy as np
from tods.tods_skinterface.primitiveSKI.detection_algorithm.Telemanom_skinterface import TelemanomSKI
from tods.sk_interface.detection_algorithm.Telemanom_skinterface import TelemanomSKI
# from tods.tods_skinterface.primitiveSKI.detection_algorithm.Telemanom_skinterface import TelemanomSKI
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
@@ -25,7 +26,7 @@
prediction_labels = transformer.predict(X_test)
prediction_score = transformer.predict_score(X_test)

print("Primitive: ", transformer.primitive)
# print("Primitive: ", transformer.primitive)
print("Prediction Labels\n", prediction_labels)
print("Prediction Score\n", prediction_score)
y_true = prediction_labels_train
67 changes: 67 additions & 0 deletions examples/sk_examples/Telemanom_yahoo_test.py
@@ -0,0 +1,67 @@
import numpy as np
from tods.sk_interface.detection_algorithm.Telemanom_skinterface import TelemanomSKI
# from tods.tods_skinterface.primitiveSKI.detection_algorithm.Telemanom_skinterface import TelemanomSKI
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn import metrics
import pandas as pd
#prepare the data

data = pd.read_csv("./yahoo_sub_5.csv").to_numpy()
# print("shape:", data.shape)
# print("datatype of data:",data.dtype)
# print("First 5 rows:\n", data[:5])

# X_train = np.expand_dims(data[:10000], axis=1)
# X_test = np.expand_dims(data[10000:], axis=1)

# print("First 5 rows train:\n", X_train[:5])
# print("First 5 rows test:\n", X_test[:5])

transformer = TelemanomSKI(l_s=2, n_predictions=1)
transformer.fit(data)
# prediction_labels_train = transformer.predict(X_train)
prediction_labels = transformer.predict(data)
prediction_score = transformer.predict_score(data)

# print("Primitive: ", transformer.primitive)
print("Prediction Labels\n", prediction_labels)
print("Prediction Score\n", prediction_score)

df1 = pd.DataFrame(prediction_labels)
df2 = pd.DataFrame(prediction_score)

# df1.to_csv(r'./labels.csv', index = False)
df2.to_csv(r'./scores.csv', index = False)
# result = pd.merge(df1, df2[[]])
# result = [prediction_labels, prediction_score]
# # result = pd.DataFrame({'label': prediction_labels, 'score': prediction_score}, columns=['label', 'score'], index=[0])
# print(result)
# pd.DataFrame(result).to_csv("./teleSKI.csv")
# y_true = prediction_labels_train
# y_pred = prediction_labels

# print('Accuracy Score: ', accuracy_score(y_true, y_pred))

# confusion_matrix(y_true, y_pred)

# print(classification_report(y_true, y_pred))

# precision, recall, thresholds = precision_recall_curve(y_true, y_pred)
# f1_scores = 2*recall*precision/(recall+precision)

# print('Best threshold: ', thresholds[np.argmax(f1_scores)])
# print('Best F1-Score: ', np.max(f1_scores))

# fpr, tpr, threshold = metrics.roc_curve(y_true, y_pred)
# roc_auc = metrics.auc(fpr, tpr)

# plt.title('ROC')
# plt.plot(fpr, tpr, 'b', label = 'AUC = %0.2f' % roc_auc)
# plt.legend(loc = 'lower right')
# plt.ylabel('True Positive Rate')
# plt.xlabel('False Positive Rate')
# plt.show()
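
# Sketch (simpler alternative to the merge attempts above; assumes both arrays are 1-D
# after np.ravel):
# result = pd.DataFrame({'label': np.ravel(prediction_labels),
#                        'score': np.ravel(prediction_score)})
# result.to_csv('./teleSKI.csv', index=False)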