# Import Libraries

In [1]:
import os
import pandas as pd
from typing import Text

from absl import logging
from tfx.orchestration import metadata, pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

# Preprocessing

In [2]:
# Build a small, class-balanced training file for the pipeline:
# keep rows that have cleaned text, draw the same number of rows per label,
# and write only the two columns the pipeline consumes ("text", "label").
SAMPLES_PER_CLASS = 500
RANDOM_SEED = 42  # fixed seed so a re-run writes the same CSV

df = (
    pd.read_csv("news_articles.csv")
    # Drop rows whose cleaned text is missing (same effect as the NaN != NaN trick).
    .dropna(subset=["text_without_stopwords"])
    .groupby("label")
    # Raises ValueError if a label has fewer than SAMPLES_PER_CLASS rows.
    .sample(n=SAMPLES_PER_CLASS, random_state=RANDOM_SEED)
    .reset_index(drop=True)
)

# Bracket assignment always targets a column; attribute assignment (df.text = ...)
# only does so when the column already exists.
df["text"] = df["text_without_stopwords"]
# sort=True makes the label -> integer mapping depend on the label values,
# not on the order in which rows happen to appear.
df["label"] = pd.factorize(df["label"], sort=True)[0]
df = df[["text", "label"]]

# The output folder may not exist on a fresh checkout.
os.makedirs("data", exist_ok=True)
df.to_csv("data/news_articles.csv", index=False)

# Set Variables

In [3]:
# --- Pipeline identity -------------------------------------------------------
# Passed as pipeline_name and also used as the pipeline's output sub-folder.
PIPELINE_NAME = "gesang_wibawono-pipeline"

# --- Inputs ------------------------------------------------------------------
# Data location and user-module files handed to init_components.
DATA_ROOT = "data"
TRANSFORM_MODULE_FILE = "modules/fake_detection_transform.py"
TRAINER_MODULE_FILE = "modules/fake_detection_trainer.py"

# --- Outputs -----------------------------------------------------------------
# Everything the pipeline writes lives under OUTPUT_BASE.
OUTPUT_BASE = "output"
pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
serving_model_dir = os.path.join(OUTPUT_BASE, 'serving_model')
# SQLite file used for the pipeline's metadata connection.
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")

# Pipeline

In [4]:
def init_local_pipeline(
    components, pipeline_root: Text
) -> pipeline.Pipeline:
    """Assemble the TFX pipeline object for a local run.

    Reads the module-level PIPELINE_NAME and metadata_path in addition to
    its arguments.

    Args:
        components: The TFX components to run, forwarded unchanged to
            pipeline.Pipeline.
        pipeline_root: Directory passed to pipeline.Pipeline as its
            pipeline_root.

    Returns:
        A pipeline.Pipeline with caching enabled, a SQLite metadata
        connection at metadata_path, and the Beam options below.
    """

    logging.info("Pipeline root set to: %s", pipeline_root)

    # Each flag must be its own list element; without the separating comma
    # Python concatenates adjacent string literals into a single bogus flag.
    # NOTE(review): with the keyword typo fixed these options now actually
    # reach Beam — confirm multi_processing mode works in your environment.
    beam_args = [
        "--direct_running_mode=multi_processing",
        # 0 asks the Beam direct runner to size the worker pool from the
        # number of available CPU cores.
        "--direct_num_workers=0",
    ]

    return pipeline.Pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path
        ),
        # Was misspelled `eam_pipeline_args`, so the Beam options were never applied.
        beam_pipeline_args=beam_args,
    )

In [5]:
logging.set_verbosity(logging.INFO)

from modules.components import init_components

# Build the component list from the configuration constants.
components = init_components(
    DATA_ROOT,
    transform_module=TRANSFORM_MODULE_FILE,
    trainer_module=TRAINER_MODULE_FILE,
    training_steps=20,
    eval_steps=10,
    serving_model_dir=serving_model_dir,
)

# Do not bind the result to `pipeline`: that name is the imported
# tfx.orchestration.pipeline module, which init_local_pipeline looks up at
# call time. Shadowing it makes a second run of this cell fail.
local_pipeline = init_local_pipeline(components, pipeline_root)
BeamDagRunner().run(pipeline=local_pipeline)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Pipeline root set to: output\gesang_wibawono-pipeline
INFO:absl:Generating ephemeral wheel package for 'D:\\learning\\fake-detection\\modules\\fake_detection_transform.py' (including modules: ['components', 'fake_detection_trainer', 'fake_detection_transform']).
INFO:absl:User module package has hash fingerprint version fbba7894448d71d82e81ea0710eeb6bd5c1bd6c7300c401771b1cf173c7dae84.
INFO:absl:Executing: ['C:\\Users\\BPS\\.conda\\envs\\a443-churn\\python.exe', 'C:\\Users\\BPS\\AppData\\Local\\Temp\\tmp6vwsygds\\_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', 'C:\\Users\\BPS\\AppData\\Local\\Temp\\tmpy0t2f1u9', '--dist-dir', 'C:\\Users\\BPS\\AppData\\Local\\Temp\\tmpmhliqgeg']
INFO:absl:Successfully built user code wheel distribution at 'output\\gesang_wibawono-pipeline\\_whee

INFO:absl:Node CsvExampleGen depends on [].
INFO:absl:Node CsvExampleGen is scheduled.
INFO:absl:Node Latest_blessed_model_resolver depends on [].
INFO:absl:Node Latest_blessed_model_resolver is scheduled.
INFO:absl:Node StatisticsGen depends on ['Run[CsvExampleGen]'].
INFO:absl:Node StatisticsGen is scheduled.
INFO:absl:Node SchemaGen depends on ['Run[StatisticsGen]'].
INFO:absl:Node SchemaGen is scheduled.
INFO:absl:Node ExampleValidator depends on ['Run[SchemaGen]', 'Run[StatisticsGen]'].
INFO:absl:Node ExampleValidator is scheduled.
INFO:absl:Node Transform depends on ['Run[CsvExampleGen]', 'Run[SchemaGen]'].
INFO:absl:Node Transform is scheduled.
INFO:absl:Node Trainer depends on ['Run[SchemaGen]', 'Run[Transform]'].
INFO:absl:Node Trainer is scheduled.
INFO:absl:Node Evaluator depends on ['Run[CsvExampleGen]', 'Run[Latest_blessed_model_resolver]', 'Run[Trainer]'].
INFO:absl:Node Evaluator is scheduled.
INFO:absl:Node Pusher depends on ['Run[Evaluator]', 'Run[Trainer]'].
INFO:absl

INFO:absl:Generating examples.
INFO:absl:Processing input csv data data\* to TFExample.
INFO:absl:Examples generated.
INFO:absl:Value type <class 'NoneType'> of key version in exec_properties is not supported, going to drop it
INFO:absl:Value type <class 'list'> of key _beam_pipeline_args in exec_properties is not supported, going to drop it
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 9 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'examples': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\CsvExampleGen\\examples\\9"
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:2179316,xor_checksum:1700536015,sum_checksum:1700536015"
  }
}
custom_properties {
  key: "span"
  value {
    int_value: 0
  }
}
, artifact_type: name: "Examples"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  

INFO:absl:Generating statistics for split train.
INFO:absl:Statistics for split train written to output\gesang_wibawono-pipeline\StatisticsGen\statistics\10\Split-train.
INFO:absl:Generating statistics for split eval.
INFO:absl:Statistics for split eval written to output\gesang_wibawono-pipeline\StatisticsGen\statistics\10\Split-eval.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 10 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'statistics': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\StatisticsGen\\statistics\\10"
, artifact_type: name: "ExampleStatistics"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
base_type: STATISTICS
)]}) for execution 10
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node StatisticsGen is finished.
INFO:absl:node SchemaGen is running.
INFO:absl:Running launcher for node_info {
  ty

INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'schema': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\SchemaGen\\schema\\11"
, artifact_type: name: "Schema"
)]}) for execution 11
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node SchemaGen is finished.
INFO:absl:node Transform is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.transform.component.Transform"
    base_type: TRANSFORM
  }
  id: "Transform"
}
contexts {
  contexts {
    type {
      name: "pipeline"
    }
    name {
      field_value {
        string_value: "gesang_wibawono-pipeline"
      }
    }
  }
  contexts {
    type {
      name: "pipeline_run"
    }
    name {
      field_value {
        string_value: "20231121-100702.526989"
      }
    }
  }
  contexts {
    type {
      name: "node"
    }
    name {
      field_value {
        string_value: "gesang_wibawono-pipeline.Transform"
      }
    }
  }
}
inputs {
  inputs {
    

INFO:absl:Analyze the 'train' split and transform all splits when splits_config is not set.
INFO:absl:udf_utils.get_fn {'module_file': None, 'module_path': 'fake_detection_transform@output\\gesang_wibawono-pipeline\\_wheels\\tfx_user_code_Transform-0.0+fbba7894448d71d82e81ea0710eeb6bd5c1bd6c7300c401771b1cf173c7dae84-py3-none-any.whl', 'preprocessing_fn': None} 'preprocessing_fn'
INFO:absl:Installing 'output\\gesang_wibawono-pipeline\\_wheels\\tfx_user_code_Transform-0.0+fbba7894448d71d82e81ea0710eeb6bd5c1bd6c7300c401771b1cf173c7dae84-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['C:\\Users\\BPS\\.conda\\envs\\a443-churn\\python.exe', '-m', 'pip', 'install', '--target', 'C:\\Users\\BPS\\AppData\\Local\\Temp\\tmptbya79bf', 'output\\gesang_wibawono-pipeline\\_wheels\\tfx_user_code_Transform-0.0+fbba7894448d71d82e81ea0710eeb6bd5c1bd6c7300c401771b1cf173c7dae84-py3-none-any.whl']
INFO:absl:Successfully installed 'output\\gesang_wibawono-pipeline\\_wheels\\tfx_user_code_Tr

Instructions for updating:
Use ref() instead.


Instructions for updating:
Use ref() instead.
INFO:absl:Feature label has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature text has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature label has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature text has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature label has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature text has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature label has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature text has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature label has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature text has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature label has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature text has a shape dim {
  size: 1
}
. Setting to DenseTensor.


INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Transform\transform_graph\12\.temp_path\tftransform_tmp\82b8da3205f6462cbec21deaa8aacdc7\assets


INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Transform\transform_graph\12\.temp_path\tftransform_tmp\82b8da3205f6462cbec21deaa8aacdc7\assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 12 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'transform_graph': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Transform\\transform_graph\\12"
, artifact_type: name: "TransformGraph"
)], 'post_transform_anomalies': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Transform\\post_transform_anomalies\\12"
, artifact_type: name: "ExampleAnomalies"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
)], 'post_transform_schema': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Transform\\post_transform_schema\\12"
, artifact_type: name: "Schema"
)], 'updated_analyzer_cache': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Transform\\updated_analyzer_cache\\12"
, artifact_type: name: "TransformCache"
)], 'p

INFO:absl:Validating schema against the computed statistics for split train.
INFO:absl:Validation complete for split train. Anomalies written to output\gesang_wibawono-pipeline\ExampleValidator\anomalies\13\Split-train.
INFO:absl:Validating schema against the computed statistics for split eval.
INFO:absl:Validation complete for split eval. Anomalies written to output\gesang_wibawono-pipeline\ExampleValidator\anomalies\13\Split-eval.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 13 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'anomalies': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\ExampleValidator\\anomalies\\13"
, artifact_type: name: "ExampleAnomalies"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
)]}) for execution 13
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node ExampleValidator is finished.
INF

INFO:absl:udf_utils.get_fn {'custom_config': 'null', 'module_path': 'fake_detection_trainer@output\\gesang_wibawono-pipeline\\_wheels\\tfx_user_code_Trainer-0.0+fbba7894448d71d82e81ea0710eeb6bd5c1bd6c7300c401771b1cf173c7dae84-py3-none-any.whl', 'eval_args': '{\n  "num_steps": 10,\n  "splits": [\n    "eval"\n  ]\n}', 'train_args': '{\n  "num_steps": 20,\n  "splits": [\n    "train"\n  ]\n}'} 'run_fn'
INFO:absl:Installing 'output\\gesang_wibawono-pipeline\\_wheels\\tfx_user_code_Trainer-0.0+fbba7894448d71d82e81ea0710eeb6bd5c1bd6c7300c401771b1cf173c7dae84-py3-none-any.whl' to a temporary directory.
INFO:absl:Executing: ['C:\\Users\\BPS\\.conda\\envs\\a443-churn\\python.exe', '-m', 'pip', 'install', '--target', 'C:\\Users\\BPS\\AppData\\Local\\Temp\\tmpr45ryxwj', 'output\\gesang_wibawono-pipeline\\_wheels\\tfx_user_code_Trainer-0.0+fbba7894448d71d82e81ea0710eeb6bd5c1bd6c7300c401771b1cf173c7dae84-py3-none-any.whl']
INFO:absl:Successfully installed 'output\\gesang_wibawono-pipeline\\_wheels\\

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_xf (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization (TextVec  (None, 500)              0         
 torization)                                                     
                                                                 
 embedding (Embedding)       (None, 500, 16)           80000     
                                                                 
 global_average_pooling1d (G  (None, 16)               0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 64)                1088      
                                                                 
 dense_1 (Dense)             (None, 32)                2080  

INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving\assets


Epoch 2/10
Epoch 2: val_binary_accuracy improved from 0.45625 to 0.63125, saving model to output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving
INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving\assets


INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving\assets


Epoch 3/10
Epoch 3: val_binary_accuracy improved from 0.63125 to 0.70625, saving model to output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving
INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving\assets


INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving\assets


Epoch 4/10
Epoch 4: val_binary_accuracy did not improve from 0.70625
Epoch 5/10
Epoch 5: val_binary_accuracy did not improve from 0.70625
Epoch 5: early stopping
INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving\assets


INFO:tensorflow:Assets written to: output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving\assets
INFO:absl:Training complete. Model written to output\gesang_wibawono-pipeline\Trainer\model\14\Format-Serving. ModelRun written to output\gesang_wibawono-pipeline\Trainer\model_run\14
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 14 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Trainer\\model\\14"
, artifact_type: name: "Model"
base_type: MODEL
)], 'model_run': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Trainer\\model_run\\14"
, artifact_type: name: "ModelRun"
)]}) for execution 14
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Trainer is finished.
INFO:absl:node Evaluator is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.evaluator.comp

INFO:absl:MetadataStore with DB connection initialized
INFO:absl:Going to run a new execution 15
INFO:absl:Going to run a new execution: ExecutionInfo(execution_id=15, input_dict={'baseline_model': [], 'model': [Artifact(artifact: id: 25
type_id: 27
uri: "output\\gesang_wibawono-pipeline\\Trainer\\model\\14"
custom_properties {
  key: "is_external"
  value {
    int_value: 0
  }
}
custom_properties {
  key: "state"
  value {
    string_value: "published"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.11.0"
  }
}
state: LIVE
create_time_since_epoch: 1700536065617
last_update_time_since_epoch: 1700536065617
, artifact_type: id: 27
name: "Model"
base_type: MODEL
)], 'examples': [Artifact(artifact: id: 13
type_id: 15
uri: "output\\gesang_wibawono-pipeline\\CsvExampleGen\\examples\\9"
properties {
  key: "split_names"
  value {
    string_value: "[\"train\", \"eval\"]"
  }
}
custom_properties {
  key: "file_format"
  value {
    string_value: "tfrecords_gzip"


INFO:absl:udf_utils.get_fn {'fairness_indicator_thresholds': 'null', 'example_splits': 'null', 'eval_config': '{\n  "metrics_specs": [\n    {\n      "metrics": [\n        {\n          "class_name": "ExampleCount"\n        },\n        {\n          "class_name": "AUC"\n        },\n        {\n          "class_name": "TruePositives"\n        },\n        {\n          "class_name": "FalsePositives"\n        },\n        {\n          "class_name": "TrueNegatives"\n        },\n        {\n          "class_name": "FalseNegatives"\n        },\n        {\n          "class_name": "BinaryAccuracy",\n          "threshold": {\n            "change_threshold": {\n              "absolute": 0.0001,\n              "direction": "HIGHER_IS_BETTER"\n            },\n            "value_threshold": {\n              "lower_bound": 0.5\n            }\n          }\n        }\n      ]\n    }\n  ],\n  "model_specs": [\n    {\n      "label_key": "label"\n    }\n  ],\n  "slicing_specs": [\n    {}\n  ]\n}'} 'custom_eval_



INFO:absl:The 'example_splits' parameter is not set, using 'eval' split.
INFO:absl:Evaluating model.
INFO:absl:udf_utils.get_fn {'fairness_indicator_thresholds': 'null', 'example_splits': 'null', 'eval_config': '{\n  "metrics_specs": [\n    {\n      "metrics": [\n        {\n          "class_name": "ExampleCount"\n        },\n        {\n          "class_name": "AUC"\n        },\n        {\n          "class_name": "TruePositives"\n        },\n        {\n          "class_name": "FalsePositives"\n        },\n        {\n          "class_name": "TrueNegatives"\n        },\n        {\n          "class_name": "FalseNegatives"\n        },\n        {\n          "class_name": "BinaryAccuracy",\n          "threshold": {\n            "change_threshold": {\n              "absolute": 0.0001,\n              "direction": "HIGHER_IS_BETTER"\n            },\n            "value_threshold": {\n              "lower_bound": 0.5\n            }\n          }\n        }\n      ]\n    }\n  ],\n  "model_specs": [\



























INFO:absl:Evaluation complete. Results written to output\gesang_wibawono-pipeline\Evaluator\evaluation\15.
INFO:absl:Checking validation results.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`
INFO:absl:Blessing result True written to output\gesang_wibawono-pipeline\Evaluator\blessing\15.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 15 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'blessing': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Evaluator\\blessing\\15"
, artifact_type: name: "ModelBlessing"
)], 'evaluation': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Evaluator\\evaluation\\15"
, artifact_type: name: "ModelEvaluation"
)]}) for execution 15
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Evaluator is finished.
INFO:absl:node Pusher is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.pusher.component.Pusher"
    base_type: DEPLOY
  }
  id: "Pusher"
}
contexts {
  contexts {
    type {
      name

INFO:absl:Model version: 1700536075
INFO:absl:Model written to serving path output\serving_model\1700536075.
INFO:absl:Model pushed to output\gesang_wibawono-pipeline\Pusher\pushed_model\16.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 16 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'pushed_model': [Artifact(artifact: uri: "output\\gesang_wibawono-pipeline\\Pusher\\pushed_model\\16"
, artifact_type: name: "PushedModel"
base_type: MODEL
)]}) for execution 16
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Pusher is finished.


In [6]:
!pip freeze >> requirements.txt