In [1]:
import os
import sys
from typing import Text
 
from absl import logging
from tfx.orchestration import metadata, pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

In [2]:
# --- Pipeline identity -------------------------------------------------------
PIPELINE_NAME = "customer-churn-pipeline"

# --- Inputs: raw data location and user-module files -------------------------
DATA_ROOT = "data"
TRANSFORM_MODULE_FILE = "modules/customer_churn_transform.py"
TRAINER_MODULE_FILE = "modules/customer_churn_trainer.py"

# --- Outputs: every generated path is rooted at OUTPUT_BASE ------------------
OUTPUT_BASE = "output"
pipeline_root = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
# The metadata database is placed inside the pipeline root directory.
metadata_path = os.path.join(pipeline_root, "metadata.sqlite")
serving_model_dir = os.path.join(OUTPUT_BASE, "serving_model")

In [3]:
def init_local_pipeline(
    components, pipeline_root: Text
) -> pipeline.Pipeline:
    """Assemble the TFX pipeline object for a local Beam run.

    Args:
        components: The TFX components to run; passed through unchanged to
            ``pipeline.Pipeline``.
        pipeline_root: Root directory handed to ``pipeline.Pipeline`` as
            ``pipeline_root``.

    Returns:
        A ``pipeline.Pipeline`` named ``PIPELINE_NAME`` with caching enabled,
        an SQLite metadata store at the module-level ``metadata_path``, and
        the Beam direct-runner arguments defined below.
    """
    logging.info(f"Pipeline root set to: {pipeline_root}")

    # Each option must be its own list element. Without the separating comma
    # Python concatenates adjacent string literals into one invalid flag.
    beam_args = [
        "--direct_running_mode=multi_processing",
        # 0 = auto-detect based on the number of CPUs available
        # at execution time.
        "--direct_num_workers=0",
    ]

    return pipeline.Pipeline(
        pipeline_name=PIPELINE_NAME,
        pipeline_root=pipeline_root,
        components=components,
        enable_cache=True,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path
        ),
        # Must be spelled `beam_pipeline_args`; a misspelled keyword does not
        # deliver these options to Beam.
        beam_pipeline_args=beam_args,
    )

In [4]:
if __name__ == "__main__":
    logging.set_verbosity(logging.INFO)

    # Project-local factory that builds the pipeline's component list.
    from modules.components import init_components

    components = init_components(
        DATA_ROOT,
        training_module=TRAINER_MODULE_FILE,
        transform_module=TRANSFORM_MODULE_FILE,
        training_steps=5000,
        eval_steps=1000,
        serving_model_dir=serving_model_dir,
    )

    # Do not bind the result to the name `pipeline`: that would overwrite the
    # imported `tfx.orchestration.pipeline` module, and re-running the cell
    # that defines init_local_pipeline (which references `pipeline.Pipeline`)
    # would then fail.
    local_pipeline = init_local_pipeline(components, pipeline_root)
    BeamDagRunner().run(pipeline=local_pipeline)

INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Excluding no splits because exclude_splits is not set.
INFO:absl:Pipeline root set to: output\customer-churn-pipeline
INFO:absl:Generating ephemeral wheel package for 'd:\\Coolyeah\\Dicoding\\Submission_Dicoding\\ml_ops\\Latihan 2\\modules\\customer_churn_transform.py' (including modules: ['components', 'customer_churn_trainer', 'customer_churn_transform']).
INFO:absl:User module package has hash fingerprint version ab366fa6a9beb811b09b347c778e2224f498a129d47e1a42aa6beb3e7d74a5d6.
INFO:absl:Executing: ['d:\\Coolyeah\\Dicoding\\Submission_Dicoding\\ml_ops\\envs\\mlops-churn\\python.exe', 'C:\\Users\\User\\AppData\\Local\\Temp\\tmp73qiu_zg\\_tfx_generated_setup.py', 'bdist_wheel', '--bdist-dir', 'C:\\Users\\User\\AppData\\Local\\Temp\\tmpwzbobpne', '--dist-dir', 'C:\\Users\\User\\AppData\\Local\\Temp\\tmp3873ls37']
INFO:absl:Successfully built user c

INFO:absl:Node CsvExampleGen depends on [].
INFO:absl:Node CsvExampleGen is scheduled.
INFO:absl:Node Latest_blessed_model_resolver depends on [].
INFO:absl:Node Latest_blessed_model_resolver is scheduled.
INFO:absl:Node StatisticsGen depends on ['Run[CsvExampleGen]'].
INFO:absl:Node StatisticsGen is scheduled.
INFO:absl:Node SchemaGen depends on ['Run[StatisticsGen]'].
INFO:absl:Node SchemaGen is scheduled.
INFO:absl:Node ExampleValidator depends on ['Run[SchemaGen]', 'Run[StatisticsGen]'].
INFO:absl:Node ExampleValidator is scheduled.
INFO:absl:Node Transform depends on ['Run[CsvExampleGen]', 'Run[SchemaGen]'].
INFO:absl:Node Transform is scheduled.
INFO:absl:Node Trainer depends on ['Run[SchemaGen]', 'Run[Transform]'].
INFO:absl:Node Trainer is scheduled.
INFO:absl:Node Evaluator depends on ['Run[CsvExampleGen]', 'Run[Latest_blessed_model_resolver]', 'Run[Trainer]'].
INFO:absl:Node Evaluator is scheduled.
INFO:absl:Node Pusher depends on ['Run[Evaluator]', 'Run[Trainer]'].
INFO:absl

Instructions for updating:
Use ref() instead.


Instructions for updating:
Use ref() instead.
INFO:absl:Feature Churn has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature InternetService has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature MonthlyCharges has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature PaperlessBilling has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature Partner has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature PhoneService has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature SeniorCitizen has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature StreamingTV has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature TotalCharges has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature customerID has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:absl:Feature gender has a shape dim {
  size: 1
}
. Setting to DenseTensor.
INFO:abs

INFO:tensorflow:Assets written to: output\customer-churn-pipeline\Transform\transform_graph\5\.temp_path\tftransform_tmp\25edf3f9252b4c028ac396344eb389a3\assets


INFO:tensorflow:Assets written to: output\customer-churn-pipeline\Transform\transform_graph\5\.temp_path\tftransform_tmp\25edf3f9252b4c028ac396344eb389a3\assets


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or

INFO:tensorflow:Assets written to: output\customer-churn-pipeline\Transform\transform_graph\5\.temp_path\tftransform_tmp\baa138e31675471682fd48d7e9d92ee0\assets


INFO:tensorflow:Assets written to: output\customer-churn-pipeline\Transform\transform_graph\5\.temp_path\tftransform_tmp\baa138e31675471682fd48d7e9d92ee0\assets
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentially more efficient implementation.
INFO:absl:If the number of unique tokens is smaller than the provided top_k or approximation error is acceptable, consider using tft.experimental.approximate_vocabulary for a potentiall

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 5 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'transformed_examples': [Artifact(artifact: uri: "output\\customer-churn-pipeline\\Transform\\transformed_examples\\5"
, artifact_type: name: "Examples"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
properties {
  key: "version"
  value: INT
}
base_type: DATASET
)], 'pre_transform_schema': [Artifact(artifact: uri: "output\\customer-churn-pipeline\\Transform\\pre_transform_schema\\5"
, artifact_type: name: "Schema"
)], 'pre_transform_stats': [Artifact(artifact: uri: "output\\customer-churn-pipeline\\Transform\\pre_transform_stats\\5"
, artifact_type: name: "ExampleStatistics"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
base_type: STATISTICS
)], 'post

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 InternetService_xf (InputLayer  [(None, 4)]         0           []                               
 )                                                                                                
                                                                                                  
 SeniorCitizen_xf (InputLayer)  [(None, 3)]          0           []                               
                                                                                                  
 PaperlessBilling_xf (InputLaye  [(None, 3)]         0           []                               
 r)                                                                                               
                                                                                              

INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: output\customer-churn-pipeline\Trainer\model\7\Format-Serving\assets


INFO:tensorflow:Assets written to: output\customer-churn-pipeline\Trainer\model\7\Format-Serving\assets


You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


INFO:absl:Training complete. Model written to output\customer-churn-pipeline\Trainer\model\7\Format-Serving. ModelRun written to output\customer-churn-pipeline\Trainer\model_run\7
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 7 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'model': [Artifact(artifact: uri: "output\\customer-churn-pipeline\\Trainer\\model\\7"
, artifact_type: name: "Model"
base_type: MODEL
)], 'model_run': [Artifact(artifact: uri: "output\\customer-churn-pipeline\\Trainer\\model_run\\7"
, artifact_type: name: "ModelRun"
)]}) for execution 7
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Trainer is finished.
INFO:absl:node Evaluator is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.evaluator.component.Evaluator"
    base_type: EVALUATE
  }
  id: "Evaluator"
}
contexts {
  contexts {
    type {
      name: "pip



INFO:absl:The 'example_splits' parameter is not set, using 'eval' split.
INFO:absl:Evaluating model.
INFO:absl:udf_utils.get_fn {'fairness_indicator_thresholds': 'null', 'eval_config': '{\n  "metrics_specs": [\n    {\n      "metrics": [\n        {\n          "class_name": "AUC"\n        },\n        {\n          "class_name": "Precision"\n        },\n        {\n          "class_name": "Recall"\n        },\n        {\n          "class_name": "ExampleCount"\n        },\n        {\n          "class_name": "BinaryAccuracy",\n          "threshold": {\n            "change_threshold": {\n              "absolute": 0.0001,\n              "direction": "HIGHER_IS_BETTER"\n            },\n            "value_threshold": {\n              "lower_bound": 0.5\n            }\n          }\n        }\n      ]\n    }\n  ],\n  "model_specs": [\n    {\n      "label_key": "Churn"\n    }\n  ],\n  "slicing_specs": [\n    {},\n    {\n      "feature_keys": [\n        "gender",\n        "Partner"\n      ]\n    }\n 



























INFO:absl:Evaluation complete. Results written to output\customer-churn-pipeline\Evaluator\evaluation\8.
INFO:absl:Checking validation results.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`
INFO:absl:Blessing result True written to output\customer-churn-pipeline\Evaluator\blessing\8.
INFO:absl:Cleaning up stateless execution info.
INFO:absl:Execution 8 succeeded.
INFO:absl:Cleaning up stateful execution info.
INFO:absl:Publishing output artifacts defaultdict(<class 'list'>, {'blessing': [Artifact(artifact: uri: "output\\customer-churn-pipeline\\Evaluator\\blessing\\8"
, artifact_type: name: "ModelBlessing"
)], 'evaluation': [Artifact(artifact: uri: "output\\customer-churn-pipeline\\Evaluator\\evaluation\\8"
, artifact_type: name: "ModelEvaluation"
)]}) for execution 8
INFO:absl:MetadataStore with DB connection initialized
INFO:absl:node Evaluator is finished.
INFO:absl:node Pusher is running.
INFO:absl:Running launcher for node_info {
  type {
    name: "tfx.components.pusher.component.Pusher"
    base_type: DEPLOY
  }
  id: "Pusher"
}
contexts {
  contexts {
    type {
      name: "pipel

In [15]:
import tensorflow as tf
import requests
import json
import base64

def encode_example():
    """Serialize one hard-coded sample customer as a ``tf.train.Example``.

    Returns:
        The value of ``SerializeToString()`` for the constructed Example.
    """
    def _bytes(value):
        # Single-element bytes feature.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    def _float(value):
        # Single-element float feature.
        return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

    def _int64(value):
        # Single-element int64 feature.
        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

    feature = {
        "customerID": _bytes(b"12345"),  # added: this key is included in the request
        "InternetService": _bytes(b"Fiber optic"),
        "SeniorCitizen": _bytes(b"Yes"),  # fixed: sent as a string, not an int
        "PaperlessBilling": _bytes(b"Yes"),
        "Partner": _bytes(b"No"),
        "PhoneService": _bytes(b"Yes"),
        "StreamingTV": _bytes(b"No"),
        "gender": _bytes(b"Female"),
        "MonthlyCharges": _float(70.0),
        "TotalCharges": _float(2000.0),
        "tenure": _int64(24),  # make sure this stays int64
    }
    record = tf.train.Example(features=tf.train.Features(feature=feature))
    return record.SerializeToString()

# Serialize the sample record as a tf.train.Example byte string
encoded_example = encode_example()

# Base64-encode the bytes so they can be embedded in the JSON body
encoded_example_b64 = base64.b64encode(encoded_example).decode('utf-8')

# Send the prediction request to the model served on Railway
url = "https://mlops-churn-production-final.up.railway.app/v1/models/cc-model:predict"
headers = {"content-type": "application/json"}
data = json.dumps({
    "signature_name": "serving_default",
    "instances": [{"examples": {"b64": encoded_example_b64}}]
})

# A timeout keeps the cell from hanging forever on an unresponsive server;
# raise_for_status() surfaces HTTP errors before the body is parsed as JSON.
response = requests.post(url, data=data, headers=headers, timeout=30)
response.raise_for_status()
print(response.json())


{'predictions': [[0.371655136]]}


In [16]:
import tensorflow as tf
import base64

def encode_example():
    """Build and serialize a fixed sample customer ``tf.train.Example``.

    Returns:
        The serialized Example produced by ``SerializeToString()``.
    """
    # Values grouped by feature type; group order keeps the original key order.
    bytes_values = [
        ("customerID", b"12345"),
        ("InternetService", b"Fiber optic"),
        ("SeniorCitizen", b"Yes"),
        ("PaperlessBilling", b"Yes"),
        ("Partner", b"No"),
        ("PhoneService", b"Yes"),
        ("StreamingTV", b"No"),
        ("gender", b"Female"),
    ]
    float_values = [
        ("MonthlyCharges", 70.0),
        ("TotalCharges", 2000.0),
    ]
    int64_values = [
        ("tenure", 24),
    ]

    feature = {}
    for key, raw in bytes_values:
        feature[key] = tf.train.Feature(bytes_list=tf.train.BytesList(value=[raw]))
    for key, raw in float_values:
        feature[key] = tf.train.Feature(float_list=tf.train.FloatList(value=[raw]))
    for key, raw in int64_values:
        feature[key] = tf.train.Feature(int64_list=tf.train.Int64List(value=[raw]))

    features = tf.train.Features(feature=feature)
    return tf.train.Example(features=features).SerializeToString()

# Serialize the sample record, then render it as base64 text
encoded_example = encode_example()
encoded_example_b64 = str(base64.b64encode(encoded_example), 'utf-8')

# Copy the printed value from the cell output
print(encoded_example_b64)


CpQCChgKDVNlbmlvckNpdGl6ZW4SBwoFCgNZZXMKFwoKY3VzdG9tZXJJRBIJCgcKBTEyMzQ1CiIKD0ludGVybmV0U2VydmljZRIPCg0KC0ZpYmVyIG9wdGljChQKBmdlbmRlchIKCggKBkZlbWFsZQoRCgdQYXJ0bmVyEgYKBAoCTm8KGgoOTW9udGhseUNoYXJnZXMSCBIGCgQAAIxCCg8KBnRlbnVyZRIFGgMKARgKGAoMVG90YWxDaGFyZ2VzEggSBgoEAAD6RAoXCgxQaG9uZVNlcnZpY2USBwoFCgNZZXMKFQoLU3RyZWFtaW5nVFYSBgoECgJObwobChBQYXBlcmxlc3NCaWxsaW5nEgcKBQoDWWVz


In [1]:
import pandas as pd
import requests

# Dataset URL
url = "https://raw.githubusercontent.com/dicodingacademy/assets/main/Simulation/machine_learning/bbc-text2.csv"

# Download the CSV. The timeout prevents an indefinite hang, and
# raise_for_status() stops an HTTP error page from being saved as the dataset.
response = requests.get(url, timeout=30)
response.raise_for_status()
csv_filename = "bbc-text2.csv"

# Save the downloaded bytes to a local file
with open(csv_filename, "wb") as file:
    file.write(response.content)

print(f"File {csv_filename} berhasil diunduh!")




File bbc-text2.csv berhasil diunduh!


In [None]:
# Load the downloaded CSV into a pandas DataFrame
df = pd.read_csv(filepath_or_buffer=csv_filename)

# Preview the first five rows as the cell's displayed result
df.head(n=5)