# 1. Setup & Import

In [1]:
import os
import pandas as pd
from typing import Text
from absl import logging
from tfx.orchestration import metadata, pipeline
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner
from tfx.dsl.components.common.resolver import Resolver
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import LatestBlessedModelStrategy
from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2, tuner_pb2 


# --- Pipeline identity ---
# Name used to group this pipeline's artifacts below the output folder.
PIPELINE_NAME = "andreaswd31-dicoding_pipeline_project2"

# --- Inputs ---
# Folder with the raw input data, relative to the working directory.
DATA_ROOT = "data"

# The user-code modules are anchored to the current working directory, so both
# paths are absolute at the moment this cell runs.
TRANSFORM_MODULE_FILE, TRAINER_MODULE_FILE = (
    os.path.join(os.getcwd(), "modules", module_name)
    for module_name in ("transform_v2.py", "trainer_v2.py")
)

# --- Outputs ---
# Everything the pipeline writes lives below OUTPUT_BASE:
#   <OUTPUT_BASE>/<PIPELINE_NAME>/...               pipeline root
#   <OUTPUT_BASE>/<PIPELINE_NAME>/metadata.sqlite   metadata database
#   <OUTPUT_BASE>/serving_model                     pushed serving model
OUTPUT_BASE = "output"
PIPELINE_ROOT = os.path.join(OUTPUT_BASE, PIPELINE_NAME)
METADATA_PATH = os.path.join(PIPELINE_ROOT, "metadata.sqlite")
SERVING_MODEL_DIR = os.path.join(OUTPUT_BASE, 'serving_model')

# Echo the resolved root so it can be checked at a glance.
print(f"Pipeline Root: {PIPELINE_ROOT}")

Pipeline Root: output\andreaswd31-dicoding_pipeline_project2


In [2]:
# --- CSV VALIDATION (REQUIRED) ---
# The Churn label must already be stored as 0/1 in the CSV file.
import pandas as pd
import os

csv_path = os.path.join(DATA_ROOT, 'WA_Fn-UseC_-Telco-Customer-Churn.csv')
df_check = pd.read_csv(csv_path)

# Show the distinct label values currently present in the file.
print("Unique Churn Values:", df_check['Churn'].unique())

if df_check['Churn'].dtype != 'object':
    # Column is not stored as Python objects (strings): nothing to repair.
    print("✅ CSV Aman. Churn sudah integer.")
else:
    # Text labels detected: encode 'Yes' as 1 and every other value as 0,
    # then overwrite the source CSV in place with the encoded column.
    print("⚠️ Terdeteksi Label String. Sedang memperbaiki...")
    df_check['Churn'] = df_check['Churn'].map(lambda label: 1 if label == 'Yes' else 0)
    df_check.to_csv(csv_path, index=False)
    print("✅ CSV diperbaiki. Churn sekarang integer.")

Unique Churn Values: [0 1]
✅ CSV Aman. Churn sudah integer.


# 2. Komponen TFX: Ingestion, Validation, Transform

In [3]:
import tensorflow_model_analysis as tfma

from tfx.components import (
    CsvExampleGen, 
    StatisticsGen, 
    SchemaGen, 
    ExampleValidator, 
    Transform, 
    Tuner, 
    Trainer,
    Evaluator,
    Pusher,
)

from tfx.proto import example_gen_pb2, trainer_pb2, pusher_pb2
from tfx.types import Channel
from tfx.types.standard_artifacts import Model, ModelBlessing

# Resolver import yg benar
from tfx.dsl.components.common.resolver import Resolver
from tfx.dsl.input_resolution.strategies.latest_blessed_model_strategy import LatestBlessedModelStrategy


# 1. ExampleGen: read the CSV files under DATA_ROOT and hash-split them into
#    a 'train' split (8 buckets) and an 'eval' split (2 buckets).
output_config = example_gen_pb2.Output(
    split_config=example_gen_pb2.SplitConfig(
        splits=[
            example_gen_pb2.SplitConfig.Split(name=split_name, hash_buckets=bucket_count)
            for split_name, bucket_count in (('train', 8), ('eval', 2))
        ]
    )
)
example_gen = CsvExampleGen(
    input_base=DATA_ROOT,
    output_config=output_config,
)

# 2. Statistics & schema: statistics are computed from the ingested examples,
#    the schema is inferred from those statistics, and the validator receives
#    both the statistics and the schema.
statistics_gen = StatisticsGen(
    examples=example_gen.outputs['examples'],
)
schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'],
    infer_feature_shape=True,
)
example_validator = ExampleValidator(
    statistics=statistics_gen.outputs['statistics'],
    schema=schema_gen.outputs['schema'],
)

# 3. Transform: the preprocessing code is loaded from the module file in the
#    'modules' folder.
transform = Transform(
    module_file=os.path.abspath(TRANSFORM_MODULE_FILE),
    examples=example_gen.outputs['examples'],
    schema=schema_gen.outputs['schema'],
)

# 3. Komponen TFX: Tuner & Trainer

In [4]:
# 4. Tuner: hyperparameter search over the transformed examples, using the
#    same user module as the Trainer. Short runs: 20 train / 5 eval steps.
tuner = Tuner(
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    module_file=os.path.abspath(TRAINER_MODULE_FILE),
    train_args=trainer_pb2.TrainArgs(
        splits=['train'],
        num_steps=20,
    ),
    eval_args=trainer_pb2.EvalArgs(
        splits=['eval'],
        num_steps=5,
    ),
)

# 5. Trainer: trains with the best hyperparameters emitted by the Tuner.
#    Longer run: 100 train / 50 eval steps.
trainer = Trainer(
    examples=transform.outputs['transformed_examples'],
    transform_graph=transform.outputs['transform_graph'],
    schema=schema_gen.outputs['schema'],
    hyperparameters=tuner.outputs['best_hyperparameters'],
    module_file=os.path.abspath(TRAINER_MODULE_FILE),
    train_args=trainer_pb2.TrainArgs(
        splits=['train'],
        num_steps=100,
    ),
    eval_args=trainer_pb2.EvalArgs(
        splits=['eval'],
        num_steps=50,
    ),
)

# 4. Komponen TFX: Evaluator & Pusher

In [6]:
# 6. Resolver: resolves a Model / ModelBlessing pair with the
#    LatestBlessedModelStrategy; its 'model' output is the Evaluator baseline.
model_resolver = Resolver(
    strategy_class=LatestBlessedModelStrategy,
    model=Channel(type=Model),
    model_blessing=Channel(type=ModelBlessing),
).with_id('latest_blessed_model_resolver')

# 7. Evaluator
# Acceptance rule for BinaryAccuracy:
#   - value threshold: lower bound of 0.5
#   - change threshold: higher is better, absolute change bound of -1e-10.
#     The change threshold is optional and could be dropped while the baseline
#     is not yet stable; it is kept here to follow the standard setup.
binary_accuracy_threshold = tfma.MetricThreshold(
    value_threshold=tfma.GenericValueThreshold(lower_bound={'value': 0.5}),
    change_threshold=tfma.GenericChangeThreshold(
        direction=tfma.MetricDirection.HIGHER_IS_BETTER,
        absolute={'value': -1e-10},
    ),
)

eval_config = tfma.EvalConfig(
    # The original label name 'Churn' is used because the Evaluator is fed the
    # ExampleGen output (raw data), not the transformed examples.
    model_specs=[tfma.ModelSpec(label_key='Churn')],
    slicing_specs=[tfma.SlicingSpec()],
    metrics_specs=[
        tfma.MetricsSpec(
            metrics=[
                tfma.MetricConfig(
                    class_name='BinaryAccuracy',
                    threshold=binary_accuracy_threshold,
                )
            ]
        )
    ],
)

evaluator = Evaluator(
    examples=example_gen.outputs['examples'],  # raw input data
    model=trainer.outputs['model'],
    baseline_model=model_resolver.outputs['model'],
    eval_config=eval_config,
)

# 8. Pusher: receives the trained model together with the Evaluator's
#    blessing; the filesystem destination is SERVING_MODEL_DIR.
serving_destination = pusher_pb2.PushDestination(
    filesystem=pusher_pb2.PushDestination.Filesystem(
        base_directory=SERVING_MODEL_DIR,
    )
)
pusher = Pusher(
    model=trainer.outputs['model'],
    model_blessing=evaluator.outputs['blessing'],
    push_destination=serving_destination,
)

In [7]:
# --- REPLACEMENT FOR BeamDagRunner (works around the Tuner bug) ---
# Components are executed one at a time through InteractiveContext so that a
# small set of known, non-fatal errors can be intercepted while the remaining
# components (Trainer, Pusher, ...) still run. Any other error aborts the loop.

from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext

# 1. Initialise the context on the same root as the rest of the configuration.
context = InteractiveContext(pipeline_root=PIPELINE_ROOT)

# 2. Components, listed in execution order.
components_list = [
    example_gen,
    statistics_gen,
    schema_gen,
    example_validator,
    transform,
    tuner,          # <--- the component that triggers the known display error
    trainer,
    model_resolver,
    evaluator,
    pusher
]

# Ids of components whose error was deliberately ignored. They are reported at
# the end so a later failure can be traced back to a bypassed component.
bypassed_components = []

# 3. Run the components one by one.
for component in components_list:
    print(f"\n Menjalankan {component.id}...")
    try:
        context.run(component)
    except Exception as e:
        error_msg = str(e)
        # str(e) normally carries only the message, not the exception class
        # name, so the class name is inspected separately.
        error_name = type(e).__name__

        # Known display-time error raised around the Tuner (observed on TFX 1.11).
        # NOTE(review): the messages below claim the tuning artifacts were saved;
        # this loop does not verify that - confirm before trusting the bypass.
        if "'<' not supported between instances of 'int' and 'NoneType'" in error_msg:
            print(f"{component.id}: Terdeteksi Bug Visual TFX (Abaikan).")
            print("   Proses Tuning sebenarnya SUKSES. Artifact tersimpan.")
            print(f"{component.id} dianggap SELESAI (Bypass).")
            bypassed_components.append(component.id)

        # Metadata errors (AlreadyExists) that show up when cells are re-run.
        elif (
            error_name == "AlreadyExistsError"
            or "AlreadyExistsError" in error_msg
            or "'NoneType' object has no attribute 'name'" in error_msg
        ):
            print(f"{component.id}: Masalah Metadata (Abaikan). Pipeline tetap lanjut.")
            bypassed_components.append(component.id)

        else:
            # Anything else is fatal: stop and re-raise the active exception.
            print(f"{component.id} GAGAL FATAL!")
            raise
    else:
        print(f"{component.id} SELESAI.")

# Make the swallowed failures visible instead of leaving them buried in the log.
if bypassed_components:
    print("\nKomponen yang error-nya diabaikan (bypass): " + ", ".join(bypassed_components))

Trial 10 Complete [00h 00m 04s]
val_binary_accuracy: 0.784375011920929

Best val_binary_accuracy So Far: 0.840624988079071
Total elapsed time: 00h 00m 37s
Results summary
Results in output\andreaswd31-dicoding_pipeline_project2\.temp\48\telco_churn_kt
Showing 10 best trials
Objective(name="val_binary_accuracy", direction="max")

Trial 0005 summary
Hyperparameters:
units: 96
num_layers: 1
learning_rate: 0.01
tuner/epochs: 5
tuner/initial_epoch: 2
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0000
Score: 0.840624988079071

Trial 0008 summary
Hyperparameters:
units: 128
num_layers: 2
learning_rate: 0.001
tuner/epochs: 5
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.8125

Trial 0000 summary
Hyperparameters:
units: 96
num_layers: 1
learning_rate: 0.01
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.809374988079071

Trial 0001 summary
Hyperparameters:
units: 96
num_layers: 3
learning_rate: 0.01
tuner/epochs: 2
tuner/initial_epoch: 0
tuner/b







INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:struct2tensor is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_decision_forests is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:tensorflow_text is not available.


INFO:tensorflow:Assets written to: output\andreaswd31-dicoding_pipeline_project2\Trainer\model\49\Format-Serving\assets


INFO:tensorflow:Assets written to: output\andreaswd31-dicoding_pipeline_project2\Trainer\model\49\Format-Serving\assets


Trainer SELESAI.

 Menjalankan latest_blessed_model_resolver...
latest_blessed_model_resolver SELESAI.

 Menjalankan Evaluator...












Evaluator GAGAL FATAL!


ValueError: no value provided for label

This may be caused by a configuration error (i.e. label, and/or prediction keys were not specified) or an error in the pipeline.

to_label_prediction_example_weight(inputs={'labels': None, 'predictions': array([0.04115891], dtype=float32)}, eval_config=model_specs {
  label_key: "Churn_xf"
}
slicing_specs {
}
metrics_specs {
  metrics {
    class_name: "BinaryAccuracy"
    threshold {
      value_threshold {
        lower_bound {
          value: 0.5
        }
      }
    }
  }
  model_names: ""
}
, model_name=, output_name=, sub_key=None, aggregation_type=None, class_weights=None, fractional_labels=False, flatten=False, squeeze=True, allow_none=False) [while running 'ExtractEvaluateAndWriteResults/ExtractAndEvaluate/EvaluateMetricsAndPlots/ComputeMetricsAndPlots()/CombineMetricsPerSlice/WindowIntoDiscarding']