# Full Observability Demo: Logging, MLflow & OpenLineage

This notebook demonstrates the integrated use of structured logging (`DataLogger`), experiment tracking (`MLflow`), and data lineage (`OpenLineage`).

In [None]:
import sys
import os
from uuid import uuid4

# Add the src directory to the Python path so the `utils.*` imports below resolve.
# The relative path is resolved against the current working directory, so this
# assumes the kernel is started in a directory that sits next to `src`
# (e.g. a `notebooks/` folder) — TODO confirm for other launch locations.
sys.path.append(os.path.abspath(os.path.join('..', 'src')))

# Import our custom utilities
from utils.logging import DataLogger
from utils.mlflow_tracking import get_or_create_experiment
from utils.lineage_tracking import get_lineage_client, emit_lineage_event

# Import ML/OL libraries
import mlflow
from openlineage.client.run import RunState
from openlineage.client.dataset import Dataset, Source
from openlineage.client.facet import DataSourceDatasetFacet

In [None]:
# 1. Initialize Services
# Structured logger for this notebook; events are written to ../logs/demo.log.
logger = DataLogger(
    name='observability_demo',
    log_file='../logs/demo.log',
)
logger.log_event(
    'NOTEBOOK_START',
    {'notebook_name': '02_full_observability_demo.ipynb'},
)

# Lineage client used by every emit_lineage_event call further down.
lineage_client = get_lineage_client()
logger.log_event(
    'OPENLINEAGE_CLIENT_CREATED',
    {},
)

In [None]:
# 2. Define Job and Experiment Details
experiment_name = "Full_Observability_Demo"
job_name = "process_raw_data"
# Identifier passed to every lineage event for this run; its first 8 characters
# are also embedded in the MLflow run name so the two records can be matched up.
run_uuid = str(uuid4())

# 3. Define Lineage Datasets
# NOTE(review): the `Source(producer_url=...)` signature should be checked
# against the installed OpenLineage client version — TODO confirm.
input_dataset = Dataset(
    source=Source(producer_url="file://"),
    name="/data/raw/input.csv",
    facets={
        "dataSource": DataSourceDatasetFacet(
            name="raw_data",
            uri="file://data/raw",
        ),
    },
)
output_dataset = Dataset(
    source=Source(producer_url="file://"),
    name="/data/processed/output.csv",
    facets={
        "dataSource": DataSourceDatasetFacet(
            name="processed_data",
            uri="file://data/processed",
        ),
    },
)

In [None]:
# 4. Execute the Full Workflow
# Sequence: lineage START -> MLflow run (params/metrics) -> lineage COMPLETE.
# Any exception triggers a lineage FAIL event and an error log, then propagates.
experiment_id = get_or_create_experiment(experiment_name)

# Bind the MLflow run id before entering the try block. The failure handler
# reports it, and the error may occur before MLflow has started a run (e.g. the
# START lineage event fails). Without this, the handler would raise NameError on
# a fresh kernel, or silently report a stale run id left over from a previous
# execution of this cell.
mlflow_run_id = None

try:
    # Emit lineage START event
    emit_lineage_event(lineage_client, job_name, RunState.START, run_uuid, input_datasets=[input_dataset])

    with mlflow.start_run(experiment_id=experiment_id, run_name=f"{job_name}_{run_uuid[:8]}") as run:
        mlflow_run_id = run.info.run_id
        logger.log_event('MLFLOW_RUN_START', {'run_id': mlflow_run_id, 'lineage_run_id': run_uuid})

        # --- Your data processing and model training logic would go here ---
        print('Processing data and training model...')
        mlflow.log_param("dataset_version", "v1.0")
        mlflow.log_metric("final_accuracy", 0.99)
        # ---

    # Emit lineage COMPLETE event (after the MLflow run context has closed)
    emit_lineage_event(lineage_client, job_name, RunState.COMPLETE, run_uuid, output_datasets=[output_dataset])
    logger.log_event('WORKFLOW_SUCCESS', {'run_id': mlflow_run_id})

except Exception as e:
    # Emit lineage FAIL event on error
    emit_lineage_event(lineage_client, job_name, RunState.FAIL, run_uuid, output_datasets=[])
    # run_id is None when the failure happened before the MLflow run was created.
    logger.log_error('WORKFLOW_FAILED', str(e), {'run_id': mlflow_run_id})
    # Bare raise re-raises the active exception with its original traceback.
    raise

### Viewing the Results

* **MLflow**: Run `uv run mlflow ui` in your terminal and open `http://127.0.0.1:5000`.
* **OpenLineage**: Inspect the `lineage.log` file in the project root to see the raw lineage events.
* **Logging**: Check the `logs/demo.log` file for the structured JSON logs.