In [0]:
# Load the IPython autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2
# Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload, run %autoreload 0

In [0]:
import sys
import os
# Put the local ./odibi_de_v2 directory on the import path. The membership
# check keeps the cell idempotent: re-running it no longer appends a duplicate
# sys.path entry each time.
_odibi_path = os.path.abspath('./odibi_de_v2')
if _odibi_path not in sys.path:
    sys.path.append(_odibi_path)

# Disable .pyc generation. PYTHONDONTWRITEBYTECODE is only consulted when an
# interpreter starts, so exporting it here affects child processes only;
# sys.dont_write_bytecode is the switch that applies to the already-running
# kernel. Both are set so the behaviour is consistent in either case.
os.environ["PYTHONDONTWRITEBYTECODE"] = "1"
sys.dont_write_bytecode = True
from odibi_de_v2.transformer import SQLTransformerFromConfig, SparkEventSplitter,SparkRuleBasedMapper
from odibi_de_v2.config import ConfigUtils

In [0]:
from pyspark.sql.types import StructType, StructField, StringType

# Column layout of the sample frame: every column is a nullable string.
# NOTE(review): the "UNPLNNED" spelling is reproduced as-is; presumably it
# mirrors the upstream column name — confirm before renaming.
oee_column_names = [
    "OEE_EVENT_TYPE",
    "OEE_UNPLNNED_REASONS",
    "OEE_PLANNED_REASONS",
    "OEE_OFF_REASONS",
    "OEE_EVENT_TARGET",
    "OEE_EVENT_ACTUAL",
]
schema = StructType(
    [StructField(column, StringType(), True) for column in oee_column_names]
)

# Sample rows: one per event type referenced by the rules below, plus an
# "Unknown Event" row that none of the rule conditions mention.
data = [
    ("Production", "", "", "", None, None),
    ("Unplanned Downtime Event", "Pump failure", "", "", None, None),
    ("Planned Downtime", "", "Scheduled Maintenance", "", None, None),
    ("Market Driven Down Time", "", "", "Low Demand", None, None),
    ("Unknown Event", "", "", "", None, None),
]

df = spark.createDataFrame(data, schema=schema)
display(df)

# Rules handed to the mapper. Each rule carries a condition string, a value,
# and a use_column flag; the three downtime rules set use_column=True and name
# a reason column as their value, while the production rule supplies a literal.
reason_rules = [
    {
        "condition": "OEE_EVENT_TYPE == 'Production'",
        "value": "Production Event",
        "use_column": False,
    },
    {
        "condition": "OEE_EVENT_TYPE == 'Unplanned Downtime Event' and OEE_UNPLNNED_REASONS != ''",
        "value": "OEE_UNPLNNED_REASONS",
        "use_column": True,
    },
    {
        "condition": "OEE_EVENT_TYPE == 'Planned Downtime' and OEE_PLANNED_REASONS != ''",
        "value": "OEE_PLANNED_REASONS",
        "use_column": True,
    },
    {
        "condition": "OEE_EVENT_TYPE == 'Market Driven Down Time' and OEE_OFF_REASONS != ''",
        "value": "OEE_OFF_REASONS",
        "use_column": True,
    },
]

# Build the mapper that writes its result into DETERMINED_REASON.
transformer = SparkRuleBasedMapper(
    column_name="DETERMINED_REASON",
    rules=reason_rules,
    default_value="Mismatched Reason",
)

# Apply the mapper to the sample frame.
result_df = transformer.transform(df)

# Display the event type, the three reason columns, and the derived column.
display(result_df.select(*oee_column_names[:4], "DETERMINED_REASON"))

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType
import pyspark.sql.functions as F

# SparkRuleBasedMapper is not imported in this cell; it is expected to already
# be in scope (an earlier cell imports it from odibi_de_v2.transformer).

# Obtain the Spark session used by this cell.
session_builder = SparkSession.builder.master("local[*]")
session_builder = session_builder.appName("TestAllWithDynamic")
spark = session_builder.getOrCreate()

# Sample schema: (column name, Spark type) pairs, all nullable.
field_specs = [
    ("status", StringType()),
    ("score", IntegerType()),
    ("name", StringType()),
    ("type", StringType()),
    ("region", StringType()),
    ("level", StringType()),
]
schema = StructType(
    [StructField(field_name, field_type, True) for field_name, field_type in field_specs]
)

# Sample records, written positionally and then turned into one dict per row
# keyed by column name.
field_names = [field_name for field_name, _ in field_specs]
sample_rows = [
    ("Active", 95, "John Doe", "Gold", "North", "A"),
    ("Inactive", 45, "Jane Smith", "Silver", "South", "B"),
    ("Active", 70, "Johnny Appleseed", "Bronze", "West", None),
    (None, 55, "Unknown Name", None, "East", "C"),
    ("Pending", 30, "Jim Brown", "Gold", "South", "D"),
    ("Active", 85, "Joanna Blue", "Platinum", "North", "A"),
]
data = [dict(zip(field_names, row)) for row in sample_rows]

df = spark.createDataFrame(data, schema)

# (condition, value) pairs; every rule in this cell uses use_column=False.
# Values are a mix of plain strings, a lambda given as source text, and a real
# callable.
# NOTE(review): how the mapper treats the string-form lambda and in what order
# it applies rules is not visible here — confirm against SparkRuleBasedMapper.
rule_specs = [
    ("status == 'Active' AND score > 90", "Top Performer"),
    ("status != 'Active'", "Non-Active"),
    ("score >= 60 AND score <= 80", 'lambda df: df["score"] * 1.1'),
    ("name like 'John%'", lambda df: F.concat(df["name"], F.lit(" (Family)"))),
    ("type in ('Gold', 'Silver')", "Valued Customer"),
    ("region not in ('East', 'West')", "Preferred Region"),
    ("level is null", "No Level"),
    ("level is not null", "Level Exists"),
]
rules = [
    {"condition": condition, "value": value, "use_column": False}
    for condition, value in rule_specs
]

# Build the mapper that writes into mapped_category, then apply it.
mapper = SparkRuleBasedMapper(
    column_name="mapped_category",
    rules=rules,
    default_value="Other",
)
transformed_df = mapper.transform(df)

# Print the input columns next to the derived column without truncating values.
output_columns = field_names + ["mapped_category"]
transformed_df.select(*output_columns).show(truncate=False)
