In [0]:
import sys
import os
# Append the absolute path of the ./odibi_de_v2 directory (resolved against the
# current working directory) to the module search path.
sys.path.append(
    os.path.abspath(os.path.join(os.curdir, 'odibi_de_v2'))
)

In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, StructType
from odibi_de_v2.spark_utils.validation import (
   is_spark_dataframe,
   has_columns,
   is_empty,
   has_nulls_in_columns,
   has_duplicate_columns,
   is_flat_dataframe
)

# Use the shared SparkSession when one is already defined in this module's
# globals (as the Databricks comment above the original line describes);
# otherwise get or create one.
# SparkSession is imported here because the file's import block does not bring
# it into scope, so without this import the fallback raises NameError.
if "spark" not in globals():
    from pyspark.sql import SparkSession

    spark = SparkSession.builder.getOrCreate()

# Common schema for the row-based fixtures: a nullable integer "id" column
# followed by a nullable string "name" column.
schema = (
    StructType()
    .add("id", IntegerType(), True)
    .add("name", StringType(), True)
)

# Fixtures: one input per scenario passed to the validators further down.

# A one-row DataFrame built with the common schema, and a plain string that is
# not a DataFrame at all.
df_valid = spark.createDataFrame(data=[(1, "A")], schema=schema)
df_invalid = "not_a_dataframe"

# Same schema with no rows, and with a None in the "name" column of one row.
df_empty = spark.createDataFrame(data=[], schema=schema)
df_nulls = spark.createDataFrame(
    data=[(1, None), (2, "B")],
    schema=schema,
)

# Built from column-name lists: one with a repeated name, one with unique names.
df_duplicates = spark.createDataFrame(data=[(1, 2)], schema=["a", "a"])
df_clean = spark.createDataFrame(data=[(1, 2)], schema=["x", "y"])

# Read from a single JSON document whose "info" value is itself an object.
df_nested = spark.read.json(
    spark.sparkContext.parallelize(['{"id": 1, "info": {"score": 10}}'])
)

# Run Tests
# Each entry pairs the label to print with a zero-argument callable. The
# validators are invoked lazily inside the loop, so they run and print in the
# listed order, one at a time. Trailing comments give the expected result.
_checks = [
    ("is_spark_dataframe (valid):", lambda: is_spark_dataframe(df_valid)),      # True
    ("is_spark_dataframe (invalid):", lambda: is_spark_dataframe(df_invalid)),  # False
    ("has_columns ['id']:", lambda: has_columns(df_valid, ["id"])),             # True
    ("has_columns ['missing']:", lambda: has_columns(df_valid, ["missing"])),   # False
    ("is_empty (True):", lambda: is_empty(df_empty)),                           # True
    ("is_empty (False):", lambda: is_empty(df_valid)),                          # False
    ("has_nulls_in_columns ['name']:", lambda: has_nulls_in_columns(df_nulls, ["name"])),  # True
    ("has_nulls_in_columns ['id']:", lambda: has_nulls_in_columns(df_nulls, ["id"])),      # False
    ("has_duplicate_columns (True):", lambda: has_duplicate_columns(df_duplicates)),       # True
    ("has_duplicate_columns (False):", lambda: has_duplicate_columns(df_clean)),           # False
    ("is_flat_dataframe (True):", lambda: is_flat_dataframe(df_valid)),         # True
    ("is_flat_dataframe (False):", lambda: is_flat_dataframe(df_nested)),       # False
]

for _label, _run_check in _checks:
    print(_label, _run_check())