# ODIBI Complete Databricks Test

Tests all Phase 2 features and cleans up the test data afterward.

In [None]:
%pip install "git+https://github.com/henryodibi11/Odibi.git#egg=odibi[spark,pandas,azure]" --quiet
# The %pip line above installs ODIBI from its GitHub repository with the
# spark, pandas and azure extras; --quiet reduces pip's output.
# Restart the Python process through the Databricks library utility --
# presumably so the freshly installed package is importable in later cells.
dbutils.library.restartPython()

In [None]:
# Validate environment
# Imports ODIBI's Databricks environment check and runs it with verbose output.
# NOTE(review): what the check inspects and how it reports a failure is defined
# in odibi.utils and is not visible here -- confirm before treating it as a gate.
from odibi.utils import validate_databricks_environment
validate_databricks_environment(verbose=True)

In [None]:
# Setup: imports, engine instances, a scratch directory and the shared test frame.
# Every name bound here (spark, pandas_engine, spark_engine, TEST_BASE,
# test_data) is a notebook global used by the cells that follow.
import os

import pandas as pd
from pyspark.sql import SparkSession

from odibi.engine import PandasEngine, SparkEngine

# Reuse the session already attached to this notebook. getActiveSession()
# returns None when no session is active on the current thread, so fall back
# to getOrCreate() instead of handing None to SparkEngine.
spark = SparkSession.getActiveSession()
if spark is None:
    spark = SparkSession.builder.getOrCreate()

pandas_engine = PandasEngine()
spark_engine = SparkEngine(spark_session=spark)

# Test paths
# TEST_BASE is used as a local filesystem path (os.makedirs below); on
# Databricks the /dbfs prefix is presumably the DBFS FUSE mount.
TEST_BASE = "/dbfs/tmp/odibi_test"
os.makedirs(TEST_BASE, exist_ok=True)

# Test data: five rows, three columns.
test_data = pd.DataFrame({
    "id": [1, 2, 3, 4, 5],
    "name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "salary": [100000, 80000, 95000, 70000, 85000],
})

print(f"✓ Setup complete. Test base: {TEST_BASE}")
print(test_data)

In [None]:
# Test 1: Pandas CSV
# Round-trips test_data through a CSV file under TEST_BASE with the Pandas
# engine and checks that all five rows come back.
print("TEST 1: Pandas CSV")
csv_path = f"{TEST_BASE}/test.csv"
pandas_engine.write(test_data, connection=None, path=csv_path, format="csv")
df = pandas_engine.read(connection=None, path=csv_path, format="csv")
assert len(df) == 5, f"expected 5 rows from CSV, got {len(df)}"
print("✅ PASSED")

In [None]:
# Test 2: Pandas Parquet
# Round-trips test_data through a Parquet file under TEST_BASE with the Pandas
# engine and checks that all five rows come back.
print("TEST 2: Pandas Parquet")
pq_path = f"{TEST_BASE}/test.parquet"
pandas_engine.write(test_data, connection=None, path=pq_path, format="parquet")
df = pandas_engine.read(connection=None, path=pq_path, format="parquet")
assert len(df) == 5, f"expected 5 rows from Parquet, got {len(df)}"
print("✅ PASSED")

In [None]:
# Test 3: Pandas Delta Lake
# Writes test_data as a Delta table under TEST_BASE with the Pandas engine
# (overwrite mode, so reruns start clean), reads it back and checks the row count.
print("TEST 3: Pandas Delta Lake")
delta_path = f"{TEST_BASE}/delta_table"
pandas_engine.write(test_data, connection=None, path=delta_path, format="delta", mode="overwrite")
df = pandas_engine.read(connection=None, path=delta_path, format="delta")
assert len(df) == 5, f"expected 5 rows from Delta table, got {len(df)}"
print("✅ PASSED")

In [None]:
# Test 4: Spark Parquet
# Converts test_data to a Spark DataFrame, writes it as Parquet through the
# Spark engine, reads it back and checks the row count.
# spark_df is a notebook global reused by the Spark Delta cell.
print("TEST 4: Spark Parquet")
spark_df = spark.createDataFrame(test_data)
# TEST_BASE starts with the local "/dbfs" mount prefix; prepending "dbfs:" to it
# unchanged would produce "dbfs:/dbfs/...", a different location from the one
# the local path refers to. Strip the mount prefix before building the URI.
dbfs_root = TEST_BASE[len("/dbfs"):] if TEST_BASE.startswith("/dbfs/") else TEST_BASE
spark_pq = f"dbfs:{dbfs_root}/spark.parquet"
spark_engine.write(spark_df, path=spark_pq, format="parquet", mode="overwrite")
df = spark_engine.read(path=spark_pq, format="parquet")
row_count = df.count()
assert row_count == 5, f"expected 5 rows from Spark Parquet, got {row_count}"
print("✅ PASSED")

In [None]:
# Test 5: Spark Delta + SQL
# Writes spark_df (created in the Spark Parquet cell) as a Delta table, reads
# it back, registers it under the name "data" for execute_sql and runs an
# aggregate query over it.
print("TEST 5: Spark Delta + SQL")
# TEST_BASE starts with the local "/dbfs" mount prefix; prepending "dbfs:" to it
# unchanged would produce "dbfs:/dbfs/...", a different location from the one
# the local path refers to. Strip the mount prefix before building the URI.
dbfs_root = TEST_BASE[len("/dbfs"):] if TEST_BASE.startswith("/dbfs/") else TEST_BASE
spark_delta = f"dbfs:{dbfs_root}/spark_delta"
spark_engine.write(spark_df, path=spark_delta, format="delta", mode="overwrite")
df = spark_engine.read(path=spark_delta, format="delta")
result = spark_engine.execute_sql("SELECT AVG(salary) as avg_sal FROM data", {"data": df})
# NOTE(review): an aggregate without GROUP BY yields a row even for empty
# input, so this only proves the query executed -- consider asserting the value.
assert result.count() > 0, "SQL query returned no rows"
print("✅ PASSED")

In [None]:
# Test 6: Parallel connections
# Builds two AzureADLS connections with placeholder account names and keys
# (direct_key auth) and configures them through configure_connections_parallel,
# requiring that no errors are reported.
# NOTE(review): validate=True is passed with dummy credentials -- confirm that
# validation here does not require reaching a real storage account.
print("TEST 6: Parallel Connection Setup")
from odibi.connections import AzureADLS
from odibi.utils import configure_connections_parallel

conns = {
    "test1": AzureADLS(account="test1", container="c", auth_mode="direct_key",
                       account_key="key1", validate=True),
    "test2": AzureADLS(account="test2", container="c", auth_mode="direct_key",
                       account_key="key2", validate=True),
}

configured, errors = configure_connections_parallel(conns, verbose=False)
# Include the reported errors in the failure message so a failing run is diagnosable.
assert len(errors) == 0, f"connection setup reported errors: {errors}"
print("✅ PASSED")

In [None]:
# Cleanup
# Removes the scratch directory created by the setup cell, then prints the
# final summary banner.
import shutil

print("\n🧹 Cleaning up test data...")
if os.path.exists(TEST_BASE):
    shutil.rmtree(TEST_BASE)
    print(f"✓ Removed {TEST_BASE}")

# Also remove the dbfs:{TEST_BASE} location in case anything was written there
# through the DBFS API. This stays best-effort: a failure (including dbutils
# not being defined) must not fail the run, but the reason is reported
# instead of being silently swallowed.
try:
    dbutils.fs.rm(f"dbfs:{TEST_BASE}", recurse=True)
    print("✓ Removed from DBFS")
except Exception as exc:
    print(f"DBFS cleanup skipped: {exc}")

print("\n" + "="*70)
print("🎉 ALL TESTS PASSED!")
print("="*70)
print("\nODIBI Phase 2 is working perfectly in Databricks! 🚀")