# Playground

## Initialize globals

In [1]:
import io
import os
import shutil
import sys
import time
from collections.abc import Callable
from contextlib import redirect_stdout
from typing import Any
from uuid import uuid4

from pyspark.sql import SparkSession

sys.path.append("../src")

from fabricengineer.transform.mlv import MaterializedLakeView
from fabricengineer.logging import TimeLogger

# Type declarations only: no value is assigned here.
# NOTE(review): nothing in this notebook assigns `mlv` or `timer` directly; they
# appear to be bound when the module sources are exec'd into globals() in the
# cells below — confirm against timer.py / mlv.py.
mlv: MaterializedLakeView
timer: TimeLogger

In [2]:
class NotebookUtilsFSMock:
    """Local-filesystem stand-in for the ``fs`` helper of ``notebookutils``.

    Relative paths are resolved against the current working directory, so
    files written through this mock end up next to the notebook.
    """

    def _get_path(self, file: str) -> str:
        """Return ``file`` joined onto the current working directory.

        An absolute ``file`` is returned unchanged, because ``os.path.join``
        discards the preceding components in that case.
        """
        return os.path.join(os.getcwd(), file)

    def exists(self, path: str) -> bool:
        """Return True if ``path`` (resolved via ``_get_path``) exists on disk."""
        return os.path.exists(self._get_path(path))

    def put(
        self,
        file: str,
        content: str,
        overwrite: bool = False
    ) -> None:
        """Write ``content`` to ``file`` as UTF-8 text.

        Missing parent directories are created first.

        Args:
            file: Target path, resolved via ``_get_path``.
            content: Text to write.
            overwrite: Replace an existing file when True.

        Raises:
            FileExistsError: If the target already exists and ``overwrite``
                is False.
        """
        path = self._get_path(file)

        if os.path.exists(path) and not overwrite:
            raise FileExistsError(f"File {path} already exists and overwrite is set to False.")

        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Pin the encoding: the platform default is locale-dependent and can
        # fail or corrupt non-ASCII content (e.g. cp1252 on Windows).
        with open(path, 'w', encoding="utf-8") as f:
            f.write(content)


class NotebookUtilsMock:
    """Minimal stand-in for ``notebookutils`` that exposes only ``fs``."""

    def __init__(self) -> None:
        # The file-system helper is the only part that is mocked here.
        self.fs = NotebookUtilsFSMock()

# Create the notebook-wide `spark` session and the `notebookutils` mock.
# The `global` statements have no effect at module level; they are kept as
# markers that both names are meant to be globals.
# NOTE(review): presumably these mirror names that the hosted notebook runtime
# provides — confirm.
global spark
global notebookutils

spark = (
    SparkSession.builder
    .appName("PlaygroundSparkSession")
    .getOrCreate()
)
notebookutils = NotebookUtilsMock()

25/07/31 08:51:51 WARN Utils: Your hostname, MacBook-Air-von-Enrico.local resolves to a loopback address: 127.0.0.1; using 192.168.0.3 instead (on interface en0)
25/07/31 08:51:51 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/07/31 08:51:52 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
def sniff_logs(fn: Callable[[], Any]) -> tuple[Any, list[str]]:
    """Call ``fn`` and capture everything it writes to ``sys.stdout``.

    Args:
        fn: Zero-argument callable to run.

    Returns:
        A ``(result, logs)`` tuple: ``result`` is whatever ``fn`` returned and
        ``logs`` is the captured stdout split into lines (an empty list when
        nothing was printed).

    Any exception raised by ``fn`` propagates to the caller; stdout is
    restored by the context manager in that case as well.
    """
    log_stream = io.StringIO()
    with redirect_stdout(log_stream):
        result = fn()
    return result, log_stream.getvalue().splitlines()

In [None]:
def cleanup_fs():
    """Delete the ``Files`` tree resolved by the mock file system, if present."""
    files_dir = notebookutils.fs._get_path("Files")
    if not os.path.exists(files_dir):
        # Nothing to clean up.
        return
    shutil.rmtree(files_dir)


# Start from a clean state before the cells below run.
cleanup_fs()

## TimeLogger

In [5]:
# Run the TimeLogger module source directly inside the notebook namespace.
# NOTE(review): this appears to be what binds the global `timer` displayed
# below — confirm in timer.py.
# Python source files are UTF-8 by default, so read them as UTF-8 rather than
# with the locale-dependent default encoding.
with open("../src/fabricengineer/logging/timer.py", encoding="utf-8") as f:
    code = f.read()
# exec() is applied to a local project file only; never point it at external input.
exec(code, globals())

timer

TimeLogger(start_time=None, end_time=None, elapsed_time=None)

In [6]:
# Start the timer and log the start timestamp (the TIMER-START line in the output).
timer.start().log()
# Wait one second so there is a measurable elapsed time.
time.sleep(1)
# Stop the timer and log the end timestamp together with the elapsed seconds.
timer.stop().log()

TIMER-START:	2025-07-31 08:51:52
TIMER-END:	2025-07-31 08:51:53, ELAPSED: 1.0152s


## MaterializedLakeView

In [None]:
# Run the MaterializedLakeView module source directly inside the notebook namespace.
# NOTE(review): this appears to be what binds the global `mlv` used below —
# confirm in mlv.py.
# Python source files are UTF-8 by default, so read them as UTF-8 rather than
# with the locale-dependent default encoding.
with open("../src/fabricengineer/transform/mlv/mlv.py", encoding="utf-8") as f:
    code = f.read()
# exec() is applied to a local project file only; never point it at external input.
exec(code, globals())


# Configure the view target.
# NOTE(review): `is_testing_mock=True` presumably keeps the object from
# touching a real lakehouse — confirm in mlv.py.
mlv.init(
    lakehouse="Lakehouse",
    schema="schema",
    table="table",
    table_suffix=None,
    is_testing_mock=True
)

# Show the resulting configuration, including the derived `table_path`.
mlv.to_dict()

{'lakehouse': 'Lakehouse',
 'schema': 'schema',
 'table': 'table',
 'table_path': 'Lakehouse.schema.table'}

In [52]:
# Re-initialise against a random lakehouse name for this walkthrough.
mlv.init(
    lakehouse=str(uuid4()),
    schema="schema",
    table="table",
    table_suffix=None,
    is_testing_mock=True,
)

sql = """
SELECT * FROM Lakehouse.schema.table
"""
# Four runs: create, unchanged, changed definition (replace), unchanged.
for i in range(4):
    # Only the very first run treats the view as not yet existing.
    is_existing = i > 0
    if i == 2:
        # Switch to a different definition from the third run onwards.
        sql = """
        SELECT * FROM Lakehouse.schema.table WHERE 1=0
        """
    # The lambda is invoked immediately, so it sees the current loop values.
    result, logs = sniff_logs(
        lambda: mlv.create_or_replace(sql, mock_is_existing=is_existing)
    )
    print(f"Logs-{i+1}")
    display(logs)

Logs-1


['Mock: CREATE SCHEMA 1cadbe22-0267-4a1a-a8cb-91b04335b1df.schema',
 'Mock: CREATE MLV 1cadbe22-0267-4a1a-a8cb-91b04335b1df.schema.table']

Logs-2


['Nothing has changed']

Logs-3


['REPLACE MLV: 1cadbe22-0267-4a1a-a8cb-91b04335b1df.schema.table',
 'Mock: DROP MLV 1cadbe22-0267-4a1a-a8cb-91b04335b1df.schema.table',
 'Mock: CREATE SCHEMA 1cadbe22-0267-4a1a-a8cb-91b04335b1df.schema',
 'Mock: CREATE MLV 1cadbe22-0267-4a1a-a8cb-91b04335b1df.schema.table']

Logs-4


['Nothing has changed']

## Clean up the file system

In [53]:
# Remove the local "Files" directory again; does nothing if it does not exist.
cleanup_fs()