# Using Unity Catalog with Daft

## Set up

In [4]:
!pip install -U "getdaft[unity]"



In [12]:
# import modules
import daft
from daft.unity_catalog import UnityCatalog

In [None]:
# Before running the cells below, launch a local Unity Catalog server
# in a separate terminal window:
#   bin/start-uc-server

## Connect Daft to Unity Catalog

In [8]:
# Create the Unity Catalog client that Daft uses, pointed at the server
# listening on 127.0.0.1:8080.
# NOTE(review): the token is a placeholder value; presumably the local server
# does not check it — confirm before pointing this at a secured deployment.
unity = UnityCatalog(endpoint="http://127.0.0.1:8080", token="not-used")

In [9]:
# Ask the server for its catalogs and print the result.
catalog_names = unity.list_catalogs()
print(catalog_names)

['unity']


In [10]:
# Ask for the schemas inside the "unity" catalog and print the result.
schema_names = unity.list_schemas("unity")
print(schema_names)

['unity.default']


In [11]:
# Ask for the tables inside the "unity.default" schema and print the result.
table_names = unity.list_tables("unity.default")
print(table_names)

['unity.default.numbers', 'unity.default.marksheet_uniform', 'unity.default.marksheet']


## Load Unity Tables into Daft DataFrame
Let's look at how we can read Delta Lake tables in a Unity Catalog with Daft.

First, point Daft to your Delta table stored in your Unity Catalog:

In [14]:
# Load the table by its fully qualified name (one of the names printed by
# `list_tables` above); the result is passed to `daft.read_delta_lake` below.
unity_table = unity.load_table("unity.default.numbers")

Then read this table with the `daft.read_delta_lake` function:

In [30]:
# Build a Daft DataFrame from the Unity table loaded in the previous cell.
df = daft.read_delta_lake(unity_table)
# Display the first 5 rows.
df.show(5)

as_int Int32,as_double Float64
564,188.75535598441476
755,883.6105633023361
644,203.4395591086936
75,277.8802190765611
42,403.857969425109


In [28]:
# Convert the Daft DataFrame to pandas so it can be rendered as a Markdown table.
# `df_p` must be defined here: no other cell creates it, so a fresh kernel
# would otherwise fail with a NameError.
# Note: pandas' `to_markdown` requires the optional `tabulate` package.
df_p = df.to_pandas()
df_p.to_markdown()

'|    |   as_int |   as_double |\n|---:|---------:|------------:|\n|  0 |      564 |    188.755  |\n|  1 |      755 |    883.611  |\n|  2 |      644 |    203.44   |\n|  3 |       75 |    277.88   |\n|  4 |       42 |    403.858  |\n|  5 |      680 |    797.691  |\n|  6 |      821 |    767.8    |\n|  7 |      484 |    344.004  |\n|  8 |      477 |    380.679  |\n|  9 |      131 |     35.4437 |\n| 10 |      294 |    209.322  |\n| 11 |      150 |    329.197  |\n| 12 |      539 |    425.661  |\n| 13 |      247 |    477.742  |\n| 14 |      958 |    509.371  |'

Any subsequent filter operations on the Daft `df` DataFrame object will be correctly optimized to take advantage of Delta Lake features.

In [19]:
# TODO: find out how to inspect the partition key of the Delta table.
# Keep only the rows whose `as_int` value is greater than 500.
# NOTE(review): this filter only benefits from Delta Lake partition pruning if the
# table is actually partitioned on `as_int` — confirm against the table metadata.
df = df.where(df["as_int"] > 500)
df.show()

as_int Int32,as_double Float64
564,188.75535598441476
755,883.6105633023361
644,203.4395591086936
680,797.6912200731077
821,767.7998537403159
539,425.66102859000944
958,509.3712727285101


In [None]:
# List the tables in the "unity.default" schema.
# The method has to be called: printing `unity.list_tables` on its own only
# shows the bound-method object, not the table names.
print(unity.list_tables("unity.default"))