# Lazy Plan

In [4]:
import daft

# Small in-memory frame: an integer column "a" and a boolean column "b".
df = daft.from_pydict(
    {
        "a": [3, 2, 5, 6, 1, 4],
        "b": [True, False, False, True, True, False],
    }
)

# Print the unoptimized logical, optimized logical, and physical plans for
# "keep rows where b is True, then sort by a".
# Keep the explicit `== True`: the printed plans show the filter going from
# `col(b) == lit(true)` (unoptimized) to plain `col(b)` (optimized).
(
    df.where(df["b"] == True)  # noqa: E712 - builds an expression, not a Python bool
    .sort(df["a"])
    .explain(show_all=True)
)

== Unoptimized Logical Plan ==

* Sort: Sort by = (col(a), ascending, nulls last)
|
* Filter: col(b) == lit(true)
|
* Source:
|   Number of partitions = 1
|   Output schema = a#Int64, b#Boolean


== Optimized Logical Plan ==

* Sort: Sort by = (col(a), ascending, nulls last)
|   Stats = { Approx num rows = 2, Approx size bytes = 10 B, Accumulated selectivity
|     = 0.20 }
|
* Filter: col(b)
|   Stats = { Approx num rows = 2, Approx size bytes = 10 B, Accumulated selectivity
|     = 0.20 }
|
* Source:
|   Number of partitions = 1
|   Output schema = a#Int64, b#Boolean
|   Stats = { Approx num rows = 6, Approx size bytes = 49 B, Accumulated selectivity
|     = 1.00 }


== Physical Plan ==

* Sort: Sort by = (col(a), ascending, nulls last)
|   Stats = { Approx num rows = 2, Approx size bytes = 10 B, Accumulated selectivity
|     = 0.20 }
|
* Filter: col(b)
|   Stats = { Approx num rows = 2, Approx size bytes = 10 B, Accumulated selectivity
|     = 0.20 }
|
* InMemorySource:
|   Schema = 

# Expressions

In [5]:
# Run the filter + sort and display the resulting rows
# (only rows where b is True, ordered by a).
(
    df.where(df["b"] == True)  # noqa: E712 - builds an expression, not a Python bool
    .sort(df["a"])
    .collect()
)

a Int64,b Boolean
1,True
3,True
6,True


In [6]:
# Show the frame with column "b" left out; `df` itself is not reassigned.
df_without_b = df.exclude("b")
df_without_b.show()

a Int64
3
2
5
6
1
4


In [7]:
from daft import col

# Per-row weight chosen from the boolean column: 1.0 where b is True, else 0.5.
b_weight = col("b").if_else(1.0, 0.5)

# c = a * weight * 2, so c is 2*a for True rows and a for False rows.
df = df.with_column("c", col("a") * b_weight * 2)
df.show()

a Int64,b Boolean,c Float64
3,True,6
2,False,2
5,False,5
6,True,12
1,True,2
4,False,4


In [8]:

# NOTE: this rebinds `df` to a new single-column frame of URLs.
df = daft.from_pydict({"urls": ["https://www.baidu.com"]})

# Add a "data" column holding the bytes downloaded from each URL
# (fetching a remote URL, so this cell needs network access).
df = df.with_column("data", df["urls"].url.download())
df.collect()

urls Utf8,data Binary
https://www.baidu.com,"b""<html>\r\n<head>\r\n\t<script>\r\n""..."


# UDF

In [9]:
import daft
import numpy as np

# `image`: 16 images, each represented as a 2D numpy array of ones (128 x 128).
images = [np.ones((128, 128)) for _ in range(16)]
# `crop`: one box per image to crop from it, as a list of integers
# [x1, x2, y1, y2].
crops = [[0, 1, 0, 1] for _ in range(16)]

# NOTE: this rebinds `df` to the image/crop frame used by the cells below.
df = daft.from_pydict({"image": images, "crop": crops})

In [10]:
# Apply a plain Python function to every value of the "image" column:
# each image is flattened, and the result is declared as a Python-object column.
flattened_image = df["image"].apply(
    lambda img: img.flatten(),
    return_dtype=daft.DataType.python(),
)

# Display the first two rows with the new column attached.
df.with_column("flattened_image", flattened_image).show(2)

image Tensor(Float64),crop List[Int64],flattened_image Python
"<Tensor shape=(128, 128)>","[0, 1, 0, 1]","<np.ndarray shape=(16384,) dtype=float64>"
"<Tensor shape=(128, 128)>","[0, 1, 0, 1]","<np.ndarray shape=(16384,) dtype=float64>"


In [11]:
@daft.udf(return_dtype=daft.DataType.python())
def crop_images(images, crops, padding=0):
    """Crop every image to its matching box.

    `images` and `crops` are walked in lockstep (pairing stops at the shorter
    of the two). Each crop is unpacked as exactly four values
    ``(x1, x2, y1, y2)``: ``x1:x2`` slices the image's first axis and
    ``y1:y2`` its second axis, so each image must support two-axis slice
    indexing.

    Args:
        images: iterable of images to crop.
        crops: iterable of 4-element boxes, one per image.
        padding: amount added to the *upper* bound of both slices only
            (the lower bounds ``x1`` / ``y1`` are used as-is). Defaults to 0.

    Returns:
        A list with one cropped image per (image, crop) pair, in input order.
    """
    return [
        img[x1:x2 + padding, y1:y2 + padding]
        for img, (x1, x2, y1, y2) in zip(images, crops)
    ]

# Call the UDF on the "image" and "crop" columns, with padding=1.
cropped = crop_images(df["image"], df["crop"], padding=1)

# Display the first two rows with the "cropped" column attached.
df.with_column("cropped", cropped).show(2)

image Tensor(Float64),crop List[Int64],cropped Python
"<Tensor shape=(128, 128)>","[0, 1, 0, 1]","<np.ndarray shape=(2, 2) dtype=float64>"
"<Tensor shape=(128, 128)>","[0, 1, 0, 1]","<np.ndarray shape=(2, 2) dtype=float64>"
