In [None]:
import polars as pl
import torch
import numpy as np
import duckdb

## Image Tensors -> DuckDB

This assumes the images have already been loaded into tensors; the example below uses randomly generated tensors as a stand-in for real image data.

### Load the tensors and labels into a dataframe

In [None]:
# Seed torch's RNG so the synthetic images (and every output derived from
# them) are identical on each Restart & Run All.
torch.manual_seed(0)

n_images = 1000
# Random stand-ins for real data: n_images tensors of shape (3, 28, 28).
images = torch.randn(n_images, 3, 28, 28)

# Flatten each image into a 1-D array of 3 * 28 * 28 = 2352 values so that a
# whole image fits into a single list-typed dataframe cell.
list_of_images = [image.flatten().numpy() for image in images]

df = pl.DataFrame(
    {
        "image_tensors": list_of_images,
        # Alternating dummy labels: 0 for even row indices, 1 for odd ones.
        "labels": [i % 2 for i in range(n_images)],
    }
)

df.head(4)

image_tensors,labels
list[f32],i64
"[0.078221, 0.006165, … -1.637237]",0
"[-0.986142, 1.431071, … -0.782766]",1
"[-0.11733, -0.171572, … -1.526892]",0
"[-1.438237, 0.67865, … -1.042472]",1


### Load the dataframe into DuckDB

In [None]:
# Open (or create) the on-disk database file; the path is relative to the
# current working directory.
con = duckdb.connect('image_dataset.db')

# OR REPLACE keeps this cell re-runnable: the database file persists between
# kernel sessions, so a plain CREATE TABLE fails on the second run because the
# table already exists. `df` in the query refers to the Python dataframe
# variable of that name defined in the cell above.
con.sql('CREATE OR REPLACE TABLE IMAGES AS SELECT * FROM df')

# Preview the stored rows.
con.sql('FROM images').show()

┌─────────────────────────────────────────────────────────────────────────────────────────────────────────────┬────────┐
│                                                image_tensors                                                │ labels │
│                                                   float[]                                                   │ int64  │
├─────────────────────────────────────────────────────────────────────────────────────────────────────────────┼────────┤
│ [0.07822117, 0.006164769, -0.52732444, -0.33928436, 0.2004719, 0.44075122, -0.017990295, 1.2530198, -0.42…  │      0 │
│ [-0.9861415, 1.431071, -1.919284, 1.1519945, 0.80238366, -1.218601, -0.09066243, -2.352858, 1.1527617, 1.…  │      1 │
│ [-0.11733001, -0.17157178, -0.22701412, 0.2785775, 0.11213446, 0.50116795, 1.2962005, -1.600458, 0.689514…  │      0 │
│ [-1.4382374, 0.6786499, 2.12278, 1.2003614, -0.63969743, -0.026586259, 0.48129764, -0.43329766, 1.7827098…  │      1 │
│ [-0.62999773, 0.024809627, 1.1

## DuckDB -> Image Tensors

In [None]:
# Fetch the table straight into a polars DataFrame; this avoids converting to
# pandas first and then back to polars.
df = con.sql('FROM images').pl()

# Each row holds one flattened image. Convert the column to plain nested
# Python lists before handing it to torch, and pin the dtype to float32 so the
# result does not depend on torch's global default dtype.
flat_images = torch.tensor(df['image_tensors'].to_list(), dtype=torch.float32)

# Restore the original (channels, height, width) layout; -1 lets torch infer
# the number of images from the row count.
retrieved = flat_images.view(-1, 3, 28, 28)

retrieved.shape

torch.Size([1000, 3, 28, 28])