<img src = "https://github.com/singlestore-labs/spaces-notebooks/blob/e551e274bb67bb1e5081131ee1150cdba713fc43/common/images/singlestore-jupyter.png?raw=true">

<div id="singlestore-header" style="display: flex; background-color: rgba(235, 249, 245, 0.25); padding: 5px;">
    <div id="icon-image" style="width: 90px; height: 90px;">
        <img width="100%" height="100%" src="https://raw.githubusercontent.com/singlestore-labs/spaces-notebooks/master/common/images/header-icons/browser.png" />
    </div>
    <div id="text" style="padding: 5px; margin-left: 10px;">
        <div id="badge" style="display: inline-block; background-color: rgba(0, 0, 0, 0.15); border-radius: 4px; padding: 4px 8px; align-items: center; margin-top: 6px; margin-bottom: -2px; font-size: 80%">SingleStore Notebooks</div>
        <h1 style="font-weight: 500; margin: 8px 0 0 4px;">Using SingleStore for Iceberg Catalog Storage with PyIceberg</h1>
    </div>
</div>

In [9]:
# Shell escape: clear pip's local cache before the install in the next cell.
!pip cache purge --quiet

In [10]:
!pip install "pyiceberg[pandas]" --quiet

In [11]:
import os

# Local directory that the catalog configuration later names as its
# "warehouse". exist_ok keeps this cell safe to re-run.
warehouse_dir = "warehouse"
os.makedirs(warehouse_dir, exist_ok = True)

In [12]:
import pandas as pd

# Raw gist URL of the Iris CSV. It has its own name because `url` is bound
# later in the notebook to the SQLAlchemy connection URL; sharing the name
# would let a re-run of this cell silently replace that object with a string.
iris_csv_url = "https://gist.githubusercontent.com/VeryFatBoy/9af771d443f5ec4dd6eec8d69a062638/raw/c03ef25a97f23a48ee408ac02114195b663a2364/iris.csv"

# Load the whole CSV into a pandas DataFrame.
pandas_df = pd.read_csv(iris_csv_url)

In [13]:
# Plain-text preview of the first five rows.
print(pandas_df.head(n = 5))

   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [14]:
%%sql
-- Start from a clean slate: remove any existing iris_db, then create it again.
DROP DATABASE IF EXISTS iris_db;
CREATE DATABASE IF NOT EXISTS iris_db;

<div class="alert alert-block alert-warning">
    <b class="fa fa-solid fa-exclamation-circle"></b>
    <div>
        <p><b>Action Required</b></p>
        <p>Select the <b>iris_db</b> database from the drop-down menu at the top of this notebook.</p>
    </div>
</div>

In [19]:
%%sql
-- Recreate the iris table so this cell can be re-run safely.
DROP TABLE IF EXISTS iris;

-- Four numeric measurement columns plus the species label; the column names
-- match the header of the CSV loaded earlier.
CREATE TABLE IF NOT EXISTS iris (
    sepal_length FLOAT,
    sepal_width FLOAT,
    petal_length FLOAT,
    petal_width FLOAT,
    species VARCHAR(20)
);

<div class="alert alert-block alert-warning">
    <b class="fa fa-solid fa-exclamation-circle"></b>
    <div>
        <p><b>Action Required</b></p>
        <p>Select the <b>iris_db</b> database from the drop-down menu at the top of this notebook. Doing so updates the <b>connection_url</b> variable, which SQLAlchemy uses to connect to the selected database.</p>
    </div>
</div>

In [20]:
# Import only the name this notebook uses; a star import would pull every
# public SQLAlchemy name into the notebook namespace.
from sqlalchemy import create_engine

# `connection_url` is not defined in this notebook's code cells.
# NOTE(review): presumably injected by the notebook environment once a
# database has been selected — confirm.
db_connection = create_engine(connection_url)

# Parsed URL object; its host, port and database are reused later to build
# the Iceberg catalog URI.
url = db_connection.url

In [21]:
# Keep only the Iris-virginica rows and append them to the SQL table `iris`.
# The DataFrame index is not written as a column.
virginica_rows = pandas_df.loc[pandas_df["species"] == "Iris-virginica"]

virginica_rows.to_sql(
    name = "iris",
    con = db_connection,
    if_exists = "append",
    index = False
)

50

In [22]:
%%sql
-- Row count of the SQL table after the pandas load.
SELECT COUNT(*) FROM iris;

COUNT(*)
50


In [23]:
%%sql
-- Look at a handful of the loaded rows.
SELECT * FROM iris LIMIT 5;

sepal_length,sepal_width,petal_length,petal_width,species
6.9,3.1,5.4,2.1,Iris-virginica
7.7,2.6,6.9,2.3,Iris-virginica
7.4,2.8,6.1,1.9,Iris-virginica
7.7,3.0,6.1,2.3,Iris-virginica
7.7,2.8,6.7,2.0,Iris-virginica


In [24]:
from singlestoredb.management import get_secret

# Read the value stored under the secret name "password"; it is used later
# to build the Iceberg catalog URI, so the credential never appears in the
# notebook source.
# NOTE(review): assumes a secret with this name has already been created — confirm.
password = get_secret("password")

In [25]:
from urllib.parse import quote

from pyiceberg.catalog.sql import SqlCatalog
from pyiceberg.exceptions import NamespaceAlreadyExistsError

# Percent-encode the password before embedding it in the URI: characters
# such as "@", "/", ":" or "%" would otherwise be read as URL delimiters and
# corrupt the connection string. safe="" encodes every reserved character.
encoded_password = quote(password, safe = "")

# Catalog settings: the SingleStore database selected earlier holds the
# catalog metadata, and the local "warehouse" directory is the warehouse
# location.
config = {
    "uri": f"singlestoredb://admin:{encoded_password}@{url.host}:{url.port}/{url.database}",
    "warehouse": "warehouse",
}

catalog = SqlCatalog(
    name = "s2_catalog",
    **config
)

# Creating a namespace that already exists raises, which would make this
# cell fail on a re-run; treat "already there" as success.
try:
    catalog.create_namespace("db")
except NamespaceAlreadyExistsError:
    pass

In [26]:
from pyiceberg.exceptions import NoSuchTableError

# The plain table name gets its own variable so it is not confused with the
# `table` object that a later cell creates from the catalog.
table_name = "iris"
table_identifier = f"db.{table_name}"

# Remove any table left over from a previous run so the create step starts
# from a clean state.
try:
    catalog.drop_table(table_identifier)
    print(f"Dropped table {table_identifier} successfully")
except NoSuchTableError:
    # Nothing to drop (for example on a first run); this is not an error.
    print(f"Table {table_identifier} does not exist. Skipping drop.")
except Exception as e:
    # Report the problem, then re-raise so the notebook does not carry on
    # against a catalog in an unknown state.
    print(f"Error dropping table: {e}")
    raise

Table db.iris does not exist. Skipping drop.


In [27]:
# Print every namespace in the catalog together with the tables it holds.
namespaces = catalog.list_namespaces()

for namespace in namespaces:
    tables = catalog.list_tables(namespace)
    if tables:
        print(f"Namespace: {namespace}")
        # The loop variable is deliberately not called `table`: that name is
        # used elsewhere in the notebook for the Iceberg table object, and a
        # loop variable of the same name would overwrite it.
        for listed_table in tables:
            print(f"- {listed_table}")
    else:
        print(f"Namespace: {namespace} (empty)")

Namespace: ('db',) (empty)


In [28]:
import pyarrow as pa

# Convert the full pandas DataFrame to an Arrow table.
df = pa.Table.from_pandas(pandas_df)

# Create the Iceberg table in the catalog using the Arrow table's schema.
table = catalog.create_table(table_identifier, schema = df.schema)

In [29]:
# Write the Arrow data into the Iceberg table.
table.append(df)

# Read the table back and report how many rows it now holds.
table.scan().to_arrow().num_rows

150

In [30]:
# Scan limited to five rows, used only for this preview. It has its own name
# so the Arrow table held in `df` (which other cells pass to the Iceberg
# table) is not replaced by a scan object.
preview_scan = table.scan(limit = 5)

print(preview_scan.to_pandas())

   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa


In [31]:
# Collect every row whose species is not Iris-virginica as an Arrow table.
non_virginica_scan = table.scan(row_filter = "species != 'Iris-virginica'")
df = non_virginica_scan.to_arrow()

In [32]:
# Replace the table's contents with the filtered rows.
table.overwrite(df)

# Row count after the overwrite.
table.scan().to_arrow().num_rows

100

In [33]:
# Read the whole Iceberg table into Arrow.
arrow_table = table.scan().to_arrow()

# Count how many rows each species has.
species_column = arrow_table["species"]
species_counts = species_column.value_counts()

print(species_counts.to_pandas())

0        {'values': 'Iris-setosa', 'counts': 50}
1    {'values': 'Iris-versicolor', 'counts': 50}
dtype: object


In [38]:
# Confirm that no Iris-virginica rows remain in the Iceberg table. The scan
# has its own name so the Arrow table held in `df` is not replaced by a scan
# object.
virginica_scan = table.scan(row_filter = "species = 'Iris-virginica'")

print(virginica_scan.to_pandas())

Empty DataFrame
Columns: [sepal_length, sepal_width, petal_length, petal_width, species]
Index: []


In [39]:
# Fetch the Iris-virginica rows from the SQL table `iris` into pandas.
virginica_query = "SELECT * FROM iris WHERE species = 'Iris-virginica'"

new_df = pd.read_sql(sql = virginica_query, con = db_connection)

In [40]:
# Convert the rows read from SQL to Arrow and append them to the Iceberg table.
virginica_arrow = pa.Table.from_pandas(new_df)
table.append(virginica_arrow)

# Row count after the append.
table.scan().to_arrow().num_rows

150

In [43]:
# Re-read the Iceberg table now that the Iris-virginica rows are back.
full_scan = table.scan()
arrow_table = full_scan.to_arrow()

# Per-species row counts.
species_counts = arrow_table["species"].value_counts()

print(species_counts.to_pandas())

0     {'values': 'Iris-virginica', 'counts': 50}
1        {'values': 'Iris-setosa', 'counts': 50}
2    {'values': 'Iris-versicolor', 'counts': 50}
dtype: object


In [44]:
%%sql
-- List the tables in the selected database; the recorded output shows two
-- iceberg_* tables alongside iris.
SHOW TABLES;

Tables_in_iris_db
iceberg_namespace_properties
iceberg_tables
iris
