In [9]:
import atdata
import atdata.atmosphere as atpd

---

In [2]:
@atdata.packable
class ProtoTestSample:
    """Minimal sample type used by this notebook to exercise atdata publishing.

    NOTE(review): ``@atdata.packable`` presumably supplies the keyword
    constructor and the ``as_wds`` attribute that later cells rely on —
    confirm against the atdata documentation.
    """
    # Integer identifier of the sample.
    id: int
    # Short human-readable label.
    name: str
    # Free-text payload; this is the field printed when the dataset is read back.
    message: str
    # Floating-point value attached to the sample.
    weight: float

In [3]:
import numpy as np

# Seed a dedicated generator so that Restart & Run All rebuilds exactly the
# same weights (the global np.random state is unseeded and differs per run).
SEED = 42
rng = np.random.default_rng( SEED )

# Build 100 demo samples. `id` is zero-based, while the human-readable name and
# message count from 1.
samples = [
    ProtoTestSample(
        id = i,
        name = f'Test {i + 1}',
        message = f' Hello, from test number {i + 1}!',
        weight = float( rng.uniform() ),  # uniform draw from [0, 1), as a plain Python float
    )
    for i in range( 100 )
]

In [13]:
from dotenv import dotenv_values
# Read the ATProto credentials from a local dotenv file into a plain mapping.
env = dotenv_values( '.credentials/atproto.env' )

# dotenv_values() returns an empty mapping when the file is absent and None for
# keys without a value, so fail here with a clear message instead of later
# during login. Only key names are reported, never their values.
missing_keys = [ key for key in ( 'AT_HANDLE', 'AT_PASSWORD' ) if not env.get( key ) ]
if missing_keys:
    raise RuntimeError(
        f"Missing {', '.join( missing_keys )} in .credentials/atproto.env"
    )

In [14]:
# Create the client and log in with the handle / password loaded into `env`.
client = atpd.AtmosphereClient()
client.login( env['AT_HANDLE'], env['AT_PASSWORD'] )

# Report which identity the session belongs to: handle first, then DID,
# one per line.
print(
    f"Authenticated as: {client.handle}",
    f"DID: {client.did}",
    sep = "\n",
)

Authenticated as: maxine.science
DID: did:plc:ybmxlogro7nxttbwo5xqjgga


In [15]:
# Publish the ProtoTestSample schema; the returned URI is what the dataset
# record references later on.
schema_publisher = atpd.SchemaPublisher( client )

# Descriptive metadata stored alongside the schema.
schema_metadata = {
    'name': "ProtoTestSample",
    'version': "0.1.0",
    'description': "Demo created for user-produced prototyping of atdata",
}
schema_uri = schema_publisher.publish( ProtoTestSample, **schema_metadata )

print(f"Schema URI: {schema_uri}")

Schema URI: at://did:plc:ybmxlogro7nxttbwo5xqjgga/ac.foundation.dataset.sampleSchema/3mco7hrpwaz27


In [17]:
# Fetch the schema back using the URI returned when it was published.
schema_loader = atpd.SchemaLoader( client )

# NOTE(review): test_schema is not referenced by any later cell visible here —
# presumably kept for manual inspection; confirm or drop.
test_schema = schema_loader.get( schema_uri )

In [19]:
import io
import webdataset as wds

# Write every sample's WebDataset representation into one tar archive held
# entirely in memory. Leaving the `with` block closes the writer, so the
# buffer is read only after the archive has been finalised.
tar_buffer = io.BytesIO()
with wds.writer.TarWriter( tar_buffer ) as sink:
    for sample in samples:
        sink.write( sample.as_wds )
tar_data = tar_buffer.getvalue()

# Publisher used by the next cell to upload the archive.
dataset_publisher = atpd.DatasetPublisher( client )

print(f"Created tar with {len(samples)} samples ({len(tar_data):,} bytes)")

Created tar with 100 samples (215,040 bytes)


In [20]:
# Publish with blob storage
blob_dataset_uri = dataset_publisher.publish_with_blobs(
    blobs=[tar_data],
    schema_uri=str( schema_uri ),
    name="Blob Storage Demo Dataset",
    description="Small dataset stored directly in ATProto blobs",
    tags=["demo", "blob-storage"],
)
print(f"Dataset URI: {blob_dataset_uri}")

Dataset URI: at://did:plc:ybmxlogro7nxttbwo5xqjgga/ac.foundation.dataset.record/3mco7rwxl4f2w


In [21]:
# Loader bound to the authenticated client; it is reused below to open the dataset.
dataset_loader = atpd.DatasetLoader(client)
# Ask for dataset records, passing limit = 10 to keep the listing short.
datasets = dataset_loader.list_all( limit = 10 )
# Raw dump of the returned records for a quick visual check.
print( datasets )

[{'name': 'Blob Storage Demo Dataset', 'tags': ['demo', 'blob-storage'], '$type': 'ac.foundation.dataset.record', 'storage': {'$type': 'ac.foundation.dataset.storageBlobs', 'blobs': [{'$type': 'blob', 'ref': {'$link': 'bafkreihzbu5ptgolwdjhdvjyt7kvf3rirv257d5vlrz3zbmr2okkmsezmq'}, 'mimeType': 'application/x-tar', 'size': 215040}]}, 'createdAt': '2026-01-18T03:47:42.965321+00:00', 'schemaRef': 'at://did:plc:ybmxlogro7nxttbwo5xqjgga/ac.foundation.dataset.sampleSchema/3mco7hrpwaz27', 'description': 'Small dataset stored directly in ATProto blobs'}]


In [23]:
# Load and iterate (works for both storage types)
ds = dataset_loader.to_dataset( str( blob_dataset_uri ), ProtoTestSample )
for batch in ds.ordered( batch_size = 16 ):
    break

In [34]:
# Bare annotation: assigns no value to `sample`; it only tells editors and
# type checkers what the loop variable below is.
sample: ProtoTestSample
# Print the message of each sample in the batch, one per line.
for sample in batch.samples:
    print( sample.message )

 Hello, from test number 1!
 Hello, from test number 2!
 Hello, from test number 3!
 Hello, from test number 4!
 Hello, from test number 5!
 Hello, from test number 6!
 Hello, from test number 7!
 Hello, from test number 8!
 Hello, from test number 9!
 Hello, from test number 10!
 Hello, from test number 11!
 Hello, from test number 12!
 Hello, from test number 13!
 Hello, from test number 14!
 Hello, from test number 15!
 Hello, from test number 16!
