In [1]:
import os

from redis import Redis

import atdata.local as al

In [2]:
# The Redis connection URL embeds a password, so it is read from the environment
# instead of being written into the notebook. Export ATDATA_TEST_REDIS_URL before
# starting the kernel, e.g. "rediss://default:<password>@<host>:6379".
# NOTE(review): the URL that used to be hard-coded in this cell exposed a
# credential in a shared file -- treat it as leaked and rotate it.
test_url = os.environ.get( "ATDATA_TEST_REDIS_URL" )
if not test_url:
    # Fail early with an actionable message rather than passing None to from_url.
    raise RuntimeError(
        "Set the ATDATA_TEST_REDIS_URL environment variable to the Redis "
        "connection URL before running this notebook."
    )
test_redis = Redis.from_url( test_url )

In [3]:
# Build the repo handle used by the rest of the notebook. It is given the Redis
# client created above, the hive path that inserted shards are written under,
# and the path to an env file holding the S3 credentials.
local_repo = al.Repo(
    redis = test_redis,
    hive_path = 'analysis-hive/test-hive/',
    s3_credentials = '.credentials/r2-analysis-hive.env',
)

In [4]:
import atdata
import webdataset as wds
from dataclasses import dataclass
from numpy.typing import NDArray

@dataclass
class TestSample( atdata.PackableSample ):
    """Minimal sample type for this notebook: an integer id plus an array payload."""
    # Integer identifier of the sample.
    identifier: int
    # Array payload; the annotation itself constrains neither dtype nor shape.
    values: NDArray

In [5]:
import numpy as np
import os

# Number of synthetic samples to generate.
n = 950
# Fixed seed so that Restart & Run All regenerates identical shard contents.
SEED = 0
# Shape of every sample's `values` array.
SAMPLE_SHAPE = (1024, 256)
# Upper bound on samples per local shard (ShardWriter's `maxcount`).
SAMPLES_PER_SHARD = 100

# Dedicated generator: seeding it here keeps this cell reproducible without
# touching NumPy's global random state.
rng = np.random.default_rng( SEED )

# The shard pattern below writes into ./data, so make sure it exists.
os.makedirs( 'data', exist_ok = True )

with wds.writer.ShardWriter( 'data/test-%06d.tar', maxcount = SAMPLES_PER_SHARD ) as sink:
    for i in range( n ):
        new_sample = TestSample(
            identifier = i + 1,  # identifiers are 1-based
            values = rng.normal( size = SAMPLE_SHAPE ),
        )
        sink.write( new_sample.as_wds )

# writing data/test-000000.tar 0 0.0 GB 0
# writing data/test-000001.tar 100 0.2 GB 100
# writing data/test-000002.tar 100 0.2 GB 200
# writing data/test-000003.tar 100 0.2 GB 300
# writing data/test-000004.tar 100 0.2 GB 400
# writing data/test-000005.tar 100 0.2 GB 500
# writing data/test-000006.tar 100 0.2 GB 600
# writing data/test-000007.tar 100 0.2 GB 700
# writing data/test-000008.tar 100 0.2 GB 800
# writing data/test-000009.tar 100 0.2 GB 900


In [6]:
# Wrap the ten locally written shards as a typed dataset.
new_dataset = atdata.Dataset[TestSample]( 'data/test-{000000..000009}.tar' )

# Insert the dataset into the repo. The call stays the last expression of the
# cell so that its return value is displayed.
# NOTE(review): in the recorded output the returned entry's wds_url ends in
# '{000000..-00001}.tar', which is not a valid shard range -- this looks like a
# problem inside insert() rather than in this cell; confirm against the library.
local_repo.insert(
    new_dataset,
    cache_local = True,
    maxcount = 150,
)

# writing analysis-hive/test-hive/atdata--bf546505-3e70-44d4-ab1f-9729ad25ce8c--000000.tar 0 0.0 GB 0
Copying file to s3 ... done.
Deleting local cache file ... done.
# writing analysis-hive/test-hive/atdata--bf546505-3e70-44d4-ab1f-9729ad25ce8c--000001.tar 150 0.3 GB 150
Copying file to s3 ... done.
Deleting local cache file ... done.
# writing analysis-hive/test-hive/atdata--bf546505-3e70-44d4-ab1f-9729ad25ce8c--000002.tar 150 0.3 GB 300
Copying file to s3 ... done.
Deleting local cache file ... done.
# writing analysis-hive/test-hive/atdata--bf546505-3e70-44d4-ab1f-9729ad25ce8c--000003.tar 150 0.3 GB 450
Copying file to s3 ... done.
Deleting local cache file ... done.
# writing analysis-hive/test-hive/atdata--bf546505-3e70-44d4-ab1f-9729ad25ce8c--000004.tar 150 0.3 GB 600
Copying file to s3 ... done.
Deleting local cache file ... done.
# writing analysis-hive/test-hive/atdata--bf546505-3e70-44d4-ab1f-9729ad25ce8c--000005.tar 150 0.3 GB 750
Copying file to s3 ... done.
Deleting local

(BasicIndexEntry(wds_url='analysis-hive/test-hive/atdata--bf546505-3e70-44d4-ab1f-9729ad25ce8c--{000000..-00001}.tar', sample_kind='__main__.TestSample', metadata_url='analysis-hive/test-hive/metadata/atdata-metadata--bf546505-3e70-44d4-ab1f-9729ad25ce8c.msgpack', uuid='bf546505-3e70-44d4-ab1f-9729ad25ce8c'),
 <atdata.dataset.Dataset at 0x111daa9f0>)

In [8]:
# Keep the first element of whatever the index's list() returns -- presumably the
# index entries; confirm against the library.
# NOTE(review): this cell is recorded as In [8] yet sits above In [7], so the
# notebook was executed out of order; `x` is also an uninformative name.
x = local_repo.index.list()[0]

In [7]:
# Show every entry currently held by the repo's index, one per line.
for entry in local_repo.index.entries:
    print( entry )

BasicIndexEntry(wds_url='analysis-hive/test-hive/atdata--138f45b5-3ba4-46b7-9521-4930093e2ffc--{000000..000000}.tar', sample_kind='atdata.local.T', metadata_url='analysis-hive/test-hive/metadata/atdata-metadata--138f45b5-3ba4-46b7-9521-4930093e2ffc.msgpack', uuid='138f45b5-3ba4-46b7-9521-4930093e2ffc')
BasicIndexEntry(wds_url='analysis-hive/test-hive/atdata--66854cd0-1c3e-4ad2-a757-e100d566a878--{000000..000006}.tar', sample_kind='__main__.TestSample', metadata_url='analysis-hive/test-hive/metadata/atdata-metadata--66854cd0-1c3e-4ad2-a757-e100d566a878.msgpack', uuid='66854cd0-1c3e-4ad2-a757-e100d566a878')
BasicIndexEntry(wds_url='analysis-hive/test-hive/atdata--bf546505-3e70-44d4-ab1f-9729ad25ce8c--{000000..-00001}.tar', sample_kind='__main__.TestSample', metadata_url='analysis-hive/test-hive/metadata/atdata-metadata--bf546505-3e70-44d4-ab1f-9729ad25ce8c.msgpack', uuid='bf546505-3e70-44d4-ab1f-9729ad25ce8c')
