In [15]:
"""
All rasters were converted to TileDB using the following basic configuration:
No performance testing for optimal compression ratios, compression filters, block sizes or I/O was undertaken.
Initial storage comparisons (eyeball) of GeoTIFF to TileDB showed a 50% reduction in storage size (not including the sidecar metadata files for the TIFF, nor the pyramids).
Deakin's GeoTIFFs were using LZW for compression, which in some circles is considered the worst of the worst, so this is not an apples-to-apples comparison.
Some provided data has block sizes of 2048, compared with the 512 used here. E.g. a DEFLATE GeoTIFF with 2048 blocks was ~22% larger than a TileDB array with 512 blocks and ZSTD.
gdal_translate -of TileDB -co TILEDB_CONFIG=tiledb-config.txt -co COMPRESSION=ZSTD -co COMPRESSION_LEVEL=16 -co BLOCKXSIZE=512 -co BLOCKYSIZE=512 
"""

In [None]:
import s3fs
import json
from pathlib import Path
import numpy
import rasterio
from rasterio.session import AWSSession
import boto3
import tiledb
import tiledb.cloud
from tiledb.cloud.compute import DelayedArrayUDF, Delayed
import pandas
import geopandas
import fiona
#from fiona.session import AWSSession
import pystac

In [12]:
# Default boto3 session; its credentials are reused below for both rasterio
# (via AWSSession) and the TileDB S3 configuration.
session = boto3.Session()
# NOTE(review): get_credentials() can return None when no credentials are
# configured — later cells access creds.access_key without checking.
creds = session.get_credentials()

In [13]:
#with rasterio.env.Env(aws_access_key_id=creds.access_key, aws_secret_access_key=creds.secret_key) as Env:
    

In [75]:
# Print the metadata and compression scheme of two of the provided GeoTIFFs.
# Block shapes are printed for the second (James Cook University) file only.
with rasterio.Env(AWSSession(session)) as env:
    for uri, show_blocks in (
        ("s3://ausseabed-pl019-provided-data/DeakinUniversity/WilsonsProm_WestGlennie_Refuge_SRL/geotif/WilsonsProm_SRL_WestGlennie_Refuge_Z_2m_WGS84_UTMz55S.tif", False),
        ("s3://ausseabed-pl019-provided-data/JamesCookUniversity/0364_BeagleMarinePark_2018/Products/geotif/ga-0364_tile-s39-5e146-5_bathymetry_egm2008_2018_2m_epsg-4326_20211025.tiff", True),
    ):
        with rasterio.open(uri) as ds:
            print(ds.meta)
            print(ds.compression)
            if show_blocks:
                print(ds.block_shapes)

{'driver': 'GTiff', 'dtype': 'float32', 'nodata': 3.4028234663852886e+38, 'width': 28833, 'height': 16132, 'count': 1, 'crs': CRS.from_epsg(32755), 'transform': Affine(2.0, 0.0, 413345.0,
       0.0, -2.0, 5694193.0)}
Compression.lzw
{'driver': 'GTiff', 'dtype': 'float32', 'nodata': 3.4028234663852886e+38, 'width': 16620, 'height': 17025, 'count': 3, 'crs': CRS.from_epsg(4326), 'transform': Affine(1.7966305682390557e-05, 0.0, 146.7014346151391,
       0.0, -1.796630568239043e-05, -39.19413364998645)}
Compression.deflate
[(2048, 2048), (2048, 2048), (2048, 2048)]


In [18]:
# Build a TileDB context that authenticates to S3 with the boto3 credentials.
s3_settings = {
    "vfs.s3.aws_access_key_id": creds.access_key,
    "vfs.s3.aws_secret_access_key": creds.secret_key,
}
# Temporary credentials (STS, SSO, assumed roles) are only valid together with
# their session token; long-lived keys have no token, so only set it if present.
if creds.token:
    s3_settings["vfs.s3.aws_session_token"] = creds.token
config = tiledb.Config(s3_settings)
config_dict = config.dict()
ctx = tiledb.Ctx(config=config)

In [20]:
# Open the ingested TileDB array on S3 and dump its array-level metadata.
# The captured output shows a single '_gdal' entry stored as a tuple of integers.
with tiledb.open("s3://ausseabed-pl019-ingested-data/L3/WilsonsPromontory_MNP/WilsonsPromontory_Z_250cm_WGS84_UTMz55S.tiledb", ctx=ctx) as ds:
    print(ds.meta.items())

(('_gdal', (60, 80, 65, 77, 68, 97, 116, 97, 115, 101, 116, 62, 10, 32, 32, 60, 83, 82, 83, 32, 100, 97, 116, 97, 65, 120, 105, 115, 84, 111, 83, 82, 83, 65, 120, 105, 115, 77, 97, 112, 112, 105, 110, 103, 61, 34, 49, 44, 50, 34, 62, 80, 82, 79, 74, 67, 83, 91, 34, 87, 71, 83, 95, 49, 57, 56, 52, 95, 85, 84, 77, 95, 90, 111, 110, 101, 95, 53, 53, 83, 34, 44, 71, 69, 79, 71, 67, 83, 91, 34, 87, 71, 83, 32, 56, 52, 34, 44, 68, 65, 84, 85, 77, 91, 34, 87, 71, 83, 95, 49, 57, 56, 52, 34, 44, 83, 80, 72, 69, 82, 79, 73, 68, 91, 34, 87, 71, 83, 32, 56, 52, 34, 44, 54, 51, 55, 56, 49, 51, 55, 44, 50, 57, 56, 46, 50, 53, 55, 50, 50, 51, 53, 54, 51, 44, 65, 85, 84, 72, 79, 82, 73, 84, 89, 91, 34, 69, 80, 83, 71, 34, 44, 34, 55, 48, 51, 48, 34, 93, 93, 44, 65, 85, 84, 72, 79, 82, 73, 84, 89, 91, 34, 69, 80, 83, 71, 34, 44, 34, 54, 51, 50, 54, 34, 93, 93, 44, 80, 82, 73, 77, 69, 77, 91, 34, 71, 114, 101, 101, 110, 119, 105, 99, 104, 34, 44, 48, 93, 44, 85, 78, 73, 84, 91, 34, 100, 101, 103, 114, 

In [79]:
# Convert one GeoTIFF on S3 into a TileDB array (ZSTD level 16, 512x512 blocks).
with rasterio.Env(AWSSession(session)) as env:
    with rasterio.open("s3://ausseabed-pl019-provided-data/JamesCookUniversity/0364_BeagleMarinePark_2018/Products/geotif/ga-0364_tile-s40-5e145-5_bathymetry_egm2008_2018_2m_epsg-4326_20220511.tiff") as src:
        out_uri = "s3://ausseabed-pl019-ingested-data/L3/0364_BeagleMarinePark_2018/ga-0364_tile-s40-5e145-5_bathymetry_egm2008_2018_2m_epsg-4326_20220511.tiledb"
        # The output inherits the source's band count, georeferencing, nodata
        # value, dimensions and data type.
        with rasterio.open(out_uri, "w", count=src.count, driver="TileDB", crs=src.crs, transform=src.transform, nodata=src.nodata, height=src.height, width=src.width, dtype=src.dtypes[0], compression="zstd", blockxsize=512, blockysize=512, compression_level=16) as dst:
            # Copy one source block at a time (all bands) rather than reading
            # the whole raster into memory with a single src.read().
            # NOTE(review): assumes every band shares band 1's block layout.
            for _, window in src.block_windows(1):
                dst.write(src.read(window=window), window=window)

In [32]:
# Drop `data` if an earlier cell created it. A plain `del data` raises
# NameError on a fresh kernel, because the only assignment to `data` in the
# conversion cell is commented out.
globals().pop("data", None)

In [33]:
import gc

In [66]:
# Force a garbage-collection pass; the displayed number is gc.collect()'s return value.
gc.collect()

299

In [80]:
# Opened without a context manager so the handle stays available to the
# following cells, which read ds.schema and ds.meta.
# NOTE(review): nothing visible here calls ds.close() — confirm it is closed later.
ds = tiledb.open("s3://ausseabed-pl019-ingested-data/L3/WilsonsPromontory_MNP/WilsonsPromontory_Z_250cm_WGS84_UTMz55S.tiledb", ctx=ctx)

In [81]:
# Display the array schema (dimensions, tile extents, attribute and filters).
ds.schema

Domain
"NameDomainTileData TypeIs Var-lengthFiltersBANDS(1, 1)1uint64False-Y(0, 7679)512uint64False-X(0, 14335)512uint64False-"
Attributes
NameData TypeIs Var-LenIs NullableFiltersTDB_VALUESfloat32FalseFalse Name Option Level ZstdFilter level16
Cell Order
row-major
Tile Order
row-major
Capacity
10000
Sparse

Name,Domain,Tile,Data Type,Is Var-length,Filters
BANDS,"(1, 1)",1,uint64,False,-
Y,"(0, 7679)",512,uint64,False,-
X,"(0, 14335)",512,uint64,False,-

Name,Data Type,Is Var-Len,Is Nullable,Filters
TDB_VALUES,float32,False,False,Name Option Level ZstdFilter level16

Name,Option,Level
ZstdFilter,level,16


In [83]:
# List the array-level metadata keys.
ds.meta.keys()

['_gdal']

In [84]:
# Raw value stored under the '_gdal' metadata key; indexed as a sequence of integers below.
md = ds.meta["_gdal"]

In [88]:
# Binary form of the first metadata value; bin() already returns a str.
bin(md[0])

'0b111100'

In [98]:
# Hexadecimal form of the second metadata value (0x50 is ASCII 'P').
hex(md[1])

'0x50'

In [91]:
# Concatenate the hex() form of every metadata value; each piece keeps its "0x" prefix.
hex_string = "".join(map(hex, md))

In [92]:
import xml.etree.ElementTree as ET

In [129]:
# Reinterpret the integer values as a byte string; the output shows GDAL PAM XML.
numpy.bytes_(md)

b'<PAMDataset>\n  <SRS dataAxisToSRSAxisMapping="1,2">PROJCS["WGS_1984_UTM_Zone_55S",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",147],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","32755"]]</SRS>\n  <GeoTransform>  4.2591875000000000e+05,  2.5000000000000000e+00,  0.0000000000000000e+00,  5.6802762500000000e+06,  0.0000000000000000e+00, -2.5000000000000000e+00</GeoTransform>\n  <Metadata domain="IMAGE_STRUCTURE">\n    <MDI key="DATA_TYPE">Float32</MDI>\n    <MDI key="INTERLEAVE">BAND</MDI>\n    <MDI key="NBITS">32</MDI>\n    <MDI key="X_SIZE">13898<

In [152]:
# Same bytes decoded as UTF-8 text.
numpy.bytes_(md).decode("utf-8")

'<PAMDataset>\n  <SRS dataAxisToSRSAxisMapping="1,2">PROJCS["WGS_1984_UTM_Zone_55S",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",147],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","32755"]]</SRS>\n  <GeoTransform>  4.2591875000000000e+05,  2.5000000000000000e+00,  0.0000000000000000e+00,  5.6802762500000000e+06,  0.0000000000000000e+00, -2.5000000000000000e+00</GeoTransform>\n  <Metadata domain="IMAGE_STRUCTURE">\n    <MDI key="DATA_TYPE">Float32</MDI>\n    <MDI key="INTERLEAVE">BAND</MDI>\n    <MDI key="NBITS">32</MDI>\n    <MDI key="X_SIZE">13898</

In [130]:
# Parse the metadata bytes as XML; `root` is the top-level element of the parsed document.
root = ET.fromstring(numpy.bytes_(md))

In [133]:
# Show the tag and attributes of each direct child of the XML root.
for node in root:
    print(node.tag, node.attrib)

SRS {'dataAxisToSRSAxisMapping': '1,2'}
GeoTransform {}
Metadata {'domain': 'IMAGE_STRUCTURE'}
Metadata {}
PAMRasterBand {'band': '1'}


In [135]:
# Look up the existing <GeoTransform> element. ET.SubElement() would instead
# create a new, empty <GeoTransform> and append it to `root`, silently
# modifying the parsed metadata tree.
el = root.find('GeoTransform')

In [138]:
# Display the element currently bound to `el`.
el

<Element 'GeoTransform' at 0x7f17fb361ad0>

In [140]:
# Text of the first <GeoTransform> element: six comma-separated numbers.
root.find('GeoTransform').text

'  4.2591875000000000e+05,  2.5000000000000000e+00,  0.0000000000000000e+00,  5.6802762500000000e+06,  0.0000000000000000e+00, -2.5000000000000000e+00'

In [143]:
# Parse the comma-separated GeoTransform text into a list of floats
# (float() tolerates the surrounding whitespace).
list(map(float, root.find('GeoTransform').text.split(',')))

[425918.75, 2.5, 0.0, 5680276.25, 0.0, -2.5]

In [144]:
# Text of the <SRS> element: the spatial reference as a WKT string.
root.find('SRS').text

'PROJCS["WGS_1984_UTM_Zone_55S",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",147],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",10000000],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH],AUTHORITY["EPSG","32755"]]'

In [149]:
# .text of each <Metadata> element is only the whitespace before its first
# child; the actual values live in the nested <MDI> elements.
[i.text for i in root.findall("Metadata")]

['\n    ', '\n    ']

In [153]:
# First <Metadata> element (domain="IMAGE_STRUCTURE" in the output above).
e = root.findall("Metadata")[0]

In [157]:
# Element.getiterator() was removed in Python 3.9. Element.iter() is the
# replacement; wrapping it in list() matches what getiterator() returned,
# so `iterator` can still be traversed more than once.
iterator = list(e.iter())

In [158]:
# Tag, attributes and text of the element and each of its <MDI> children.
[(i.tag, i.attrib, i.text) for i in iterator]

[('Metadata', {'domain': 'IMAGE_STRUCTURE'}, '\n    '),
 ('MDI', {'key': 'DATA_TYPE'}, 'Float32'),
 ('MDI', {'key': 'INTERLEAVE'}, 'BAND'),
 ('MDI', {'key': 'NBITS'}, '32'),
 ('MDI', {'key': 'X_SIZE'}, '13898'),
 ('MDI', {'key': 'Y_SIZE'}, '7454')]

In [159]:
# Second <Metadata> element (the one without a domain attribute).
ee = root.findall("Metadata")[1]

In [160]:
# Element.getiterator() was removed in Python 3.9. Element.iter() is the
# replacement; wrapping it in list() matches what getiterator() returned,
# so `iterator2` can still be traversed more than once.
iterator2 = list(ee.iter())

In [161]:
# Tag, attributes and text of the element and each of its <MDI> children.
[(i.tag, i.attrib, i.text) for i in iterator2]

[('Metadata', {}, '\n    '),
 ('MDI', {'key': 'AREA_OR_POINT'}, 'Area'),
 ('MDI', {'key': 'DataType'}, 'Generic')]