In [29]:
import json
import shutil
import urllib.request

from hashlib import sha256
from pathlib import Path

import numpy as np
import rioxarray

from tifffile import TiffFile
from zarr import codecs

In [2]:
# URL of the STAC item whose assets are looked up and downloaded further down.
STAC_ITEM_HREF = "https://earth-search.aws.element84.com/v1/collections/sentinel-2-c1-l2a/items/S2B_T10TFR_20231223T190950_L2A"
# Asset keys to keep from the item. Only 'red' is currently enabled; the wider
# set that was tried earlier is ['blue', 'green', 'red', 'nir'].
ASSETS = ['red']

In [3]:
def hash_bytes(b: bytes) -> str:
    """Return the SHA-256 digest of ``b`` as a lowercase hexadecimal string.

    Used throughout the notebook to compare byte blobs (TIFF tile, re-encoded
    tile, Zarr chunk) without printing their contents.
    """
    return sha256(b).hexdigest()

In [4]:
# Fetch the STAC item document and parse its JSON body into a dict.
with urllib.request.urlopen(STAC_ITEM_HREF) as response:
    stac_item = json.load(response)

In [5]:
stac_item

{'type': 'Feature',
 'stac_version': '1.0.0',
 'id': 'S2B_T10TFR_20231223T190950_L2A',
 'properties': {'created': '2024-01-05T19:36:39.226Z',
  'platform': 'sentinel-2b',
  'constellation': 'sentinel-2',
  'instruments': ['msi'],
  'eo:cloud_cover': 1.251174,
  'proj:epsg': 32610,
  'proj:centroid': {'lat': 45.54146, 'lon': -121.01583},
  'mgrs:utm_zone': 10,
  'mgrs:latitude_band': 'T',
  'mgrs:grid_square': 'FR',
  'grid:code': 'MGRS-10TFR',
  'view:azimuth': 261.58446495525544,
  'view:incidence_angle': 4.470565438518265,
  'view:sun_azimuth': 167.494432114129,
  'view:sun_elevation': 20.050386200648404,
  's2:tile_id': 'S2B_OPER_MSI_L2A_TL_2BPS_20231223T212404_A035503_T10TFR_N05.10',
  's2:degraded_msi_data_percentage': 0.01,
  's2:nodata_pixel_percentage': 3e-06,
  's2:saturated_defective_pixel_percentage': 0,
  's2:dark_features_percentage': 14.411107,
  's2:cloud_shadow_percentage': 0.218994,
  's2:vegetation_percentage': 27.606273,
  's2:not_vegetated_percentage': 53.845996,
  

In [6]:
# Map each selected asset key to its download URL; assets not listed in
# ASSETS are ignored.
cog_hrefs = {
    asset_key: asset_info['href']
    for asset_key, asset_info in stac_item['assets'].items()
    if asset_key in ASSETS
}
cog_hrefs

{'red': 'https://e84-earth-search-sentinel-data.s3.us-west-2.amazonaws.com/sentinel-2-c1-l2a/10/T/FR/2023/12/S2B_T10TFR_20231223T190950_L2A/B04.tif'}

In [7]:
# Working directory (relative to the current working directory) that holds the
# downloaded TIFFs and the Zarr store written later in the notebook.
test_data = Path('test_data')
test_data.mkdir(exist_ok=True)  # no error when the directory is already there

In [8]:
# Download every selected asset into test_data/<name>.tif, skipping files that
# are already present so re-running the notebook does not re-fetch them.
for name, href in cog_hrefs.items():
    file_name = test_data / (name + '.tif')

    if file_name.exists():
        continue

    # Stream into a temporary sibling and rename only once the transfer has
    # finished: the whole file is never held in memory, and an interrupted
    # download cannot leave a truncated .tif that the exists() check above
    # would later mistake for a complete one.
    partial_name = file_name.with_name(file_name.name + '.part')
    with urllib.request.urlopen(urllib.request.Request(href)) as response, \
            partial_name.open('wb') as partial_file:
        shutil.copyfileobj(response, partial_file)
    partial_name.replace(file_name)

In [9]:
# Collect the TIFF tags of the first page into a name -> value dict and echo
# each one as it is read.
tags = {}
with TiffFile(test_data / 'red.tif') as tif:
    first_page = tif.pages[0]
    for tag in first_page.tags:
        tags[tag.name] = tag.value
        print(tag.name, tags[tag.name])

ImageWidth 10980
ImageLength 10980
BitsPerSample 16
Compression 8
PhotometricInterpretation 1
SamplesPerPixel 1
PlanarConfiguration 1
Predictor 2
TileWidth 1024
TileLength 1024
TileOffsets (55962680, 57411167, 58810332, 60222446, 61651003, 63054996, 64463518, 66025043, 67523672, 68987825, 70439668, 71480485, 72831139, 74191906, 75556803, 76922917, 78346396, 79767466, 81177106, 82626646, 84045343, 85436959, 86443457, 87744763, 89128625, 90516041, 91896145, 93323921, 94699513, 96054131, 97398784, 98768288, 100117176, 101099165, 102475360, 103914015, 105337327, 106767167, 108155055, 109496290, 110853634, 112244713, 113588526, 114895196, 115881834, 117297939, 118737048, 120185270, 121620456, 123031867, 124402930, 125797422, 127166585, 128512496, 129847193, 130813023, 132267746, 133721219, 135168258, 136590051, 138004961, 139387400, 140792216, 142173401, 143531515, 144883653, 145909014, 147512228, 148965842, 150393926, 151820955, 153228408, 154629691, 156014882, 157380367, 158731976, 160114

In [100]:
# Hash the first tile exactly as it is stored in the TIFF (still compressed).
# The byte range comes from the tags read earlier instead of hard-coded
# numbers, so the cell stays correct if the source file changes.
first_tile_offset = tags['TileOffsets'][0]
first_tile_nbytes = tags['TileByteCounts'][0]
with (test_data/'red.tif').open('rb') as tif_file:
    tif_file.seek(first_tile_offset)
    print(hash_bytes(tif_file.read(first_tile_nbytes)))

2c02e7e60074d6767ccb4c44de2da249d331fd82e107431e41cfe4069bae0d62


In [24]:
# Open the downloaded red band with rioxarray and wrap it in a Dataset whose
# single data variable is named 'red'.
red_tif_path = test_data / 'red.tif'
ds = rioxarray.open_rasterio(red_tif_path).to_dataset(name='red')

In [25]:
ds

In [99]:
# Write the dataset as a Zarr v3 store with 1024x1024 chunks, matching the
# TileWidth/TileLength reported by the TIFF tags.
ds.to_zarr(
    test_data/'zarr',
    # Overwrite any store left by a previous run; without an explicit mode the
    # write fails once the store exists, so the notebook could not be re-run.
    mode='w',
    zarr_format=3,
    encoding={
        'red': {
            "chunks": (1, 1024, 1024),
        },
    },
    safe_chunks=False,
)



<xarray.backends.zarr.ZarrStore at 0x159d1e830>

In [32]:
import zlib

In [47]:
# Read the first tile's stored (compressed) bytes from the TIFF. Offset and
# length are taken from the tags read earlier rather than hard-coded.
first_tile_offset = tags['TileOffsets'][0]
first_tile_nbytes = tags['TileByteCounts'][0]
with (test_data/'red.tif').open('rb') as tif_file:
    tif_file.seek(first_tile_offset)
    tile_bytes = tif_file.read(first_tile_nbytes)


In [48]:
len(tile_bytes)

1448479

In [None]:
# Check that the tile inflates with zlib (wbits=0 takes the window size from
# the stream's zlib header). Only the decompressed length is displayed;
# echoing the bytes themselves would flood the cell output.
len(zlib.decompress(tile_bytes, 0))

In [51]:
# Zarr's Zstd codec with default settings.
# NOTE(review): no later cell visible here references zstd_codec; the
# re-encoding cells use the numcodecs `Zstd` instance instead.
zstd_codec = codecs.ZstdCodec()

In [60]:
from numcodecs.zstd import Zstd

from io import BytesIO

In [64]:
zstd = Zstd()

In [66]:
# First attempt: inflate the TIFF tile and Zstd-compress the result as-is,
# without undoing the TIFF predictor. The next cell rebinds zstd_tile, so this
# value is only an intermediate experiment.
zstd_tile = zstd.encode(zlib.decompress(tile_bytes, 0))

In [97]:
# Rebuild the pixel values before re-compressing. The tag dump reports
# Predictor 2 (horizontal differencing in the TIFF spec), so each row of the
# inflated tile holds differences; a running uint16 sum along axis 1 restores
# the original samples.
tile_deltas = np.frombuffer(zlib.decompress(tile_bytes, 0), dtype=np.uint16).reshape(1024, 1024)
tile_pixels = np.cumsum(tile_deltas, axis=1, dtype=np.uint16)
zstd_tile = zstd.encode(tile_pixels.tobytes())

In [102]:
# Size and SHA-256 of the re-encoded tile, for comparison with the Zarr chunk.
print(len(zstd_tile))
print(hash_bytes(zstd_tile))

1454273
6bad8a3594bbdf9300c7f823a5969ece06d6f596d9139908c2f01de51e564af8


In [81]:
# Load the stored bytes of chunk 0/0/0 of the 'red' array straight from the
# Zarr store directory.
zarr_chunk_path = test_data / 'zarr' / 'red' / 'c' / '0' / '0' / '0'
zarr_tile_bytes = zarr_chunk_path.read_bytes()

In [101]:
# Size and SHA-256 of the chunk as written to the Zarr store.
print(len(zarr_tile_bytes))
print(hash_bytes(zarr_tile_bytes))
# Turn the visual hash comparison into an explicit check, so a mismatch with
# the manually re-encoded TIFF tile stops the notebook.
assert zarr_tile_bytes == zstd_tile, "Zarr chunk bytes differ from the re-encoded TIFF tile"

1454273
6bad8a3594bbdf9300c7f823a5969ece06d6f596d9139908c2f01de51e564af8


In [86]:
# Re-open the Zarr store written above through xarray so the encoding it
# recorded for 'red' can be inspected in the following cells.
import xarray
ds2 = xarray.open_dataset(test_data/'zarr', engine='zarr')

In [87]:
ds2

In [89]:
ds2['red'].encoding

{'chunks': (1, 1024, 1024),
 'preferred_chunks': {'band': 1, 'y': 1024, 'x': 1024},
 'compressors': (ZstdCodec(level=0, checksum=False),),
 'filters': (),
 'shards': None,
 'serializer': BytesCodec(endian=<Endian.little: 'little'>),
 '_FillValue': 0,
 'scale_factor': 0.0001,
 'add_offset': -0.1,
 'dtype': dtype('uint16')}