# Basic Workflow to create Sentinel-Hub BYOC collection

In [1]:
import os
import glob

In [2]:
# define repo name and get root working directory
repo = 'byoc-api'
root_path = os.getcwd()[ 0 : os.getcwd().find( repo ) + len ( repo )]
root_path

'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api'

### _Use small selection of Landsat 8/9 images to create BYOC collection_

In [3]:
# get directory contents
scenes = glob.glob( os.path.join( root_path, 'data/*.TIF') )
scenes

['C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\data\\LC08_L2SP_203025_20210907_20210915_02_T1_SR_B1.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\data\\LC08_L2SP_203025_20210907_20210915_02_T1_SR_B2.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\data\\LC08_L2SP_203025_20211126_20211201_02_T1_SR_B1.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\data\\LC08_L2SP_203025_20211126_20211201_02_T1_SR_B2.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\data\\LC09_L2SP_203024_20220310_20220312_02_T1_SR_B1.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\data\\LC09_L2SP_203024_20220310_20220312_02_T1_SR_B2.TIF']

In [4]:
# LC08_L2SP_203025_20210907_20210915_02_T1_SR_B1.TIF
SOURCE = 0
LEVEL = 1
PATH_ROW = 2
ACQUISITION_DATETIME = 3
PROCESSING_DATETIME = 4
COLLECTION = 5

### _Use GDAL to convert Landsat 8/9 channel images to COG format_

In [5]:
from osgeo import gdal
from datetime import datetime

# process each scene to cog
out_path = os.path.join( root_path, 'cogs' )
for scene in scenes:
    
    # split on underscore
    tokens = os.path.basename( scene ).split( '_' )
    dt = datetime.strptime( tokens[ ACQUISITION_DATETIME ], '%Y%m%d' )
    
    # construct unique path for scene
    path = os.path.join( out_path, tokens[ PATH_ROW ] )
    path = os.path.join( path, dt.strftime( '%Y%m%d_000000' ) )
    
    if not os.path.exists( path ):
        os.makedirs( path )

    # open newly created tile with gdal
    ds = gdal.Open( scene )
    if ds is not None:
        
        # additional info from: https://docs.sentinel-hub.com/api/latest/api/byoc/#constraints-and-settings
        # remember to define NO_DATA_VALUE if input data has nodata values: -a_nodata NO_DATA_VALUE
        # add a predictor to further reduce the file size: add -co PREDICTOR=YES.
        
        # setup options
        options = '-a_nodata 0 '
        options += '-of COG -co COMPRESS=DEFLATE -co BLOCKSIZE=1024 -co RESAMPLING=AVERAGE -co OVERVIEWS=IGNORE_EXISTING '
        options += '-co PREDICTOR=YES'
                                
        # translate png / jpg into geotiff
        pathname = os.path.join( path, tokens[ -1 ] )
        ds = gdal.Translate( pathname, ds, options=options )
        ds = None

In [6]:
cogs = glob.glob( os.path.join( root_path, 'cogs/**/*.TIF'), recursive=True )
cogs

['C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\cogs\\203024\\20220310_000000\\B1.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\cogs\\203024\\20220310_000000\\B2.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\cogs\\203025\\20210907_000000\\B1.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\cogs\\203025\\20210907_000000\\B2.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\cogs\\203025\\20211126_000000\\B1.TIF',
 'C:\\Users\\crwil\\Documents\\GitHub\\sentinelhub\\byoc-api\\cogs\\203025\\20211126_000000\\B2.TIF']

### _Upload newly generated COG images to AWS S3 bucket storage_

In [7]:
import boto3

# bucket details
s3 = boto3.resource('s3')
BUCKET = "4rd-climate-finance"

for cog in cogs:
    
    # create remote pathname
    prefix = os.path.dirname( cog )[ len( os.path.join( root_path, 'cogs' ) ) + 1 : ]
    prefix = os.path.join( 'byoc-collection-123', prefix )
    
    # get pathname
    s3_pathname = os.path.join( prefix, os.path.basename( cog ) )    
    s3_pathname = s3_pathname.replace(os.sep, '/' )
    
    # upload file to s3 bucket
    s3.Bucket(BUCKET).upload_file( cog, s3_pathname )

### _Generate list of newly uploaded COG images on AWS storage bucket_

In [8]:
import os
import boto3
from collections import namedtuple
from operator import attrgetter


S3Obj = namedtuple('S3Obj', ['key', 'mtime', 'size', 'ETag'])


def s3list(bucket, path, start=None, end=None, recursive=True, list_dirs=True,
           list_objs=True, limit=None):
    """
    Iterator that lists a bucket's objects under path, (optionally) starting with
    start and ending before end.

    If recursive is False, then list only the "depth=0" items (dirs and objects).

    If recursive is True, then list recursively all objects (no dirs).

    Args:
        bucket:
            a boto3.resource('s3').Bucket().
        path:
            a directory in the bucket.
        start:
            optional: start key, inclusive (may be a relative path under path, or
            absolute in the bucket)
        end:
            optional: stop key, exclusive (may be a relative path under path, or
            absolute in the bucket)
        recursive:
            optional, default True. If True, lists only objects. If False, lists
            only depth 0 "directories" and objects.
        list_dirs:
            optional, default True. Has no effect in recursive listing. On
            non-recursive listing, if False, then directories are omitted.
        list_objs:
            optional, default True. If False, then directories are omitted.
        limit:
            optional. If specified, then lists at most this many items.

    Returns:
        an iterator of S3Obj.

    Examples:
        # set up
        >>> s3 = boto3.resource('s3')
        ... bucket = s3.Bucket('bucket-name')

        # iterate through all S3 objects under some dir
        >>> for p in s3list(bucket, 'some/dir'):
        ...     print(p)

        # iterate through up to 20 S3 objects under some dir, starting with foo_0010
        >>> for p in s3list(bucket, 'some/dir', limit=20, start='foo_0010'):
        ...     print(p)

        # non-recursive listing under some dir:
        >>> for p in s3list(bucket, 'some/dir', recursive=False):
        ...     print(p)

        # non-recursive listing under some dir, listing only dirs:
        >>> for p in s3list(bucket, 'some/dir', recursive=False, list_objs=False):
        ...     print(p)
"""
    kwargs = dict()
    if start is not None:
        if not start.startswith(path):
            start = os.path.join(path, start)
        # note: need to use a string just smaller than start, because
        # the list_object API specifies that start is excluded (the first
        # result is *after* start).
        kwargs.update(Marker=__prev_str(start))
    if end is not None:
        if not end.startswith(path):
            end = os.path.join(path, end)
    if not recursive:
        kwargs.update(Delimiter='/')
        if not path.endswith('/'):
            path += '/'
    kwargs.update(Prefix=path)
    if limit is not None:
        kwargs.update(PaginationConfig={'MaxItems': limit})

    paginator = bucket.meta.client.get_paginator('list_objects')
    for resp in paginator.paginate(Bucket=bucket.name, **kwargs):
        q = []
        if 'CommonPrefixes' in resp and list_dirs:
            q = [S3Obj(f['Prefix'], None, None, None) for f in resp['CommonPrefixes']]
        if 'Contents' in resp and list_objs:
            q += [S3Obj(f['Key'], f['LastModified'], f['Size'], f['ETag']) for f in resp['Contents']]
        # note: even with sorted lists, it is faster to sort(a+b)
        # than heapq.merge(a, b) at least up to 10K elements in each list
        q = sorted(q, key=attrgetter('key'))
        if limit is not None:
            q = q[:limit]
            limit -= len(q)
        for p in q:
            if end is not None and p.key >= end:
                return
            yield p


def __prev_str(s):
    if len(s) == 0:
        return s
    s, c = s[:-1], ord(s[-1])
    if c > 0:
        s += chr(c - 1)
    s += ''.join(['\u7FFF' for _ in range(10)])
    return s

In [9]:
s3 = boto3.resource('s3')
bucket = s3.Bucket('4rd-climate-finance')

# iterate through all S3 objects under some dir
paths = []
for p in s3list(bucket, 'byoc-collection-123/'):
    paths.append( os.path.dirname( str(p.key) ) )
paths

['byoc-collection-123/203024/20220310_000000',
 'byoc-collection-123/203024/20220310_000000',
 'byoc-collection-123/203025/20210907_000000',
 'byoc-collection-123/203025/20210907_000000',
 'byoc-collection-123/203025/20211126_000000',
 'byoc-collection-123/203025/20211126_000000']

### _Initialise Process and BYOC API Configuration_

In [10]:
from sentinelhub import (
    SHConfig, DataCollection, Geometry, BBox, CRS,
    SentinelHubRequest, filter_times, bbox_to_dimensions, MimeType,
    SentinelHubBYOC, ByocCollection, ByocTile, ByocCollectionAdditionalData,
    DownloadFailedException
)

# Initialize SentinelHubBYOC class
config = SHConfig()
byoc = SentinelHubBYOC(config=config)

In [11]:
# get list of byoc collections
collections_iterator = byoc.iter_collections()
my_collections = list(collections_iterator)
my_collections

[{'id': '2c709ccd-12bd-42c1-8308-ff8345e4a0d0',
  'userId': '1f046949-4e02-4859-a4c1-807fc312dc7a',
  'name': 'another_new_collection',
  's3Bucket': '4rd-climate-finance',
  'additionalData': {'bands': {'LC09_L2SP_203023_20220310_20220312_02_T1_SR_B1': {'bitDepth': 16,
     'source': 'LC09_L2SP_203023_20220310_20220312_02_T1_SR_B1',
     'bandIndex': 1,
     'sampleFormat': 'UINT'},
    'LC09_L2SP_203023_20220310_20220312_02_T1_SR_B2': {'bitDepth': 16,
     'source': 'LC09_L2SP_203023_20220310_20220312_02_T1_SR_B2',
     'bandIndex': 1,
     'sampleFormat': 'UINT'}},
   'maxMetersPerPixel': 2400.0,
   'extent': {'type': 'Polygon',
    'coordinates': [[[-3.789221674, 51.994693002],
      [-3.789221674, 54.177840954],
      [-0.164235984, 54.177840954],
      [-0.164235984, 51.994693002],
      [-3.789221674, 51.994693002]]]},
   'hasSensingTimes': 'NO'},
  'noData': 0,
  'created': '2022-03-17T20:26:55.456140Z',
  'requiresMetadataUpdate': False},
 {'id': '3d2a9be6-6c5b-4e7a-87d7-e6ea3

In [13]:
# create new collection
# new_collection = ByocCollection(name='byoc-collection-123', s3_bucket='4rd-climate-finance')
# created_collection = byoc.create_collection(new_collection)
for collection in my_collections:
    if collection['name'] == 'byoc-collection-123':
        created_collection = collection
        break
created_collection        

{'id': '3d2a9be6-6c5b-4e7a-87d7-e6ea30e72f78',
 'userId': '1f046949-4e02-4859-a4c1-807fc312dc7a',
 'name': 'byoc-collection-123',
 's3Bucket': '4rd-climate-finance',
 'additionalData': {'bands': {'B1': {'bitDepth': 16,
    'source': 'B1',
    'bandIndex': 1,
    'sampleFormat': 'UINT'},
   'B2': {'bitDepth': 16,
    'source': 'B2',
    'bandIndex': 1,
    'sampleFormat': 'UINT'}},
  'maxMetersPerPixel': 1200.0,
  'extent': {'type': 'Polygon',
   'coordinates': [[[-4.954224922, 49.180359191],
     [-4.954224922, 52.764843732],
     [-0.887166211, 52.764843732],
     [-0.887166211, 49.180359191],
     [-4.954224922, 49.180359191]]]},
  'fromSensingTime': '2021-09-07T00:00:00Z',
  'toSensingTime': '2022-03-10T00:00:00Z',
  'hasSensingTimes': 'YES'},
 'noData': 0,
 'created': '2022-03-18T13:32:02.367698Z',
 'requiresMetadataUpdate': False}

In [14]:
import pandas as pd

# show newly created collection
my_collections_df = pd.DataFrame(data=list(byoc.iter_collections()))
my_collections_df[['id','name','created']].head()

Unnamed: 0,id,name,created
0,2c709ccd-12bd-42c1-8308-ff8345e4a0d0,another_new_collection,2022-03-17T20:26:55.456140Z
1,3d2a9be6-6c5b-4e7a-87d7-e6ea30e72f78,byoc-collection-123,2022-03-18T13:32:02.367698Z
2,94ff6c84-2b9a-4268-a543-a03f75b40dc3,test,2022-03-17T12:22:29.216833Z


In [15]:
tiles = []
for path in set( paths ):
    tokens = path.split( '/' )
    
    dt = datetime.strptime( tokens[ -1 ], '%Y%m%d_%H%M%S' )
    tiles.append( ByocTile(
                    path=f'{path}/(BAND).TIF',
                    sensing_time=dt ) )
tiles

[ByocTile(path='byoc-collection-123/203025/20211126_000000/(BAND).TIF', other_data={}, status=None, tile_id=None, tile_geometry=None, cover_geometry=None, created=None, sensing_time=datetime.datetime(2021, 11, 26, 0, 0), additional_data=None),
 ByocTile(path='byoc-collection-123/203024/20220310_000000/(BAND).TIF', other_data={}, status=None, tile_id=None, tile_geometry=None, cover_geometry=None, created=None, sensing_time=datetime.datetime(2022, 3, 10, 0, 0), additional_data=None),
 ByocTile(path='byoc-collection-123/203025/20210907_000000/(BAND).TIF', other_data={}, status=None, tile_id=None, tile_geometry=None, cover_geometry=None, created=None, sensing_time=datetime.datetime(2021, 9, 7, 0, 0), additional_data=None)]

In [16]:
for tile in tiles:
    byoc.create_tile(created_collection, tile )

list(byoc.iter_tiles(created_collection))

DownloadFailedException: Failed to download from:
https://services.sentinel-hub.com/api/v1/byoc/collections/3d2a9be6-6c5b-4e7a-87d7-e6ea30e72f78/tiles
with HTTPError:
409 Client Error: Conflict for url: https://services.sentinel-hub.com/api/v1/byoc/collections/3d2a9be6-6c5b-4e7a-87d7-e6ea30e72f78/tiles
Server response: "{"error":{"status":409,"reason":"Conflict","message":"Conflict","code":"COMMON_UNIQUE_KEY_VIOLATION"}}"

In [None]:
list(byoc.iter_tiles(created_collection))

In [None]:
data_collection = DataCollection.define_byoc( '3d2a9be6-6c5b-4e7a-87d7-e6ea30e72f78' )
data_collection

In [None]:
from sentinelhub import SentinelHubCatalog
catalog = SentinelHubCatalog(config=config)
catalog.get_info()

In [None]:
collections = catalog.get_collections()
collections

In [None]:
def getTimeInterval( timeframe ):

    """
    return time interval tuple
    """

    # get time interval tuple        
    tz_format = '%Y-%m-%dT%H:%M:%S%z'
    return timeframe[ 'start' ].strftime( tz_format ), timeframe[ 'end' ].strftime( tz_format )

In [None]:
from sentinelhub import bbox_to_dimensions

bbox = BBox([530575, 5595700, 547118, 5606883], crs=CRS(32630))
aoi = bbox_to_dimensions( bbox, resolution=30)

In [None]:
from datetime import datetime, timedelta

timeframe = { 'start' : datetime( 2021, 9, 1, 0, 0, 0 ), 
                'end' : datetime( 2021, 10, 31, 23, 59, 59 ) }

_delta = timedelta(hours=1)

# execute search
iterator = catalog.search (
            data_collection,
            bbox=bbox,
            time=getTimeInterval( timeframe ),
            query=None,
            fields=None )

# filter timestamps into +- 1 hour groupings
timestamps = iterator.get_timestamps()
#timestamps = filter_times( timestamps, _delta )
timestamps


In [None]:
evalscript = """
//VERSION=3
function setup() {
  return {
    input: ["B1" ],
    output: {
                bands: 1,
                sampleType: "UINT16"
    }
  };
}

function evaluatePixel(sample) {
  return [ sample.B1 ];
}
"""

request = SentinelHubRequest(
    evalscript=evalscript,
    input_data=[
        SentinelHubRequest.input_data(
            data_collection=data_collection,
            time_interval=getTimeInterval( timeframe )
        )
    ],
    responses=[
        SentinelHubRequest.output_response('default', MimeType.TIFF )
    ],
    bbox=bbox,
    size=aoi,
    config=config
)

In [None]:
data = request.get_data()[0]

In [None]:
data

In [None]:
import matplotlib.pyplot as plt
plt.imshow( data )