In [30]:
from datetime import datetime as dt

## Steps

### 1. Request all imagery over time period
### 2. Loop through each image and download SCL
### 3. Crop SCL to study area and check percent cloud
### 4. If not super cloudy, then download everything?

# Study-area bounding boxes. A bbox is handed to the STAC search and expanded
# with shapely's box(*bbox) before being reprojected from EPSG:4326, so the
# element order is [min_lon, min_lat, max_lon, max_lat].
bbox_boma = [
    33.494145, 6.592713,
    33.730720, 6.753140,
]
bbox_virunga = [
    29.397261, -1.464377,
    29.55281, -1.366300,
]
bbox_zakouma = [
    19.742523, 10.831293,
    19.903319, 10.960331,
]

# Search window: both endpoints are naive datetimes at midnight.
start_date = dt(year=2020, month=3, day=1)
end_date = dt(year=2020, month=4, day=1)

# Where the filtered item collection is written.
dst_path = "./data/testing/collection.json"


In [31]:
import numpy as np
from pystac import ItemCollection
from pystac_client import Client
import rasterio
from shapely.geometry import box, shape, Point

from common.utilities.projections import get_collection_bbox_coverage, reproject_shape


import matplotlib.pyplot as plt



def download_bbox(bbox, cog_url, read_all=False):
    """Read the part of a raster that falls inside a bounding box.

    Args:
        bbox: Indexable whose first four entries are passed, in order, as the
            left, bottom, right and top bounds to
            ``rasterio.windows.from_bounds``. They are interpreted through the
            raster's own transform.
        cog_url: Path or URL handed to ``rasterio.open``.
        read_all: When true every band is read; otherwise only band 1.

    Returns:
        A ``(data, transform)`` tuple: the masked pixel values cast to
        ``uint16``, and the affine transform of the window that was read.
    """
    with rasterio.open(cog_url) as src:
        read_window = rasterio.windows.from_bounds(
            bbox[0], bbox[1], bbox[2], bbox[3], transform=src.transform
        )

        # ``None`` is rasterio's "all bands" default; 1 selects the first band.
        band_indexes = None if read_all else 1
        pixels = src.read(band_indexes, masked=True, window=read_window)
        pixels = pixels.astype(np.uint16)

        window_transform = rasterio.windows.transform(read_window, src.transform)

    return (pixels, window_transform)
    

    
def is_scene_cloud_freeish(item, bbox_poly_ll, max_cloud_ratio=0.60, cloud_classes=(8, 9, 10)):
    """Decide whether a scene is clear enough over the area of interest.

    The scene's SCL (scene classification) asset is read only for the window
    where the scene footprint overlaps ``bbox_poly_ll``, and the share of
    cloud-classified pixels among the *valid* pixels is compared with
    ``max_cloud_ratio``.

    Args:
        item: STAC item exposing ``geometry``, ``properties["proj:epsg"]`` and
            an ``SCL`` asset.
        bbox_poly_ll: Shapely polygon of the study area in EPSG:4326.
        max_cloud_ratio: Scenes whose cloud share is strictly below this value
            are accepted. Defaults to 0.60.
        cloud_classes: SCL values counted as cloud. The default 8, 9, 10 are
            "cloud medium probability", "cloud high probability" and
            "thin cirrus" in the Sentinel-2 L2A classification.

    Returns:
        True when the cloud share over the overlap is below
        ``max_cloud_ratio``. False otherwise, including when the scene does
        not overlap the study area or the overlap holds no valid pixels.
    """
    overlap_poly_ll = bbox_poly_ll.intersection(shape(item.geometry))
    if overlap_poly_ll.is_empty:
        # Nothing of this scene lies inside the study area; there is no
        # window to read, so the scene cannot be useful.
        return False

    item_epsg_str = f'EPSG:{int(item.properties["proj:epsg"])}'
    overlap_poly_utm = reproject_shape(overlap_poly_ll, init_proj="EPSG:4326", target_proj=item_epsg_str)
    # Snap the projected bounds to the nearest 10 units before reading.
    overlap_bbox_utm = np.round(overlap_poly_utm.bounds, -1)

    scl_data, _ = download_bbox(overlap_bbox_utm, item.assets['SCL'].href)

    # np.isin discards the mask of a masked array, which would let nodata
    # pixels count as clear sky and dilute the ratio. Restrict both the
    # numerator and the denominator to valid pixels instead.
    valid_mask = ~np.ma.getmaskarray(scl_data)
    valid_count = np.count_nonzero(valid_mask)
    if valid_count == 0:
        # The window is entirely nodata: no evidence that the scene is clear.
        return False

    cloud_mask = np.isin(np.ma.getdata(scl_data), list(cloud_classes)) & valid_mask
    cloud_ratio = np.count_nonzero(cloud_mask) / valid_count
    print(cloud_ratio)

    return cloud_ratio < max_cloud_ratio
    

def get_cloud_free_collection(start_date, end_date, bbox, dst_path):
    """Search Sentinel-2 L2A scenes and save the ones that are mostly clear.

    Queries the Earth Search v0 STAC catalog for ``sentinel-s2-l2a-cogs``
    items intersecting ``bbox`` within the date range, keeps the items that
    ``is_scene_cloud_freeish`` accepts, prints how many were kept per
    ``sentinel:grid_square`` and writes them to ``dst_path`` as an
    ``ItemCollection``.

    Args:
        start_date: ``datetime`` marking the start of the search window.
        end_date: ``datetime`` marking the end of the search window.
        bbox: Four numbers passed both to the STAC search and to shapely's
            ``box(*bbox)``, i.e. ``[min_lon, min_lat, max_lon, max_lat]``.
        dst_path: Destination path for the saved item collection.

    Returns:
        None. The result is the file written to ``dst_path``.
    """
    # The trailing 'Z' is a literal: datetimes are formatted as-is and
    # labelled UTC, with no timezone conversion.
    stac_date_format = '%Y-%m-%dT%H:%M:%SZ'
    stac_date_string = f'{start_date.strftime(stac_date_format)}/{end_date.strftime(stac_date_format)}'

    # Open a catalog
    client = Client.open("https://earth-search.aws.element84.com/v0")

    # Get results for a collection in the catalog, sorted on scene-level
    # cloud cover.
    search = client.search(
        bbox=bbox,
        collections=['sentinel-s2-l2a-cogs'],
        datetime=stac_date_string,
        sortby='properties.eo:cloud_cover',
        # Numeric bound: eo:cloud_cover is a number, so compare it with a
        # number rather than the string "100".
        query={"eo:cloud_cover": {"lt": 100}},
    )

    bbox_poly_ll = box(*bbox)
    items, items_count = [], {}
    # Iterate the result pages lazily; there is no need to hold them all first.
    for item in search.items():
        if not is_scene_cloud_freeish(item, bbox_poly_ll):
            continue

        square = item.properties['sentinel:grid_square']
        items_count[square] = items_count.get(square, 0) + 1
        items.append(item)

    print(items_count)
    ItemCollection(items=items).save_object(dst_path)
    
    
# Build and save the mostly-cloud-free collection for the Zakouma study area.
get_cloud_free_collection(
    start_date=start_date,
    end_date=end_date,
    bbox=bbox_zakouma,
    dst_path=dst_path,
)

    

5.534524362976246e-05
0.0
4.695717505634861e-05
2.8174305033809167e-05
0.000158129267513607
0.40817117176949813
0.06041803053159897
0.9515254730437037
0.8560011269722013
0.9996055597295267
0.9449235761250107
1.0
{'CT': 4, 'CS': 3}
