In [None]:
import os
import shutil
from collections import OrderedDict
from datetime import date
from glob import glob

import earthpy
import earthpy.plot as ep
import fiona
import folium
import geopandas
import matplotlib.pyplot as plt
import numpy as np
import rasterio
import rasterio.mask
from rasterio.enums import Resampling
from rasterio.merge import merge
from rasterio.plot import plotting_extent
from rasterio.plot import show
from rasterio.warp import calculate_default_transform, reproject
from rasterio.windows import Window
from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt

## 1. Download Sentinel-2 data

In [None]:
# Parse the area-of-interest (AOI) GeoJSON and echo its contents.
footprint = read_geojson("example_aoi.geojson")
print(footprint)

# Draw the AOI on an interactive map so its position can be checked by eye.
boundsdata = footprint
m = folium.Map(location=[35.0, -119.0], zoom_start=10)
m.add_child(folium.GeoJson(boundsdata))
m

In [None]:
# Credentials come from the environment so real account details never end up in
# the saved notebook; the placeholder strings are only fallbacks.
# Set SENTINEL_USER / SENTINEL_PASSWORD after signing up for an account.
# NOTE(review): confirm this hub endpoint is still in service before running.
api = SentinelAPI(os.environ.get("SENTINEL_USER", "username"),
                  os.environ.get("SENTINEL_PASSWORD", "password"),
                  "https://apihub.copernicus.eu/apihub")

start_date = '20220920'
end_date = '20220928'

# `footprint` is GeoJSON on the first run and WKT text afterwards; only convert
# when it is not already text so that re-running this cell does not fail.
footprint_wkt = footprint if isinstance(footprint, str) else geojson_to_wkt(footprint)
footprint = footprint_wkt

# Sentinel-2 Level-2A products over the AOI with at most 30% cloud cover.
products = api.query(footprint_wkt,
                     date=(start_date, end_date),
                     platformname='Sentinel-2',
                     processinglevel='Level-2A',
                     cloudcoverpercentage=(0, 30))

metadata = api.to_geojson(products)
products_df = api.to_dataframe(products)
print(len(products_df))

In [None]:
# Plot the candidate products to decide which to download (e.g. an image that covers only a small
# part of the AOI adds little value if another image already covers that area).
def style(feature):
    """Return the map style for one GeoJSON feature.

    Used as a folium ``style_function`` so the AOI can be told apart from the
    Sentinel-2 image footprints.

    Parameters
    ----------
    feature : dict
        GeoJSON feature whose ``properties`` hold ``'fill_color'`` and ``'color'``.

    Returns
    -------
    dict
        Style options keyed by the Leaflet option names ``'fillColor'`` and ``'color'``.

    Raises
    ------
    KeyError
        If either property is missing from the feature.
    """
    properties = feature['properties']
    return {
        # Leaflet's option is camelCase; a 'fill_color' key would be ignored.
        'fillColor': properties['fill_color'],
        'color': properties['color'],
    }

# Read the AOI from the GeoJSON file: geopandas.read_file() needs a path or file
# object, while `footprint` holds WKT text once the query cell has run.
dataset = geopandas.read_file("example_aoi.geojson")
dataset['color'] = 'red'
dataset['fill_color'] = 'red'
print(dataset)

m = folium.Map([35.0, -119.0], zoom_start=8)
boundsdata = metadata
# Product footprints in the default style, with the red AOI drawn on top.
folium.GeoJson(boundsdata).add_to(m)
folium.GeoJson(dataset, style_function=style).add_to(m)
m

In [None]:
# Remove one product from the ordered `products` dictionary.
# The DataFrame index holds the product IDs that key `products`, so list it to
# find the position of the observation to remove.
products_df = api.to_dataframe(products)
print(products_df.index.tolist())

remove_position = 1  # row of the observation to drop
if len(products_df) > remove_position:
    # Use the index label (the product ID) rather than a column value, which
    # would not be a key of `products`.
    product_id = products_df.index[remove_position]
    del products[product_id]
else:
    print("no product at position", remove_position, "- nothing removed")
# Each run of this cell removes one more product; re-run the query to reset.
print("new number of files to download: ", len(products))

In [None]:
# Download every product still listed in `products`.
api.download_all(products)

In [None]:
# If a file fails to download, try triggering and downloading it individually.
# This mainly applies to observations more than 1 year old, which are archived.
# The product ID is the DataFrame index label, not a column value.
product_id = products_df.index[1]
product_info = api.get_product_odata(product_id)  # kept for manual inspection
is_online = api.is_online(product_id)

if is_online:
    print(f'Product {product_id} is online. Starting download.')
    api.download(product_id)
else:
    # Archived product: request retrieval, then re-run this cell later.
    print(f'Product {product_id} is not online.')
    api.trigger_offline_retrieval(product_id)

## 2. Mosaic tiles 

S2 data is stored in tiles. The AOIs required for this project span multiple tiles, so I need to mosaic/merge them together. 

Before mosaicking, check that the CRSs of the datasets match; if they do not, reproject them to a common CRS.

In [None]:
def change_crs(in_path, out_path, new_crs):
    """Reproject a raster to ``new_crs`` and write it as a GeoTIFF.

    Parameters
    ----------
    in_path : str
        Path of the raster to read.
    out_path : str
        Path of the GeoTIFF to write; its parent directory is created if needed.
    new_crs : str
        Target coordinate reference system, e.g. ``"EPSG:32612"``.

    Returns
    -------
    str
        ``out_path``, so the call can be chained.
    """
    # rasterio does not create missing directories when opening for writing.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with rasterio.open(in_path) as source:
        # Grid (transform and size) of the source extent expressed in the new CRS.
        transform, width, height = calculate_default_transform(
            source.crs, new_crs, source.width, source.height, *source.bounds)

        kwargs = source.meta.copy()
        kwargs.update({
            "driver": 'GTiff',
            'crs': new_crs,
            'transform': transform,
            'width': width,
            'height': height})
        print(f"{in_path}: {source.crs} -> {new_crs} ({source.count} band(s))")

        with rasterio.open(out_path, 'w', **kwargs) as dst:
            # Bands are 1-indexed; each one is warped with nearest-neighbour resampling.
            for i in range(1, source.count + 1):
                reproject(
                    source=rasterio.band(source, i),
                    destination=rasterio.band(dst, i),
                    src_transform=source.transform,
                    src_crs=source.crs,
                    dst_transform=transform,
                    dst_crs=new_crs,
                    resampling=Resampling.nearest)

    print("reprojected", out_path)
    return out_path

In [None]:
bands = ["B01", "B02", "B03", "B04", "B05",  "B06", "B07", "B08", "B8A", "B09", "B10", "B11", "B12", "TCI"]
res = ["10m","20m", "60m"]  # finest resolution first

# create dictionary of the paths to the data in the S2 folder
# one list of tile files for each "<band>_<resolution>" key
path = 'Desktop' # change path here to unzipped S2 folder
Sentinel_2_path = os.path.join(path, 'Sentinel-2')
observation_list = os.listdir(Sentinel_2_path)

path_dict = {}

for safe_dir in glob(os.path.join(Sentinel_2_path, '*.SAFE')):
    granule_root = os.path.join(safe_dir, 'GRANULE')
    for granule in os.listdir(granule_root):
        if not granule.startswith("L2A_"):
            continue
        # Bands already found for this granule. `res` is walked finest first, so
        # each band keeps its finest-resolution (smallest pixel) version only.
        completed = set()
        for r in res:
            img_dir = os.path.join(granule_root, granule, 'IMG_DATA', 'R' + r)
            for b in bands:
                if b in completed:
                    continue
                dict_key = b + "_" + r
                matches = sorted(glob(os.path.join(img_dir, '*_' + dict_key + '.jp2')))
                if matches:
                    completed.add(b)
                    # setdefault starts the list on first sight of a key without
                    # hiding unrelated errors behind a bare except.
                    path_dict.setdefault(dict_key, []).append(matches[0])

In [None]:
target_crs = "EPSG:32612"  # CRS every tile is reprojected to before merging
reprojected_dir = os.path.join(Sentinel_2_path, "new_crs")
processed_dir = os.path.join(Sentinel_2_path, "processed")
os.makedirs(reprojected_dir, exist_ok=True)
os.makedirs(processed_dir, exist_ok=True)

for key in path_dict:
    source_files = []
    try:
        for tile_index, f in enumerate(path_dict[key]):
            # One output file per tile: a shared name would make every tile
            # overwrite the previous one while it is still open for merging.
            out_path_reprojected = os.path.join(reprojected_dir, f"{key}_{tile_index}.tiff")
            change_crs(f, out_path_reprojected, target_crs)
            source = rasterio.open(out_path_reprojected)
            print(source.crs)
            source_files.append(source)
        # mosaic, using rasterio's merge function, https://rasterio.readthedocs.io/en/latest/api/rasterio.merge.html
        mosaic, transformation_info = merge(source_files)
        out_meta = source.meta.copy()
    finally:
        # Release the file handles even if reprojection or merging fails.
        for opened in source_files:
            opened.close()

    # The mosaic is (bands, rows, cols); taking the count from it covers the
    # 3-band TCI image and the single-band files alike.
    out_meta.update({"driver": 'GTiff',
                     "height": mosaic.shape[1],
                     "width": mosaic.shape[2],
                     "count": mosaic.shape[0],
                     "transform": transformation_info
                    })
    print(out_meta)
    out_path = os.path.join(processed_dir, key + ".tiff")

    with rasterio.open(out_path, "w", **out_meta) as dst:
        dst.write(mosaic)

In [None]:
# visualise the new large image with the aoi box
# read_file() takes the path directly; the home directory is resolved at run
# time so no user-specific absolute path is stored in the notebook.
aoi = geopandas.read_file(os.path.join(os.path.expanduser("~"), "Desktop", "example_aoi.geojson"))
print(aoi.crs)

# `out_path` is the last mosaic written by the mosaic cell.
with rasterio.open(out_path) as source:
    show(source, cmap='terrain')
    print(source.shape)
    source_plot_ext = plotting_extent(source)
    print(source_plot_ext)
    source_arr = source.read(1)
    raster_crs_wkt = source.crs.to_wkt()

fig, ax = plt.subplots(figsize=(12, 8))
ep.plot_bands(source_arr, ax=ax, extent=source_plot_ext)
# Draw the AOI in the raster's CRS so it lines up with the image extent.
aoi.to_crs(raster_crs_wkt).plot(color='teal', edgecolor='black', ax=ax)
plt.show()

## 3. Crop

The mosaic is now a single image covering a large area, assembled from several source images. It needs to be cropped to the AOI.

In [None]:
# First update the CRS of the AOI file so it matches the Sentinel-2 data, then
# save it next to the data for the cropping step.
# Background on passing geometries to rasterio's mask:
#https://stackoverflow.com/questions/62717402/received-attributeerror-str-object-has-no-attribute-get-when-use-raster-geo

# Read the AOI from the GeoJSON file: read_file() needs a path or file object,
# while `footprint` holds WKT text once the query cell has run.
aoi = geopandas.read_file("example_aoi.geojson")
print(aoi)
print("Old crs:", aoi.crs)
# Reproject the whole frame so the geometries and the frame's CRS stay in sync.
# NOTE(review): the mosaic step reprojects rasters to EPSG:32612 while this cell
# uses EPSG:32611 - confirm which UTM zone applies to this AOI.
aoi = aoi.to_crs(epsg=32611)
aoi.to_file(os.path.join(path, "aoi.geojson"), driver="GeoJSON")
print("New crs:", aoi.crs)
print(aoi)

In [None]:
# crop each band in the processed folder to the coordinates of the aoi file

base_path = 'path'
locations = ['California', 'Arizona']

for location in locations:
    for observation in os.listdir(os.path.join(base_path, location)):
        observation_dir = os.path.join(base_path, location, observation)
        if not os.path.isdir(observation_dir):
            continue  # skip stray files sitting next to the observation folders
        path = observation_dir

        aoi_path = os.path.join(path, "aoi.geojson")
        aoi_geom = geopandas.read_file(aoi_path)
        print("aoi geom:", aoi_geom.crs)

        processed_path = os.path.join(path, "Sentinel-2", "processed")
        file_paths = glob(os.path.join(processed_path, '*.tiff'))

        for file in file_paths:
            file_name = os.path.basename(file)
            if file_name.startswith("crop_"):
                continue  # output of an earlier run; do not crop it again

            with rasterio.open(file) as processed:
                print("file crs:", processed.crs)

                # mask() compares raw coordinates, so bring the AOI into the
                # raster's CRS first.
                shapes = aoi_geom.to_crs(processed.crs.to_wkt()).geometry

                # apply the mask
                outImage, outTransform = rasterio.mask.mask(processed,
                                                            shapes,
                                                            all_touched=True, # some pixels will go outside of the box
                                                            crop=True)
                outMeta = processed.meta.copy()

            # update metadata with the new dimensions; the band count is taken
            # from the cropped array so TCI (3 bands) needs no special case
            outMeta.update({"driver": 'GTiff',
                            "height": outImage.shape[1],
                            "width": outImage.shape[2],
                            "count": outImage.shape[0],
                            "transform": outTransform})

            with rasterio.open(os.path.join(processed_path, "crop_" + file_name), "w", **outMeta) as outRaster:
                outRaster.write(outImage)

In [None]:
# plot the cropped image and the aoi to check the crop has worked
# `path` and `aoi_geom` are those of the last observation handled by the crop loop.
with rasterio.open(os.path.join(path,  "Sentinel-2", "processed", "crop_TCI_10m.tiff")) as image:
    print(image.shape)
    image_extent = plotting_extent(image)
    print(image_extent)
    first_band = image.read(1)  # read before the file is closed

fig, ax = plt.subplots(figsize=(12, 8))
ep.plot_bands(first_band, extent=image_extent, ax=ax)
aoi_geom.plot(color='teal', edgecolor='black', ax=ax)
plt.show()

## 4. Resample all bands to 10m resolution


In [None]:
def resample(path, out_dir, target_resolution):
    """Write a copy of a raster resampled to ``target_resolution`` into ``out_dir``.

    A raster already at the target pixel width is copied unchanged under its
    original name. Otherwise it is resampled with bilinear interpolation and
    saved as ``resample_<factor>_<original name>``.

    Parameters
    ----------
    path : str
        Raster to read.
    out_dir : str
        Directory for the result; created if it does not exist.
    target_resolution : float
        Desired pixel width, in the units of the raster's transform.

    Returns
    -------
    str
        Path of the file that was written.
    """
    os.makedirs(out_dir, exist_ok=True)

    with rasterio.open(path) as dataset:
        # transform[0] is the pixel width; the ratio is the scale applied per axis.
        resampling_factor = dataset.transform[0] / target_resolution

        if resampling_factor == 1:
            # Already at the target pixel size: keep the data untouched.
            return shutil.copy(path, out_dir)

        # round() instead of int(): a product such as 5999.9999 must not lose a row/column.
        out_height = max(1, round(dataset.height * resampling_factor))
        out_width = max(1, round(dataset.width * resampling_factor))

        # resample data to target pixel size while reading
        data = dataset.read(
            out_shape=(dataset.count, out_height, out_width),
            resampling=Resampling.bilinear
        )

        # scale the transform by the ratio of old to new grid size
        transform = dataset.transform * dataset.transform.scale(
            (dataset.width / data.shape[-1]),
            (dataset.height / data.shape[-2])
        )
        out_meta = dataset.meta.copy()
        out_meta.update({"driver": "GTiff",
                         "height": out_height,
                         "width": out_width,
                         "transform": transform,
                         "crs": dataset.crs})

    out_path = os.path.join(out_dir, "resample_"+str(resampling_factor)+"_"+os.path.basename(path))
    with rasterio.open(out_path, "w", **out_meta) as dest:
        dest.write(data)
    return out_path

In [None]:
# iterate all sentinel 2 folders for each observation and resample
base_path = 'path'
locations = ['California', 'Arizona']

for location in locations:
    for observation in os.listdir(os.path.join(base_path, location)):
        observation_dir = os.path.join(base_path, location, observation)
        out_dir = os.path.join(observation_dir, "Sentinel-2/10m_sampling/")
        crop_files = glob(os.path.join(observation_dir, "Sentinel-2/processed", "crop_*"))
        if crop_files:
            # The output folder must exist before anything is written into it.
            os.makedirs(out_dir, exist_ok=True)
        for file in crop_files:
            print(file)
            resample(file, out_dir, 10)

## 5. Crop to 15x20km 

In [None]:
base_path = 'path'
window_width, window_height = 1500, 2000 # desired pixels in window


def create_window(mid_height, mid_width, window_height, window_width):
    """Return a raster window of the requested size centred on a pixel.

    Parameters
    ----------
    mid_height, mid_width : float
        Row and column of the centre pixel.
    window_height, window_width : int
        Window size in pixels.

    Returns
    -------
    rasterio.windows.Window
        Window whose offsets are clamped to 0 so it never starts before the raster.
    """
    row_off = max(0, int(round(mid_height - window_height / 2)))
    col_off = max(0, int(round(mid_width - window_width / 2)))
    return Window(col_off, row_off, window_width, window_height)


for location in locations:
    for observation in os.listdir(os.path.join(base_path, location)):
        cropped_dir = os.path.join(base_path, location, observation, 'Sentinel-2/cropped/')
        for file in glob(os.path.join(base_path, location, observation, "Sentinel-2/10m_sampling/*.tiff")):
            with rasterio.open(file) as src:
                # see before (first band only, which is all that is displayed)
                print(file)
                print("width:", src.width)
                print("height", src.height)
                plt.imshow(src.read(1))
                plt.show()

                mid_width, mid_height = src.width/2, src.height/2

                # create and apply the window, and print
                window = create_window(mid_height, mid_width, window_height, window_width)
                windowed = src.read(window=window)
                arr = windowed[0]
                print("height, width:", np.shape(arr))
                plt.imshow(arr)
                plt.show()

                # update metadata; the size comes from the data actually read,
                # which can be smaller than the request on a small raster
                meta = src.meta.copy()
                meta.update({"driver": "GTiff",
                             "height": windowed.shape[1],
                             "width": windowed.shape[2],
                             "transform": src.window_transform(window),
                             "crs": src.crs})
                print(meta)

            # save windowed version
            os.makedirs(cropped_dir, exist_ok=True)
            with rasterio.open(os.path.join(cropped_dir, os.path.basename(file)), 'w', **meta) as dst:
                dst.write(windowed)

## 6. Combine all bands into one .npy file

In [None]:
# combine all bands into a npy file for each observation
bands = ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B11", "B12"] # same order as BigEarthNet .npy files

for location in locations:
    for observation in os.listdir(os.path.join(base_path, location)):
        observation_dir = os.path.join(base_path, location, observation)
        if not os.path.isdir(observation_dir):
            continue  # skip stray files sitting next to the observation folders

        observation_array = []
        for band in bands:
            # Read from the folder the windowing step writes to.
            file_path = glob(os.path.join(observation_dir, "Sentinel-2/cropped/", "*"+band+"_*"))
            if not file_path:
                raise FileNotFoundError(
                    f"no cropped file for band {band} in {observation_dir}")
            with rasterio.open(file_path[0]) as dataset:
                array = dataset.read()
                print(band, np.shape(array))
                print(band, dataset.bounds)
                print(band, dataset.transform)
                observation_array.append(array[0])

        # np.stack fails loudly if any band has a different shape.
        stacked = np.stack(observation_array)
        print(np.shape(stacked))
        np.save(os.path.join(observation_dir, "Sentinel-2/all_bands.npy"), stacked)