## Cleaning up the code

**Inputs**
- parent directory of an S2 (Sentinel-2) image (the `.SAFE` folder)
- `envelope_gdf` (for now I will also include the code to go from buffer to envelope here)

In [9]:
# Imports. To be cleaned as well

import glob
import json
import os

import geopandas as gpd
import numpy as np
import pandas as pd
import rasterio
from matplotlib import pyplot as plt
from rasterio.coords import BoundingBox
from rasterio.features import geometry_mask
from rasterio.mask import mask
from rasterio.plot import show
from shapely.geometry import box
# Single-band raster (B08, 10 m) that the cells below open to read the raster extent and to cut patches from
path = r'C:\Users\Kostas\Downloads\S2A_MSIL2A_20170420T103021_N0204_R108_T32UNB_20170420T103454.SAFE\GRANULE\L2A_T32UNB_A009543_20170420T103454\IMG_DATA\R10m\L2A_T32UNB_20170420T103021_B08_10m.jp2'

### ------------ Buffer to envelope -----------------

In [2]:
# Load the buffer polygons and declare their CRS explicitly: GeoPandas reads the
# file as EPSG:4326, whereas the coordinates are actually in EPSG:32632.
buffers = gpd.read_file(r'C:\Users\Kostas\Desktop\GIMA\Module_7\Data\PEP725\After_2016_sent_from_PEP725\pep725_outputs\pep725_high_count_days\buffers_day_92.geojson')
buffers = buffers.set_crs(32632, allow_override=True)

# Envelope (bounding rectangle) of every buffer, kept under its own column name
envelope_series = buffers.geometry.envelope.rename('envelope_geometry')

# Attach the envelopes to the buffer attributes by index, then make them the
# active geometry in place of the original buffer polygons.
envelope_gdf = (
    buffers
    .merge(envelope_series, left_index=True, right_index=True)
    .drop(columns=['geometry'])
    .set_geometry('envelope_geometry')
    .rename_geometry('geometry')
)


## Define functions to be used

In [3]:
"""Function to parse features from GeoDataFrame in such a manner that rasterio wants them"""

def getFeatures(gdf):
    """Return the geometry of the first feature of ``gdf`` as a one-element list.

    The GeoDataFrame is serialised to GeoJSON and the ``geometry`` member of
    its first feature is extracted, which gives the GeoJSON-like dict that is
    later handed to rasterio's ``mask`` as ``shapes``.

    Parameters
    ----------
    gdf : object with a ``to_json()`` method returning a GeoJSON
        FeatureCollection string (here: a GeoDataFrame).

    Returns
    -------
    list
        ``[geometry_dict]`` for the first feature only; further features are ignored.
    """
    feature_collection = json.loads(gdf.to_json())
    first_feature = feature_collection['features'][0]
    return [first_feature['geometry']]

In [4]:
"""
Function to clip a GeoDataFrame to the extent of the raster. Can be used for each input image.

It should be replaced with something better: instead of clipping, it should just check whether the
polygon is inside the raster's bounds and pass it on for later processing.

"""

def clipGdfToRasterExtent (raster, envelope_gdf):
    """Select the rows of ``envelope_gdf`` that lie within the extent of ``raster``.

    Parameters
    ----------
    raster : path of the raster file; it is opened with the JP2OpenJPEG driver.
    envelope_gdf : GeoDataFrame of envelope polygons.
        Assumed to be in the same CRS as the raster; no reprojection is done here.

    Returns
    -------
    GeoDataFrame
        The subset of ``envelope_gdf`` selected by the ``.cx`` coordinate
        indexer over the raster's bounding box.

    Notes
    -----
    NOTE(review): per the GeoPandas documentation ``.cx`` selects geometries
    that intersect the box and does not cut them, so envelopes that are only
    partly inside the raster are still returned.
    TODO: add a checking clause for the polygons intersecting the bbox.
    """
    # Open the raster that was passed in, not the notebook-level `path`,
    # so the function works for every input image.
    with rasterio.open(raster, driver='JP2OpenJPEG') as src:
        # Only the extent is needed; the dataset is closed on leaving the block
        minx, miny, maxx, maxy = src.bounds

    return envelope_gdf.cx[minx:maxx, miny:maxy]

In [14]:
# Envelope polygons as a plain Python list
envelope_list = envelope_gdf.geometry.tolist()

# Only the raster's bounding box is needed, so read it and let the file close
with rasterio.open(path, driver='JP2OpenJPEG') as src:
    raster_extent = src.bounds

# One list per coordinate, filled only for envelopes fully inside the raster
minx_list, miny_list, maxx_list, maxy_list = [], [], [], []

for poly in envelope_list:
    poly_extent = poly.bounds  # indexed below as (minx, miny, maxx, maxy)

    fits_in_x = raster_extent[0] <= poly_extent[0] and poly_extent[2] <= raster_extent[2]
    fits_in_y = raster_extent[1] <= poly_extent[1] and poly_extent[3] <= raster_extent[3]
    if not (fits_in_x and fits_in_y):
        # Envelope sticks out of the raster on at least one side: skip it
        continue

    minx_list.append(poly_extent[0])
    miny_list.append(poly_extent[1])
    maxx_list.append(poly_extent[2])
    maxy_list.append(poly_extent[3])

        

In [5]:
def export_image(raster, output_path, minx, miny, maxx, maxy):
    """Cut a rectangular patch out of a raster and write it to ``output_path``.

    Parameters
    ----------
    raster : path of the source raster (single band); opened with the
        JP2OpenJPEG driver.
    output_path : path of the patch file to create. It is written with the
        source raster's profile, with width, height and transform replaced by
        those of the patch.
    minx, miny, maxx, maxy : corner coordinates of the patch rectangle.
        Assumed to be expressed in the raster's CRS; nothing is reprojected.

    Returns
    -------
    None
    """
    # The context manager closes the source dataset even when masking fails,
    # so no file handle is leaked on error.
    with rasterio.open(raster, driver='JP2OpenJPEG') as data:
        # Rectangle covering the requested extent
        bbox = box(minx, miny, maxx, maxy)
        # Single-polygon GeoDataFrame, tagged with the raster's own CRS instead
        # of a hard-coded EPSG code (the CRS does not affect the coordinates).
        geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=data.crs)
        # GeoJSON-like geometry list, the form rasterio's mask() accepts as `shapes`
        coords = getFeatures(geo)

        # Mask the raster with the rectangle and crop the result to it
        out_img, out_transform = mask(data, shapes=coords, crop=True)
        # Start from the source profile so the remaining settings carry over
        out_profile = data.profile.copy()

    # out_img is indexed (band, row, column): shape[1] is the height, shape[2] the width
    out_profile.update({'width': out_img.shape[2],'height': out_img.shape[1], 'transform': out_transform})

    # Write the extracted raster patch to a file
    with rasterio.open(output_path, 'w', **out_profile) as dst:
        dst.write(out_img)

## Getting a list of all the band rasters

In [6]:
# List the .jp2 band rasters of the product.
# TODO: the search pattern has not been double-checked yet and may contain an error.

dirr = r'C:\Users\Kostas\Downloads\S2A_MSIL2A_20170420T103021_N0204_R108_T32UNB_20170420T103454.SAFE'

# Alternative that returns every .jp2 file in every subfolder of dirr:
#jp2_files = glob.glob(dirr+"/**/*.jp2", recursive=True)

# Keep only files named like *B??_??m.jp2 inside the R??m resolution folders
# under IMG_DATA; other products are omitted.
band_pattern = '/**/IMG_DATA/**/R??m/*B??_??m.jp2'
jp2_files = glob.glob(dirr + band_pattern, recursive=True)
print(jp2_files)

['C:\\Users\\Kostas\\Downloads\\S2A_MSIL2A_20170420T103021_N0204_R108_T32UNB_20170420T103454.SAFE\\GRANULE\\L2A_T32UNB_A009543_20170420T103454\\IMG_DATA\\R10m\\L2A_T32UNB_20170420T103021_B02_10m.jp2', 'C:\\Users\\Kostas\\Downloads\\S2A_MSIL2A_20170420T103021_N0204_R108_T32UNB_20170420T103454.SAFE\\GRANULE\\L2A_T32UNB_A009543_20170420T103454\\IMG_DATA\\R10m\\L2A_T32UNB_20170420T103021_B03_10m.jp2', 'C:\\Users\\Kostas\\Downloads\\S2A_MSIL2A_20170420T103021_N0204_R108_T32UNB_20170420T103454.SAFE\\GRANULE\\L2A_T32UNB_A009543_20170420T103454\\IMG_DATA\\R10m\\L2A_T32UNB_20170420T103021_B04_10m.jp2', 'C:\\Users\\Kostas\\Downloads\\S2A_MSIL2A_20170420T103021_N0204_R108_T32UNB_20170420T103454.SAFE\\GRANULE\\L2A_T32UNB_A009543_20170420T103454\\IMG_DATA\\R10m\\L2A_T32UNB_20170420T103021_B08_10m.jp2', 'C:\\Users\\Kostas\\Downloads\\S2A_MSIL2A_20170420T103021_N0204_R108_T32UNB_20170420T103454.SAFE\\GRANULE\\L2A_T32UNB_A009543_20170420T103454\\IMG_DATA\\R20m\\L2A_T32UNB_20170420T103021_B02_20m.jp2',

## Patch extraction. One band only.

In [None]:
# Restrict the envelopes to the extent of the raster at `path`
clipped_gdf = clipGdfToRasterExtent(path, envelope_gdf)
#clipped_gdf.plot()

Using string methods for naming the patches

In [None]:
# Build a name from each raster path with split(): the path is split at every
# underscore and the last three pieces are joined back together, e.g.
# "20170420T103021_B08_10m.jp2" (the file extension is still included).

# Consider adding a checking clause to check if it is in b02, b03 etc....
for jp2_file in jp2_files:
    string_parts = jp2_file.split("_")
    band_string = "_".join([string_parts[-3], string_parts[-2], string_parts[-1]])

In [8]:
output_path = r'C:\Users\Kostas\Desktop\GIMA\Module_7\Data\PEP725\After_2016_sent_from_PEP725\pep725_outputs\pep725_high_count_days\rasters'

# One (minx, miny, maxx, maxy) tuple per envelope, collected in a list
bounds = [geom.bounds for geom in clipped_gdf.geometry]

NameError: name 'clipped_gdf' is not defined

In [7]:
# `bounds` holds one (minx, miny, maxx, maxy) tuple per envelope.
# Iterate over them and call export_image to extract one patch of the raster per envelope.
for i, (minx, miny, maxx, maxy) in enumerate(bounds):
    output_file_name = os.path.join(output_path, f"Patch_{i}.jp2") # This needs update after the strings
    export_image(path, output_file_name, minx, miny, maxx, maxy)

NameError: name 'bounds' is not defined