# Data search and calculation of spectral indices for subareas - Landsat-8
## 1) Search full coverage for designated subareas

This code searches for Landsat-8 satellite images covering specific geographic areas within a given time frame and cloud cover limit, then generates a heatmap visualizing the number of image sets that fully cover each area.


In [None]:
from eodag import EODataAccessGateway
import geopandas as gpd
from shapely.geometry import shape
from datetime import datetime
from collections import defaultdict
from itertools import combinations
import matplotlib.pyplot as plt
from matplotlib.colors import BoundaryNorm
from datetime import timedelta
from shapely.ops import unary_union

def parse_date(image_title):
    """Return the date encoded in the fourth underscore-separated field of a title.

    Example: ``LC08_L2SP_190056_20151217_02_T1`` -> ``datetime(2015, 12, 17)``.
    The field is parsed with the ``%Y%m%d`` format.
    """
    fields = image_title.split('_')
    date_field = fields[3]
    return datetime.strptime(date_field, '%Y%m%d')

# Paths (placeholders - replace with real locations before running)
subareas_path = '/path/to/area_boundaries/subareas_clip_to_AOI0_4326.shp'  # subareas used as search geometries
tiles_path = '/path/to/area_boundaries/subareas_4326.shp'  # base layer drawn in the heatmap
txt_path = '/path/to/text_file_location/textfile.txt'  # text report of the scenes selected per subarea
heatmap_path = '/path/to/heatmap_save_location/heatmap.png'  # where the heatmap figure is saved

# Both layers are expected to carry a 'layer' column: it is read from
# `subareas_data` during the search and from `tiles_data` for the heatmap.
subareas_data = gpd.read_file(subareas_path)
tiles_data = gpd.read_file(tiles_path)

# Search filters handed to the EODAG search call further down.
cloud_cover = 25  # passed as `cloudCover`
start = "2019-01-01"  # passed as `start`
end = "2019-01-31"  # passed as `end`
provider = 'planetary_computer'  # provider made preferred before searching

dag = EODataAccessGateway()
product_type = 'LANDSAT_C2L2'  # passed as `productType`

# NOTE(review): this module-level function is never called in this file; the
# statement that follows it calls the method of the `dag` instance instead.
# It looks like a pasted copy of that library method - confirm and consider
# removing it. `UnsupportedProvider` is not imported in this file, so the
# raise branch would fail with NameError if this function were ever used.
def set_preferred_provider(self, provider):
    """Give ``provider`` a priority one above the current maximum.

    Raises ``UnsupportedProvider`` when ``provider`` is not among
    ``self.available_providers()``. Does nothing when ``provider`` is already
    the preferred one.
    """
    if provider not in self.available_providers():
        raise UnsupportedProvider(
            f"This provider is not recognised by eodag: {provider}"
        )
    preferred_provider, max_priority = self.get_preferred_provider()
    if preferred_provider != provider:
        # Outrank the current best provider by one priority step.
        new_priority = max_priority + 1
        self._plugins_manager.set_priority(provider, new_priority)

# Make the configured provider the preferred one for the searches below.
dag.set_preferred_provider(provider)

# Largest number of scenes combined into one coverage set, and the largest
# allowed spread (in days) between the dates of the scenes of such a set.
MAX_SCENES_PER_SET = 3
MAX_DATE_SPREAD_DAYS = 16

tile_coverage_sets = defaultdict(int)   # tile name -> number of coverage sets recorded
coverage_results = defaultdict(list)    # tile name -> titles of the selected scenes

for _, row in subareas_data.iterrows():
    selected_tile_name = row['layer']
    tile_geometry = row.geometry
    print(f"Processing tile: {selected_tile_name}")
    search_results = dag.search_all(productType=product_type, geom=tile_geometry, start=start, end=end, cloudCover=cloud_cover)

    # Group the hits by the date encoded in their title; the grouping fixes
    # the order in which single scenes are tried below.
    images_by_date = defaultdict(list)
    for product in search_results:
        image_date = parse_date(product.properties['title'])
        images_by_date[image_date].append(product)

    # Pass 1: stop at the first single scene whose footprint contains the tile.
    full_coverage_found = False
    for products in images_by_date.values():
        for product in products:
            if shape(product.geometry).contains(tile_geometry):
                print(f"Full coverage found with single image: {product.properties['title']}")
                coverage_results[selected_tile_name].append(product.properties['title'])
                tile_coverage_sets[selected_tile_name] += 1
                full_coverage_found = True
                break
        if full_coverage_found:
            break

    if full_coverage_found:
        continue

    # Pass 2: try combinations of 2..MAX_SCENES_PER_SET scenes whose dates lie
    # within MAX_DATE_SPREAD_DAYS of each other and whose union contains the tile.
    unique_products = list({product.properties['title']: product for product in search_results}.values())

    # Title, date and footprint are computed once per scene instead of once
    # per combination the scene takes part in.
    candidates = [
        (p.properties['title'], parse_date(p.properties['title']), shape(p.geometry))
        for p in unique_products
    ]

    for r in range(2, min(MAX_SCENES_PER_SET + 1, len(candidates) + 1)):
        for combo in combinations(candidates, r):
            dates = [scene_date for _, scene_date, _ in combo]
            if (max(dates) - min(dates)).days > MAX_DATE_SPREAD_DAYS:
                continue

            combined_geometry = unary_union([footprint for _, _, footprint in combo])
            if combined_geometry.contains(tile_geometry):
                combo_titles = [scene_title for scene_title, _, _ in combo]
                print(f"Full coverage found with images: {combo_titles}")
                coverage_results[selected_tile_name] = combo_titles
                tile_coverage_sets[selected_tile_name] = 1
                full_coverage_found = True
                break
        if full_coverage_found:
            break

    if not full_coverage_found:
        # Previously silent: make tiles without any covering set visible.
        print(f"No full coverage found for tile: {selected_tile_name}")

# Collect, per tile, the distinct dates of the scenes selected for it.
unique_dates_per_tile = defaultdict(set)
for tile_name, tile_titles in coverage_results.items():
    for scene_title in tile_titles:
        unique_dates_per_tile[tile_name].add(parse_date(scene_title))

# One header line per tile, followed by one "Image" line per date that lists
# every selected title of that date joined by " - ".
with open(txt_path, 'w') as report:
    for tile_name, tile_dates in unique_dates_per_tile.items():
        report.write(f"{tile_name}: 1 unique image set\n")
        for scene_date in sorted(tile_dates):
            same_day_titles = [
                candidate
                for candidate in coverage_results[tile_name]
                if parse_date(candidate) == scene_date
            ]
            date_label = scene_date.strftime('%Y %m %d')
            report.write(f"  Image ({date_label}): {' - '.join(same_day_titles)}\n")

# Attach the per-tile count to the heatmap layer via the shared 'layer' name;
# tiles without a value end up as 0.
tiles_data['num_images'] = tiles_data['layer'].map(tile_coverage_sets).fillna(0)
num_images_bins = list(range(11))  # bin edges 0..10
# NOTE(review): `norm` is built here but not passed to the plot call below,
# which uses vmin/vmax instead - confirm whether a discrete colour scale was intended.
norm = BoundaryNorm(num_images_bins, ncolors=len(num_images_bins) - 1, clip=True)
fig, ax = plt.subplots(1, 1, figsize=(10, 8))
heatmap = tiles_data.plot(column='num_images', ax=ax, cmap='GnBu', edgecolor='black', vmin=0, vmax=10)
# Overlay the outlines of the searched subareas on top of the coloured tiles.
subareas_data.boundary.plot(ax=ax, color='black')

# Print the count at the centroid of every tile.
for idx, row in tiles_data.iterrows():
    plt.annotate(text=f"{int(row['num_images'])}", xy=(row.geometry.centroid.x, row.geometry.centroid.y),
                 horizontalalignment='center', verticalalignment='center')

# Caption above the axes (axes-relative coordinates) with the search period.
plt.text(0.5, 1.05, f"Landsat-8 Search Period: {start} to {end}", 
         ha='center', va='center', transform=ax.transAxes)

# Colour bar built from the first collection drawn by the tile plot.
cbar = fig.colorbar(heatmap.collections[0], ax=ax, extend='neither')
cbar.set_label('Number of image sets')
cbar.set_ticks(range(0, 11)) 

plt.savefig(heatmap_path, dpi=500)

## 2) Download the images
    
This code searches for and downloads unique Landsat 8 imagery IDs from a specified text file, using EODataAccessGateway (EODAG) to interface with a preferred data provider.

In [None]:
import logging
import os
from eodag import EODataAccessGateway
import re

logging.basicConfig(level=logging.INFO)

dag = EODataAccessGateway()

file_path = '/path/to/text_file_location/textfile.txt'

# Titles of the form LC08_L2SP_<digits>_<YYYYMMDD>_02_T1.
IMAGE_ID_PATTERN = re.compile(r'LC08_L2SP_\d+_\d{8}_02_T1')

# A report line can list several titles (joined by " - "), so every match on
# a line is collected, not only the first one.
unique_image_ids = set()
with open(file_path, 'r') as file:
    for line in file:
        unique_image_ids.update(IMAGE_ID_PATTERN.findall(line))

output_directory = '/path/to/save_L-8_data'
DEFAULT_DOWNLOAD_WAIT = 2
DEFAULT_DOWNLOAD_TIMEOUT = 50

dag.set_preferred_provider('planetary_computer')

# Sorted so that repeated runs request the scenes in the same order.
for image_id in sorted(unique_image_ids):
    search_criteria = {
        "productType": "LANDSAT_C2L2",
        "id": image_id
    }
    search_results, _ = dag.search(**search_criteria)

    if not search_results:
        print(f"No product found for image id: {image_id}")

    for result in search_results:
        # Best effort: a failed download is reported and the loop continues
        # with the remaining products.
        try:
            downloaded_path = result.download(
                progress_callback=None,
                wait=DEFAULT_DOWNLOAD_WAIT,
                timeout=DEFAULT_DOWNLOAD_TIMEOUT,
                outputs_prefix=output_directory)
            print(f"Downloaded: {downloaded_path}")
        except Exception as e:
            print(f"Error downloading image: {str(e)}")

## 3) Resampling

    
This code resamples Landsat 8 raster images to match the resolution and alignment of a reference image, saving the resampled images with preserved folder structure.

In [None]:
import os
import numpy as np
import rioxarray
from rasterio.enums import Resampling
import gc

# Root folder that is walked for input rasters (placeholder path).
input_base_folder = r'/path/to/L-8_data'
# Root folder for the resampled copies (placeholder path).
output_base_folder = r'/path/to/save_L-8_data_resampled'
# Raster that every input is matched to during resampling (placeholder path).
reference_image_path = r'/path/to/reference_image.tif'

os.makedirs(output_base_folder, exist_ok=True)

# Opened once and reused for every input raster.
reference_image = rioxarray.open_rasterio(reference_image_path)

def resample_image(input_path, output_base_folder, reference_image, nodata_value=np.nan):
    """Reproject one raster onto the grid of ``reference_image`` and save it.

    The output is written under ``output_base_folder`` at the same relative
    location the input has under the module-level ``input_base_folder``.
    The input is cast to float32, tagged with ``nodata_value`` and resampled
    bilinearly.
    """
    # Mirror the input's position below the input root in the output tree.
    sub_path = os.path.relpath(input_path, input_base_folder)
    destination = os.path.join(output_base_folder, sub_path)
    destination_dir = os.path.dirname(destination)
    os.makedirs(destination_dir, exist_ok=True)

    source = rioxarray.open_rasterio(input_path).astype("float32")
    source.rio.write_nodata(nodata_value, inplace=True)

    matched = source.rio.reproject_match(
        reference_image,
        resampling=Resampling.bilinear,
    )
    matched.rio.to_raster(destination)

    # Explicit collection after each raster, as in the original workflow.
    gc.collect()

# Resample every file ending in ".TIF" found anywhere below the input root.
for current_dir, _subdirs, names in os.walk(input_base_folder):
    for tif_name in (name for name in names if name.endswith(".TIF")):
        resample_image(os.path.join(current_dir, tif_name), output_base_folder, reference_image)

print(f"Resampling completed. Resampled images saved in: {output_base_folder}")

## 4) Calculation of spectral indices

This code automates the processing of Landsat 8 satellite imagery by clipping, merging, and calculating various spectral indices for areas of interest defined in a shapefile, based on configurations specified in a text file.


In [None]:
import re
import os
import datetime
import numpy as np
import shutil
import geopandas as gpd
import rasterio
from rasterio.mask import mask
from rasterio.merge import merge
from rasterio.plot import show

# Placeholder paths - replace with real locations before running.
shp_file = '/path/to/area_boundaries/subareas_clip_to_AOI0_4326.shp'  # subarea polygons, selected by their 'layer' value
txt_file = '/path/to/text_file_location/textfile.txt'  # report listing the scenes per AOI
output_base_folder = '/path/to/output_files_save_location/landsat8'  # root folder for the index rasters
base_folder = '/path/to/L-8_data_resampled'  # root folder holding one sub-folder per scene

def generate_image_paths(txt_file, base_folder):
    """Parse the scene report and map each AOI to the folders of its scenes.

    Lines starting with ``AOI`` open a new entry keyed by the text before the
    first colon. Lines starting with ``Image`` must contain exactly one colon;
    the text after it holds one or more scene titles separated by `` - ``,
    and each title yields one entry. Empty titles are skipped so that the
    base folder itself is never registered as a scene.

    Args:
        txt_file: Path of the report file.
        base_folder: Folder that contains one sub-folder per scene title.

    Returns:
        dict mapping AOI name to a list of ``{'path': str, 'bands': {}}``.
    """
    image_paths = {}
    current_aoi = None

    with open(txt_file, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()

            if line.startswith('AOI'):
                current_aoi = line.split(':')[0].strip()
                image_paths[current_aoi] = []
                continue

            if not line.startswith('Image'):
                continue

            if not current_aoi:
                print("AOI name not found.")
                continue

            parts = line.split(':')
            if len(parts) != 2:
                print(f"Invalid line format: {line}")
                continue

            # Several same-date scenes are written on one line joined by " - ".
            for image_name in parts[1].split(' - '):
                image_name = image_name.strip()
                if not image_name:
                    continue
                image_path = os.path.join(base_folder, image_name)
                image_paths[current_aoi].append({'path': image_path, 'bands': {}})

    return image_paths

def find_tif_files_for_bands(image_paths):
    """Fill in, for every image entry, the file found for each of bands B2-B7.

    Each image's ``path`` is walked recursively. A file is assigned to a band
    when its upper-cased name ends with ``.TIF`` and contains
    ``_SR_<band>.TIF``; bands without a match stay ``None``. The entries are
    updated in place and the same mapping is returned.
    """
    wanted_bands = ("B2", "B3", "B4", "B5", "B6", "B7")

    for images in image_paths.values():
        for image in images:
            located = dict.fromkeys(wanted_bands)
            for folder, _subdirs, names in os.walk(image['path']):
                for name in names:
                    upper_name = name.upper()
                    if not upper_name.endswith(".TIF"):
                        continue
                    for band in wanted_bands:
                        if f"_SR_{band}.TIF" in upper_name:
                            located[band] = os.path.join(folder, name)
            image['bands'] = located

    return image_paths


# CLIPPING SPECTRAL BANDS TO SUBAREA BOUNDARIES                  
def get_subarea_bounds(shp_file, subarea):
    """Return the union of all geometries whose 'layer' value equals ``subarea``.

    The shapefile is read on every call.
    """
    layers = gpd.read_file(shp_file)
    is_requested = layers['layer'] == subarea
    matching_geometries = layers.loc[is_requested, 'geometry']
    return matching_geometries.unary_union

def extract_date_from_image_name(image_name):
    """Return the first 8-digit run of ``image_name`` as a ``YYYY-MM-DD`` string.

    Args:
        image_name: Scene name such as ``LC08_L2SP_190056_20151217_02_T1``.

    Returns:
        The date formatted as ``'%Y-%m-%d'``.

    Raises:
        ValueError: If the name holds no 8-digit run, or the run is not a
            valid ``YYYYMMDD`` date.
    """
    match = re.search(r'\d{8}', image_name)
    if match is None:
        # Fail with a message naming the offending input instead of an
        # AttributeError on None.
        raise ValueError(f"No 8-digit date found in image name: {image_name!r}")
    return datetime.datetime.strptime(match.group(), '%Y%m%d').strftime('%Y-%m-%d')

def clip_raster_to_aoi(image_path, bounds, output_path):
    """Mask a raster with ``bounds``, crop it to that shape and save it as GeoTIFF.

    The output keeps the source metadata except for driver, height, width
    and transform, which are taken from the cropped result.
    """
    with rasterio.open(image_path) as src:
        clipped, clipped_transform = mask(src, shapes=[bounds], crop=True)
        _, clipped_height, clipped_width = clipped.shape

        profile = src.meta.copy()
        profile["driver"] = "GTiff"
        profile["height"] = clipped_height
        profile["width"] = clipped_width
        profile["transform"] = clipped_transform

        with rasterio.open(output_path, "w", **profile) as dest:
            dest.write(clipped)

def process_and_clip_images(image_paths, shp_file, output_base_folder):
    """Clip every located band of every image to its AOI, one folder per date.

    Output layout: ``<output_base_folder>/<aoi>/<aoi>_<date>/<band>_<date>.tif``.

    Args:
        image_paths: Mapping AOI name -> list of ``{'path', 'bands'}`` entries,
            where ``bands`` maps band name to a file path or ``None``.
        shp_file: Shapefile passed to ``get_subarea_bounds``.
        output_base_folder: Root folder for the clipped rasters.

    NOTE(review): two images of one AOI that share a date write to the same
    output file names, so the later one replaces the earlier - confirm
    whether that is intended.
    """
    for aoi, images in image_paths.items():
        aoi_folder = os.path.join(output_base_folder, aoi)
        os.makedirs(aoi_folder, exist_ok=True)

        # The AOI geometry is the same for all of its images: read it once
        # per AOI (and not at all when there is nothing to clip).
        bounds = get_subarea_bounds(shp_file, aoi) if images else None

        for image in images:
            date = extract_date_from_image_name(os.path.basename(image['path']))
            date_folder = os.path.join(aoi_folder, f"{aoi}_{date}")
            os.makedirs(date_folder, exist_ok=True)

            for band, band_path in image['bands'].items():
                if band_path:
                    output_path = os.path.join(date_folder, f"{band}_{date}.tif")
                    clip_raster_to_aoi(band_path, bounds, output_path)

def merge_images_with_rasterio(image_paths, output_path):
    """Mosaic several rasters into one GeoTIFF.

    The output metadata is copied from the last input and updated with the
    mosaic's height, width and transform.

    Args:
        image_paths: Paths of the rasters to merge (at least one).
        output_path: Path of the GeoTIFF to write.

    Raises:
        ValueError: If ``image_paths`` is empty.
    """
    image_paths = list(image_paths)
    if not image_paths:
        raise ValueError("merge_images_with_rasterio needs at least one input raster")

    src_files_to_mosaic = []
    try:
        for path in image_paths:
            src_files_to_mosaic.append(rasterio.open(path))

        mosaic, out_trans = merge(src_files_to_mosaic)

        template = src_files_to_mosaic[-1]
        out_meta = template.meta.copy()
        out_meta.update({"driver": "GTiff",
                         "height": mosaic.shape[1],
                         "width": mosaic.shape[2],
                         "transform": out_trans,
                         "crs": template.crs})

        with rasterio.open(output_path, "w", **out_meta) as dest:
            dest.write(mosaic)
    finally:
        # Close every dataset that was opened, also when opening a later
        # file, merging or writing fails.
        for src in src_files_to_mosaic:
            src.close()

# CALCULATING SPECTRAL INDICES
def calculate_all_indices_landsat(output_folder, red_path, nir_path, green_path, blue_path, swir1_path, swir2_path, date, aoi):
    """Compute spectral indices from six single-band rasters, one GeoTIFF per index.

    Each output is named ``L8_<aoi>_<index>_<date>.tif`` inside
    ``output_folder`` and uses the CRS and transform of the red band.
    Masked pixels are written as NaN and NaN is declared as nodata.

    Indices written: NDVI, GNDVI, EVI, SAVI, OSAVI, DVI, SR, GEMI, NDWI,
    BSI, MNDBI, LSWI, MNDWI, NDBI, UI.

    Args:
        output_folder: Folder receiving the index rasters.
        red_path, nir_path, green_path, blue_path, swir1_path, swir2_path:
            Paths of the single-band input rasters (band 1 is read).
        date: Text used in the output file names.
        aoi: AOI name used in the output file names.

    NOTE(review): pixel values are divided by 65535. Indices with additive
    constants (EVI, SAVI, OSAVI, GEMI) depend on absolute reflectance, so
    confirm this normalisation against the product's documented scale/offset.
    """
    def _read_band(src):
        # Band 1 as masked float32 divided by 65535; exact zeros become NaN.
        band = src.read(1, masked=True).astype('float32') / 65535.0
        band[band == 0] = np.nan
        return band

    with rasterio.open(red_path) as red_src, \
         rasterio.open(nir_path) as nir_src, \
         rasterio.open(green_path) as green_src, \
         rasterio.open(blue_path) as blue_src, \
         rasterio.open(swir1_path) as swir1_src, \
         rasterio.open(swir2_path) as swir2_src:

        red = _read_band(red_src)
        nir = _read_band(nir_src)
        green = _read_band(green_src)
        blue = _read_band(blue_src)
        swir1 = _read_band(swir1_src)
        swir2 = _read_band(swir2_src)

        # Silence divide/invalid warnings only for these computations instead
        # of changing NumPy's global error state.
        with np.errstate(divide='ignore', invalid='ignore'):
            ndvi = (nir - red) / (nir + red)
            gndvi = (nir - green) / (nir + green)
            evi = 2.5 * ((nir - red) / (nir + 6 * red - 7.5 * blue + 1))
            savi = (1 + 0.5) * ((nir - red) / (nir + red + 0.5))
            osavi = ((nir - red) / (nir + red + 0.16))
            dvi = nir - red
            # Simple Ratio is NIR over red (it was inverted before).
            sr = nir / red
            # GEMI = eta * (1 - 0.25 * eta) - (red - 0.125) / (1 - red);
            # the last denominator uses red, not NIR.
            eta = (2 * (nir ** 2 - red ** 2) + 1.5 * nir + 0.5 * red) / (nir + red + 0.5)
            gemi = eta * (1 - 0.25 * eta) - ((red - 0.125) / (1 - red))
            ndwi = (green - nir) / (green + nir)
            mndwi = (green - swir1) / (green + swir1)
            lswi = (nir - swir1) / (nir + swir1)
            ui = (swir2 - nir) / (swir2 + nir)
            ndbi = (swir1 - nir) / (swir1 + nir)
            mndbi = (swir2 - blue) / (swir2 + blue)
            bsi = ((red + swir1) - (nir + blue)) / ((red + swir1) + (nir + blue))

        indices = {"NDVI": ndvi, "GNDVI": gndvi, "EVI": evi, "SAVI": savi, "OSAVI": osavi, "DVI": dvi, "SR": sr, "GEMI": gemi, "NDWI": ndwi, "BSI": bsi, "MNDBI": mndbi, "LSWI": lswi, "MNDWI": mndwi, "NDBI": ndbi, "UI": ui}

        for index_name, index_data in indices.items():
            output_file = os.path.join(output_folder, f"L8_{aoi}_{index_name}_{date}.tif")

            # Write NaN in masked pixels rather than whatever value sits
            # under the mask.
            filled = np.ma.filled(index_data, np.nan)

            with rasterio.open(output_file, 'w', driver='GTiff', width=red.shape[1], height=red.shape[0], count=1, dtype=str(filled.dtype), crs=red_src.crs, transform=red_src.transform, nodata=np.nan) as dst:
                dst.write(filled, 1)

# PROCESSING
def process_area_bands_landsat(image_paths, shp_file, output_base_folder):
    """Clip, merge and turn the bands of every AOI into spectral-index rasters.

    For each AOI the located bands of all its images are clipped to the AOI
    geometry, bands with more than one clipped file are mosaicked, the
    indices are computed when all six bands (B2-B7) are available, and every
    file in the AOI folder whose name does not start with ``L8_`` is removed.

    Args:
        image_paths: Mapping AOI name -> list of ``{'path', 'bands'}`` entries.
        shp_file: Shapefile passed to ``get_subarea_bounds``.
        output_base_folder: Root folder; one ``<aoi>_<date range>`` folder is
            created per AOI.
    """
    band_names = ["B2", "B3", "B4", "B5", "B6", "B7"]

    for aoi, images in image_paths.items():
        if not images:
            # No dates to name the output folder from; nothing to process.
            print(f"No images listed for {aoi}; skipping.")
            continue

        dates = sorted(extract_date_from_image_name(os.path.basename(image['path'])) for image in images)

        if len(dates) > 1:
            date_range = f"{dates[0]}_{dates[-1]}"
        else:
            date_range = dates[0]

        aoi_folder = os.path.join(output_base_folder, f"{aoi}_{date_range}")
        os.makedirs(aoi_folder, exist_ok=True)

        # The AOI geometry does not change between images: read it once.
        bounds = get_subarea_bounds(shp_file, aoi)

        all_bands_paths = {band: [] for band in band_names}

        for position, image in enumerate(images):
            date = extract_date_from_image_name(os.path.basename(image['path']))

            for band, band_path in image['bands'].items():
                if band_path:
                    # The image position keeps same-date scenes from
                    # overwriting each other's clipped files. These files are
                    # temporary and removed by the cleanup below.
                    output_path = os.path.join(aoi_folder, f"{band}_{date}_{position}.tif")
                    clip_raster_to_aoi(band_path, bounds, output_path)
                    all_bands_paths[band].append(output_path)

        # Mosaic every band that has several clipped files, whether or not
        # the scenes share a date; bands with no file are left empty.
        for band, paths in all_bands_paths.items():
            if len(paths) > 1:
                merged_output_path = os.path.join(aoi_folder, f"{band}_merged.tif")
                merge_images_with_rasterio(paths, merged_output_path)
                all_bands_paths[band] = [merged_output_path]

        # If all necessary bands are present, calculate spectral indices
        if all(all_bands_paths[band] for band in band_names):
            calculate_all_indices_landsat(aoi_folder,
                                          all_bands_paths["B4"][0], all_bands_paths["B5"][0],
                                          all_bands_paths["B3"][0], all_bands_paths["B2"][0],
                                          all_bands_paths["B6"][0], all_bands_paths["B7"][0],
                                          date_range, aoi)
        else:
            missing = [band for band in band_names if not all_bands_paths[band]]
            print(f"Skipping index calculation for {aoi}: missing bands {missing}")

        # Deleting unnecessary files
        for file in os.listdir(aoi_folder):
            file_path = os.path.join(aoi_folder, file)
            if not file.startswith("L8_"):
                if os.path.isfile(file_path):
                    os.remove(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path, ignore_errors=True)


# Initialize and process each area
# Time the whole run: parse the report, locate the band files, then process.
start_time_whole_process = datetime.datetime.now()

image_paths = find_tif_files_for_bands(generate_image_paths(txt_file, base_folder))
process_area_bands_landsat(image_paths, shp_file, output_base_folder)

end_time_whole_process = datetime.datetime.now()
run_duration = end_time_whole_process - start_time_whole_process
elapsed_time_whole_process = run_duration.total_seconds() / 60
print(f"Total processing time: {elapsed_time_whole_process} minutes")