This script processes a set of raster files representing land-use categories and tree cover data. The main goal is to crop and mask the tree cover and deforestation raster files based on the given land-use category raster files. The script performs the following steps:

1. Import necessary libraries and set up the required paths and constants.
2. Define a crop_and_mask_raster function that takes the paths of the source raster, property raster, output raster, and the maximum height and width. This function reads the source and property rasters, pads them to the same dimensions, creates a mask using the property raster, and applies the mask to the source raster. Finally, the masked raster is saved with updated metadata.
3. Define a get_mask_year helper function that extracts the year from the mask file name.
4. Loop through the sorted raster mask files and create an output folder for each mask file. Copy the mask raster file to the output folder.
5. Determine the maximum dimensions and deforestation files up to the matching year of the mask.
6. Process the 'tree_cover_10_percent_and_above.tif' file by calling the crop_and_mask_raster function with the appropriate parameters.
7. Loop through the deforestation files and process each one using the crop_and_mask_raster function, resulting in masked rasters for both tree cover and deforestation files.

The output is a set of masked raster files: the tree cover and deforestation rasters (and the land-use raster itself), each padded to a common size and masked so that only cells covered by the land-use category raster keep their values.



# Import Libraries

In [1]:
import os
import sys
import rasterio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import shutil
from pathlib import Path
from rasterio.mask import mask


# Import Constants

In [2]:
# Locate the project root: the nearest directory, starting at the current
# working directory and walking upwards, that contains 'constants.py'.
current_dir = os.path.abspath('')

project_root = current_dir
while not os.path.isfile(os.path.join(project_root, 'constants.py')):
    parent_dir = os.path.dirname(project_root)
    if parent_dir == project_root:
        # dirname() of the filesystem root is the root itself, so without this
        # check the search would loop forever when the file does not exist.
        raise FileNotFoundError(
            f"Could not find 'constants.py' in '{current_dir}' or any of its parent directories"
        )
    project_root = parent_dir

# Add the project root to the Python path so that 'constants' can be imported.
# Skip it when already present so re-running the cell does not add duplicates.
if project_root not in sys.path:
    sys.path.append(project_root)


In [3]:
from constants import SERVER_PATH, OUTPUT_PATH, LUP_LUT_RASTER, DEFORESTATION_1120_PATH, TREECOVER_2010


In [4]:
# Constants
# Directory that receives the masked rasters
masked_rasters_subdir = ('ml_data', 'output', 'masked_rasters')
output_folder = os.path.join(SERVER_PATH, *masked_rasters_subdir)

# Raster used as the mask for every input; point this at a different raster
# to mask with something other than the land-use raster
raster_to_use_as_mask = LUP_LUT_RASTER


In [5]:
def _pad_to_shape(data, height, width):
    """Zero-pad a 2-D array along its bottom and right edges to (height, width).

    Raises:
        ValueError: If ``data`` is already larger than the requested shape.
    """
    extra_rows = height - data.shape[0]
    extra_cols = width - data.shape[1]
    if extra_rows < 0 or extra_cols < 0:
        # np.pad rejects negative widths with an opaque message; fail with
        # the actual shapes instead.
        raise ValueError(
            f"Cannot pad array of shape {data.shape} to the smaller shape ({height}, {width})"
        )
    return np.pad(data, ((0, extra_rows), (0, extra_cols)), constant_values=0)


def crop_and_mask_raster(src_raster_path, property_raster_path, output_raster_path, max_height, max_width):
    """Mask band 1 of a source raster with a property raster and save the result.

    Both rasters are zero-padded on the bottom and right to
    ``(max_height, max_width)``. Cells where the padded property raster is 0
    are replaced by the source raster's nodata value (-1 when the source does
    not declare one). The result is written as a single-band int16 raster
    that otherwise reuses the source raster's profile.

    Args:
        src_raster_path: Path of the raster to mask.
        property_raster_path: Path of the raster whose non-zero cells are kept.
        output_raster_path: Path of the raster to write.
        max_height: Number of rows of the output raster.
        max_width: Number of columns of the output raster.

    Raises:
        ValueError: If either raster is larger than ``(max_height, max_width)``.
    """
    output_dtype = 'int16'

    with rasterio.open(src_raster_path) as src, rasterio.open(property_raster_path) as property_raster:
        # Read band 1 of each raster and pad both to the common target size
        src_data = _pad_to_shape(src.read(1), max_height, max_width)
        property_data = _pad_to_shape(property_raster.read(1), max_height, max_width)

        # Zero values in the property raster are treated as "outside the mask"
        mask_data = property_data != 0

        # Use the source raster's nodata value when it has one, otherwise -1
        nodata_value = src.nodata if src.nodata is not None else -1

        # Cast to the output dtype *before* masking so the masked array is
        # built directly in int16 instead of in whatever wider dtype NumPy
        # would promote the source data and the nodata scalar to.
        fill_value = np.asarray(nodata_value).astype(output_dtype)
        src_data_masked = np.where(mask_data, src_data.astype(output_dtype), fill_value)
        print(f"Source data shape:{src_data.shape} Property data shape:{property_data.shape} Mask data shape:, {mask_data.shape} After masking: Source data masked shape:, {src_data_masked.shape}")

        # Save the masked raster with metadata matching what is actually written
        profile = src.profile
        profile.update(
            height=max_height,
            width=max_width,
            nodata=nodata_value,
            dtype=output_dtype,
            count=1,  # only band 1 is written, so do not declare extra bands
        )
        with rasterio.open(output_raster_path, 'w', **profile) as dst:
            dst.write(src_data_masked, 1)



In [None]:
# Helper function to extract the number from the deforestation file name
# NOTE: both helpers below are disabled -- they are wrapped in a string
# literal, so running this cell defines nothing.
#   get_deforestation_number: int() of the last '_'-separated token of the
#       file name without its extension.
#   get_mask_year: int() of the second '_'-separated token of the file name
#       without its extension.
'''def get_deforestation_number(file_path):
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    number = int(file_name.split('_')[-1])
    return number

# Helper function to extract the year from the mask file name
def get_mask_year(file_path):
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    year = int(file_name.split('_')[1])
    return year'''


In [None]:
# Disabled (wrapped in a string literal) version of the processing loop.
# For every '*_raster.tif' mask file it would: create an output folder named
# after the mask, copy the mask into it, then mask the tree cover raster,
# every deforestation raster whose number is <= the mask year, and the mask
# raster itself.
# NOTE(review): it relies on get_mask_year / get_deforestation_number and on
# rasters_to_use_as_mask, output_base_folder, deforestation_base_folder and
# tree_cover_base_path, none of which are defined in the visible part of this
# file -- confirm before re-enabling.
'''# This creates individual deforestation files increasing for each year
# Process
raster_mask_files = sorted([path for path in Path(rasters_to_use_as_mask).rglob('*_raster.tif')])

for file_path in raster_mask_files:
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    print(f"Processing {file_name}.tiff...")
    output_folder = os.path.join(output_base_folder, file_name)
    os.makedirs(output_folder, exist_ok=True)

    # Copy the mask raster file to the output folder
    shutil.copy(file_path, os.path.join(output_folder, f'{file_name}_mask.tif'))

    # Determine maximum dimensions and deforestation files up to the matching year of the mask
    mask_year = get_mask_year(file_path)
    deforestation_files = sorted([path for path in Path(deforestation_base_folder).rglob('deforestation_*.tif') if get_deforestation_number(path) <= mask_year], key=get_deforestation_number)

    max_height, max_width = 0, 0
    with rasterio.open(tree_cover_base_path) as tree_cover_src:
        for deforestation_file_path in deforestation_files:
            with rasterio.open(deforestation_file_path) as deforestation_src, rasterio.open(file_path) as mask_src:
                # Determine the maximum dimensions of all rasters
                max_height = max(max_height, mask_src.height, tree_cover_src.height, deforestation_src.height)
                max_width = max(max_width, mask_src.width, tree_cover_src.width, deforestation_src.width)

    # Process tree_cover_10_percent_and_above.tif
    tree_cover_output_raster = os.path.join(output_folder, 'tree_cover_10_percent_and_above_masked.tif')
    crop_and_mask_raster(tree_cover_base_path, file_path, tree_cover_output_raster, max_height, max_width)

    # Process deforestation_XX.tif
    for deforestation_file_path in deforestation_files:
        deforestation_file_name = os.path.splitext(os.path.basename(deforestation_file_path))[0]
        deforestation_output_raster = os.path.join(output_folder, f'{deforestation_file_name}_masked.tif')
        crop_and_mask_raster(deforestation_file_path, file_path, deforestation_output_raster, max_height, max_width)

    # Process lup_{year}_grupo_raster.tif (masking it on itself)
    lup_masked_output_raster = os.path.join(output_folder, f'{file_name}_masked.tif')
    crop_and_mask_raster(file_path, file_path, lup_masked_output_raster, max_height, max_width)'''


In [None]:
# Process
# Disabled (wrapped in a string literal) variant of the per-mask loop. Unlike
# a loop that masks every deforestation raster up to the mask year, this one
# masks only the deforestation raster whose number equals the mask year, plus
# the tree cover raster and the mask raster itself.
# NOTE(review): the 'for path in all_deforestation_files' loop rebuilds the
# same deforestation_files list on every iteration and never uses
# deforestation_number. The code also relies on helpers and names
# (get_mask_year, get_deforestation_number, rasters_to_use_as_mask,
# output_base_folder, deforestation_base_folder, tree_cover_base_path) that
# are not defined in the visible part of this file -- confirm before
# re-enabling.
'''raster_mask_files = sorted([path for path in Path(rasters_to_use_as_mask).rglob('*_raster.tif')])

for file_path in raster_mask_files:
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    print(f"Processing {file_name}.tiff...")
    output_folder = os.path.join(output_base_folder, file_name)
    os.makedirs(output_folder, exist_ok=True)

    # Determine maximum dimensions and deforestation files up to the matching year of the mask
    mask_year = get_mask_year(file_path)
    all_deforestation_files = list(Path(deforestation_base_folder).rglob('deforestation_*.tif'))

    for path in all_deforestation_files:
        deforestation_number = get_deforestation_number(path)
        deforestation_files = sorted([path for path in Path(deforestation_base_folder).rglob('deforestation_*.tif') if get_deforestation_number(path) <= mask_year], key=get_deforestation_number)

    max_height, max_width = 0, 0
    with rasterio.open(tree_cover_base_path) as tree_cover_src:
        for deforestation_file_path in deforestation_files:
            with rasterio.open(deforestation_file_path) as deforestation_src, rasterio.open(file_path) as mask_src:
                # Determine the maximum dimensions of all rasters
                max_height = max(max_height, mask_src.height, tree_cover_src.height, deforestation_src.height)
                max_width = max(max_width, mask_src.width, tree_cover_src.width, deforestation_src.width)
                print(max_height)
                print(max_width)

    # Process tree_cover_10_percent_and_above.tif
    tree_cover_output_raster = os.path.join(output_folder, 'tree_cover_10_percent_and_above_masked.tif')
    crop_and_mask_raster(tree_cover_base_path, file_path, tree_cover_output_raster, max_height, max_width)

    # Process deforestation_XX.tif
    # Find the corresponding deforestation file for the current year
    corresponding_deforestation_file = None
    for deforestation_file_path in deforestation_files:
        if get_deforestation_number(deforestation_file_path) == mask_year:
            corresponding_deforestation_file = deforestation_file_path
            break

    if corresponding_deforestation_file:
        deforestation_file_name = os.path.splitext(os.path.basename(corresponding_deforestation_file))[0]
        deforestation_output_raster = os.path.join(output_folder, f'{deforestation_file_name}_masked.tif')
        crop_and_mask_raster(corresponding_deforestation_file, file_path, deforestation_output_raster, max_height, max_width)

    # Process lup_{year}_grupo_raster.tif (masking it on itself)
    lup_masked_output_raster = os.path.join(output_folder, f'{file_name}_masked.tif')
    crop_and_mask_raster(file_path, file_path, lup_masked_output_raster, max_height, max_width)'''


In [6]:
# Process
# Determine the common output size: the largest height and width among every
# raster involved. The mask raster is included explicitly so the size stays
# valid when raster_to_use_as_mask is pointed at something other than
# LUP_LUT_RASTER.
max_height, max_width = 0, 0
for raster_path in (TREECOVER_2010, DEFORESTATION_1120_PATH, LUP_LUT_RASTER, raster_to_use_as_mask):
    with rasterio.open(raster_path) as raster:
        max_height = max(max_height, raster.height)
        max_width = max(max_width, raster.width)
print(max_height)
print(max_width)

# The output files cannot be created inside a directory that does not exist
# yet, so make sure it is there before writing.
os.makedirs(output_folder, exist_ok=True)

# Process TREECOVER_2010
tree_cover_output_raster = os.path.join(output_folder, 'treecover_2010_masked.tif')
crop_and_mask_raster(TREECOVER_2010, raster_to_use_as_mask, tree_cover_output_raster, max_height, max_width)

# Process DEFORESTATION_1120_PATH
deforestation_1120_output_raster = os.path.join(output_folder, 'deforestation11_20_masked.tif')
crop_and_mask_raster(DEFORESTATION_1120_PATH, raster_to_use_as_mask, deforestation_1120_output_raster, max_height, max_width)

# Process LUP_LUT_RASTER (masking it with the mask raster, i.e. on itself by default)
lup_masked_output_raster = os.path.join(output_folder, 'lup_10_masked.tif')
crop_and_mask_raster(LUP_LUT_RASTER, raster_to_use_as_mask, lup_masked_output_raster, max_height, max_width)


22512
20381
Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)
Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)
Source data shape:(22512, 20381) Property data shape:(22512, 20381) Mask data shape:, (22512, 20381) After masking: Source data masked shape:, (22512, 20381)
