In [7]:
import os
from dotenv import load_dotenv
import geopandas as geopd
import pandas as pd
import rasterio
from pathlib import Path

# Load environment variables via python-dotenv (presumably from a local .env file).
load_dotenv()
# NOTE(review): absolute, machine-specific working directory. Every relative
# path used later in the notebook (e.g. "data/sentinel2/raw") resolves against
# it, so this line must be adapted before running on another machine.
os.chdir("/home/me/workspace/det_remota/trabalho_final")

In [8]:
RAW_FILES_PATH = Path("data/sentinel2/raw")


def parse_file_path(path: Path) -> dict:
    """Split the path of a raw tile into its descriptive components.

    The expected layout is
    ``<RAW_FILES_PATH>/<layer>/<year>/<month>/<grid_cell>.tiff``.

    Args:
        path: Path of a raw ``.tiff`` file located under ``RAW_FILES_PATH``.

    Returns:
        A dict with the string values ``layer``, ``year``, ``month`` and
        ``grid_cell`` (file name without the ``.tiff`` extension), plus the
        untouched input under ``path``.

    Raises:
        ValueError: If ``path`` is not located under ``RAW_FILES_PATH`` or does
            not have exactly the four expected components below it.
    """
    # Work on path components instead of string replacement so the root
    # directory is defined in a single place and the OS separator is irrelevant.
    relative = Path(path).relative_to(RAW_FILES_PATH)

    try:
        layer, year, month, file_name = relative.parts
    except ValueError:
        raise ValueError(
            f"Expected <layer>/<year>/<month>/<grid_cell>.tiff below "
            f"{RAW_FILES_PATH}, got: {path}"
        ) from None

    return {
        'layer': layer,
        'year': year,
        'month': month,
        # Strip only the trailing extension, never a '.tiff' inside the name.
        'grid_cell': file_name.removesuffix('.tiff'),
        'path': path,
    }

# Index every raw tile found below RAW_FILES_PATH, one row per file.
# - sorted(): glob order depends on the filesystem; a stable order keeps the
#   frame (and any order-dependent step consuming it, such as mosaicking)
#   reproducible across runs and machines.
# - columns=...: guarantees the expected columns even when no file is found,
#   so the summary below shows an empty result instead of raising KeyError.
files_df = pd.DataFrame(
    [
        parse_file_path(path)
        for path in sorted(RAW_FILES_PATH.glob('**/*.tiff'))
    ],
    columns=['layer', 'year', 'month', 'grid_cell', 'path'],
)
# Number of tiles available per layer / year / month (displayed as cell output).
files_df.groupby(["layer", "year", "month"]).path.count()

layer             year  month
FALSE-COLOR       2017  8        21
                  2018  8        21
MOISTURE          2017  8        21
                  2018  8        21
MSAVI2            2017  8        21
                  2018  8        21
NDWI              2017  8        21
                  2018  8        21
TRUE_COLOR        2017  8        21
                  2018  8        21
VEGETATION_INDEX  2017  8        21
                  2018  8        21
Name: path, dtype: int64

In [9]:
import rasterio.merge
import shutil

MERGED_TIFFS_PATH = Path('data/sentinel2/merged')


def merge_tiff(layer, year, month, path: list[Path], expected_count: int = 21):
    """Mosaic all tiles of one layer/year/month into a single GeoTIFF.

    The result is written to
    ``MERGED_TIFFS_PATH/<layer>/<year>/<layer>_<year>_<MM>.tiff``.

    Args:
        layer: Layer name, used for the output directory and file name.
        year: Year of the tiles, used for the output directory and file name.
        month: Month of the tiles; zero-padded to two digits in the file name.
        path: Paths of the tiles to merge.
        expected_count: Minimum number of tiles required for the set to be
            considered complete. Defaults to 21.

    Returns:
        The number of merged tiles, or 0 when the set was incomplete and
        nothing was written.
    """
    tile_paths = list(path)

    # Validate before touching the filesystem: an incomplete set must neither
    # open any raster nor leave an empty output directory behind.
    if len(tile_paths) < expected_count:
        print(f"Dataset was not complete for layer {layer} - {year}/{month}")
        print(f"Only {len(tile_paths)} rasters were found")
        return 0

    out_path = MERGED_TIFFS_PATH.joinpath(
        f'{layer}/{year}/{layer}_{year}_{str(month).zfill(2)}.tiff'
    )
    out_path.parent.mkdir(parents=True, exist_ok=True)

    print(f'Will merge tiff files for layer {layer} - {year}/{month}')
    # Hand the paths straight to rasterio: merge() opens and closes each file
    # itself, so no dataset handle is left open after the call.
    rasterio.merge.merge(
        tile_paths,
        use_highest_res=True,
        dst_path=out_path
    )

    return len(tile_paths)

# Set to False to keep the mosaics already on disk instead of rebuilding them.
REBUILD_MERGED_TIFFS = True

if REBUILD_MERGED_TIFFS:
    # Start from an empty output tree so stale mosaics never survive a re-run.
    if MERGED_TIFFS_PATH.exists():
        shutil.rmtree(MERGED_TIFFS_PATH)

    MERGED_TIFFS_PATH.mkdir(parents=True)

    # One row per layer/year/month, with `path` holding that group's tile paths.
    tile_groups = (
        files_df
        .groupby(['layer', 'year', 'month'])
        .path.unique()
        .reset_index()
    )

    # Plain loop: merge_tiff is called for its side effect (writing a file),
    # so there is no reason to collect results through DataFrame.apply.
    for group in tile_groups.itertuples(index=False):
        merge_tiff(
            layer=group.layer,
            year=group.year,
            month=group.month,
            path=group.path,
        )

Will merge tiff files for layer FALSE-COLOR - 2017/8
Will merge tiff files for layer FALSE-COLOR - 2018/8
Will merge tiff files for layer MOISTURE - 2017/8
Will merge tiff files for layer MOISTURE - 2018/8
Will merge tiff files for layer MSAVI2 - 2017/8
Will merge tiff files for layer MSAVI2 - 2018/8
Will merge tiff files for layer NDWI - 2017/8
Will merge tiff files for layer NDWI - 2018/8
Will merge tiff files for layer TRUE_COLOR - 2017/8
Will merge tiff files for layer TRUE_COLOR - 2018/8
Will merge tiff files for layer VEGETATION_INDEX - 2017/8
Will merge tiff files for layer VEGETATION_INDEX - 2018/8
