# Same day image mosaic

Merge (mosaic) images that were acquired on the same date into a single raster, and rename the output after the acquisition date.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
import rasterio
import rioxarray
from rioxarray.merge import merge_arrays
from tqdm import tqdm

In [2]:
def extract_date_sen2(fp):
    """Return the date encoded at the start of an image file name.

    The text before the first underscore of the path's base name is parsed
    with ``pd.Timestamp`` and returned as a ``datetime.date``.
    """
    file_name = os.path.basename(fp)
    date_token, _, _ = file_name.partition("_")
    return pd.Timestamp(date_token).date()

In [8]:
# Output root: the mosaicked rasters are written to one sub-directory per lake id.
mosaic_dir_sen2 = "/home/xinchenh/LData/sentinel2_images/rgb_mosaic"
# Input root: the raw .tif files are read from one sub-directory per lake id.
raw_dir_sen2 = "/home/xinchenh/LData/sentinel2_images/rgb_raw"

In [20]:
def mosaic_S2_images(lake_id,
                       raw_dir,
                       mosaic_dir):
    """Mosaic all images of one lake that share a date into a single raster.

    Every ``*.tif`` under ``{raw_dir}/{lake_id}`` is grouped by the date
    returned by ``extract_date_sen2``. For each date one file named
    ``YYYY_MM_DD_mosaic_S2.tif`` is written to ``{mosaic_dir}/{lake_id}``:

    * a date with a single image is re-exported under the new name;
    * a date with several images has every image reprojected to the CRS of
      the first image (in sorted path order) and merged with ``merge_arrays``.

    Parameters
    ----------
    lake_id : identifier used as the sub-directory name under both roots.
    raw_dir : root directory holding the raw images.
    mosaic_dir : root directory receiving the mosaics.

    Returns
    -------
    None
    """
    # Sorting makes the reference CRS and the merge order reproducible;
    # glob itself returns paths in arbitrary order.
    sen2_tifs = sorted(glob.glob(f"{raw_dir}/{lake_id}/*.tif"))

    # Create the output directory for this lake. An already existing
    # directory is fine; any other failure (e.g. permissions) is raised
    # instead of being silently swallowed.
    out_dir = f"{mosaic_dir}/{lake_id}"
    os.makedirs(out_dir, exist_ok=True)

    # Group the file paths by date. Keeping real lists avoids the
    # join/split round trip, which breaks on paths containing a comma.
    fps_by_date = {}
    for fp in sen2_tifs:
        fps_by_date.setdefault(extract_date_sen2(fp), []).append(fp)

    # Loop over each date in chronological order.
    for dt in tqdm(sorted(fps_by_date)):
        image_fps = fps_by_date[dt]

        # Name the output after the acquisition date.
        file_name = dt.strftime("%Y_%m_%d") + "_mosaic_S2.tif"
        out_fp = f"{out_dir}/{file_name}"

        if len(image_fps) == 1:
            # Only one image: nothing to mosaic, just re-export it.
            with rioxarray.open_rasterio(image_fps[0]) as raster:
                raster.rio.to_raster(out_fp)
            continue

        # Use the CRS of the first image as the common target.
        with rioxarray.open_rasterio(image_fps[0]) as first:
            crs = first.rio.crs

        # Reproject each image to the target CRS. The source file is closed
        # as soon as its reprojected copy has been produced.
        tile_list = []
        for image in image_fps:
            with rioxarray.open_rasterio(image) as src:
                tile_list.append(src.rio.reproject(crs))

        # Merge the tiles and write the mosaic.
        merged_raster = merge_arrays(dataarrays=tile_list,
                                     crs=crs)
        merged_raster.rio.to_raster(out_fp)
        merged_raster.close()
    return

In [21]:
# Every entry directly under the raw directory is expected to be named with
# an integer lake id; the base name of each entry is converted to int.
lake_id_list = [
    int(os.path.basename(entry))
    for entry in glob.glob(f"{raw_dir_sen2}/*")
]

In [22]:
# Build the same-day mosaics for every lake found in the raw directory.
for lake_id in lake_id_list:
    mosaic_S2_images(lake_id, raw_dir_sen2, mosaic_dir_sen2)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 52/52 [00:00<00:00, 75.29it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 24/24 [00:01<00:00, 21.96it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 38/38 [00:00<00:00, 73.40it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 14/14 [00:00<00:00, 21.88it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:00<00:00, 45.79it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 18/18 [00:00<00:00, 77.45it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 17/17 [00:00<00:00, 75.30it/s]
100%|█

In [4]:
# def mosaic_S1_images(lake_id,
#                        raw_dir,
#                        mosaic_dir):
#     # find the tif paths
#     sen1_tifs = glob.glob(f"{raw_dir}/{lake_id}/*.tif")

#     # create directory for this lake
#     try:
#         os.mkdir(f"{mosaic_dir}/{lake_id}")
#     except:
#         pass

#     # create a match table between file path and date
#     table = pd.DataFrame(sen1_tifs, columns = ["fp"])
#     table["dt"] = table.fp.apply(extract_date_sen1)
#     table = table.sort_values("dt").reset_index(drop = True)
#     table = table.groupby("dt").agg(lambda x: ','.join(x).split(","))
#     table["count"] = table.fp.apply(len)

#     # loop on each date
#     for dt in tqdm(table.index):
#         # get the file path for each image
#         image_fps = table.loc[dt]["fp"]

#         if len(image_fps) == 1:
#             # only 1 image, no need to mosaic, just copy to the output dir    
#             merged_raster = rioxarray.open_rasterio(image_fps[0])
#         else:
#             # use the first image crs
#             tile_list = []
#             crs = rioxarray.open_rasterio(image_fps[0]).rio.crs

#             # read each image
#             # reproject to the first image crs
#             for image in image_fps:
#                 tile = rioxarray.open_rasterio(image).rio.reproject(crs)
#                 tile_list.append(tile)

#             # merge tiles
#             merged_raster = merge_arrays(dataarrays=tile_list, 
#                         crs = crs)
#             tile.close()



#         # export the raster with the acquisition date
#         date_string = dt.strftime("%Y_%m_%d")
#         file_name = date_string + "_mosaic_S1.tif"
#         merged_raster.rio.to_raster(f"{mosaic_dir}/{lake_id}/{file_name}")
#         merged_raster.close()

#     return

In [5]:
# mosaic_S1_images(lake_id = lake_id,
#                        raw_dir = raw_dir_sen1,
#                        mosaic_dir = mosaic_dir_sen1)