In [1]:
# IMPORT REQUIRED MODULES
import os
import csv
import json
import geopandas as gpd
import pandas as pd
import xarray as xr
from datetime import datetime
from shapely.geometry import box
from urllib.request import urlopen

In [4]:
# DEFINE THE OUTPUT DIRECTORY TO WHICH THE 6-DAY PAIRS SHOULD BE DOWNLOADED
output_dir = 'R:/KOGE_BUGT/ITS_LIVE_6_DAY'
# makedirs creates any missing parent folders and is a no-op if the folder already exists
os.makedirs(output_dir, exist_ok=True)

# DEFINE THE STUDY REGION FOR THE 6-DAY PAIRS (KOGE BUGT C)
bounds = 162212, -2741663, 210963, -2712025 # xmin, ymin, xmax, ymax
aoi_3413 = gpd.GeoDataFrame(geometry=[box(*bounds)], crs=3413)

# SEARCH AND PRINT THE NUMBER OF DATACUBES INTERSECTING THE STUDY REGION
url = 'https://its-live-data.s3.amazonaws.com/datacubes/catalog_v02.json'
# Download the catalog exactly once; the context manager closes the HTTP response
with urlopen(url) as response:
    data_json = json.load(response)
# Build one row per catalog feature
gdf = gpd.GeoDataFrame.from_features(data_json['features'])
# Keep only the datacubes whose 'epsg' property is 3413
gdf = gdf[gdf['epsg']==3413]
# Tag the catalog geometries as EPSG:4326, then reproject them to EPSG:3413 to match the AOI
gdf = gdf.set_crs(4326).to_crs(3413)
gdf_intsct = gdf[gdf.intersects(aoi_3413.geometry.values[0])]
number_of_datacubes = len(gdf_intsct)
print('The number of datacubes intersecting the study region is:', number_of_datacubes)

The number of datacubes intersecting the study region is: 2


In [5]:
# PREPARE DATA STRUCTURES TO PROCESS THE ZARR FILES
url_zarr_list = gdf_intsct.zarr_url.values
xds_list = []          # one lazily-evaluated, clipped and filtered dataset per datacube
fn_list = []           # output folder name for each dataset, same order as xds_list
filename_counts = {}   # how many times each tile base name has been seen so far
var_obj_list = ['satellite_img1', 'satellite_img2']  # variables cast to str below

# ITERATE THROUGH THE LIST OF ZARR FILES, EXTRACT INFO AND PROCESS USING XARRAY
for url_zarr in url_zarr_list:
    tile_name = os.path.basename(url_zarr).split('.')[0]
    print(f'Preparing dataset from tile {tile_name}')
    xds = xr.open_zarr(url_zarr)

    # Give a repeated tile name a 'T<n>' suffix (n >= 2) so every dataset gets its own
    # entry in fn_list. The name is appended only AFTER the suffix has been applied;
    # appending the bare name first would make duplicate tiles share one output folder.
    filename_counts[tile_name] = filename_counts.get(tile_name, 0) + 1
    occurrence = filename_counts[tile_name]
    filename = tile_name if occurrence == 1 else f'{tile_name}T{occurrence}'
    fn_list.append(filename)

    # Keep only the variables needed to name and write the velocity images
    xds = xds[['satellite_img1','satellite_img2','acquisition_date_img1','acquisition_date_img2', 'date_dt','v']]
    # NOTE(review): the `.rio` accessor is provided by rioxarray, which this notebook never
    # imports explicitly - presumably it gets registered as a side effect of xarray loading
    # its backend plugins. Confirm, or add `import rioxarray` to the imports cell.
    xds = xds.rio.write_crs('epsg:3413')
    xds = xds.rio.clip(aoi_3413.geometry) # Clip to the study region
    # Include 6-day and 12-day pairs (mask is computed eagerly so drop=True can be applied)
    xds = xds.where(((xds.date_dt.dt.days == 6) | (xds.date_dt.dt.days == 12)).compute(), drop=True)
    # Keep pairs whose FIRST image has satellite label '1A' or '1B' (satellite_img2 is not checked)
    xds = xds.where(((xds.satellite_img1 == '1A') | (xds.satellite_img1 == '1B')).compute(), drop=True)
    # Cast the satellite labels to str; they are interpolated into file names when saving
    for var in var_obj_list:
        xds[var] = xds[var].astype('str')
    xds = xds.chunk()
    xds_list.append(xds)
    print('Appending to list.')

Preparing dataset from tile ITS_LIVE_vel_EPSG3413_G0120_X150000_Y-2750000
Appending to list.
Preparing dataset from tile ITS_LIVE_vel_EPSG3413_G0120_X250000_Y-2750000
Appending to list.


In [6]:
# LOOP THROUGH THE XDS_LIST AND FN_LIST TOGETHER, SAVING THE GEOTIFF IMAGES
for xds, filename in zip(xds_list, fn_list):
    # One output folder per tile
    tile_dir = f'{output_dir}/{filename}'
    os.makedirs(tile_dir, exist_ok=True)
    print(f'Computing dataset {filename}. This may take a while...')
    # Load the dataset into memory; the context manager closes it once all images are written.
    # A separate name is used so the loop variable is not rebound inside the loop body.
    with xds.compute() as xds_loaded:
        print('Computed dataset. Saving images...')
        # Counts per output name, local to this tile (kept separate from the tile-name
        # counter used when preparing the datasets so that one does not overwrite the other)
        outname_counts = {}
        for _, x in xds_loaded.groupby('mid_date'):
            # File name pattern: <date img1>_<date img2>_S<satellite img1>_S<satellite img2>
            outname = f'{x.acquisition_date_img1.dt.date.values[0]}_{x.acquisition_date_img2.dt.date.values[0]}_S{x.satellite_img1.values[0]}_S{x.satellite_img2.values[0]}'
            # The first occurrence keeps the plain name; repeats get '_T2', '_T3', ...
            outname_counts[outname] = outname_counts.get(outname, 0) + 1
            occurrence = outname_counts[outname]
            identifier = f'_T{occurrence}' if occurrence > 1 else ''
            outfpath = f'{tile_dir}/{outname}{identifier}.tif'
            # ZSTD's compression level is set through `zstd_level`; `zlevel` only applies to
            # DEFLATE and would be ignored when compress='ZSTD'.
            x.v.rio.to_raster(outfpath, compress='ZSTD', predictor=3, zstd_level=1)
        print('Saved images.')

Computing dataset ITS_LIVE_vel_EPSG3413_G0120_X150000_Y-2750000. This may take a while...
Computed dataset. Saving images...
Saved images.
Computing dataset ITS_LIVE_vel_EPSG3413_G0120_X250000_Y-2750000. This may take a while...
Computed dataset. Saving images...
Saved images.


In [8]:
# DELETE ANY FILES WHOSE SECOND ACQUISITION DATE (THE SECOND DATE IN THE FILE NAME) FALLS
# OUTSIDE THE DESIRED DATE RANGE (2016-01-01 TO 2023-12-31, BOTH INCLUSIVE)
start_date = datetime(2016, 1, 1)
end_date = datetime(2023, 12, 31)


def remove_tifs_outside_period(directory, start_date, end_date):
    """Delete the '.tif' files in `directory` dated outside [start_date, end_date].

    File names are expected to look like '<date1>_<date2>_...tif' with dates written as
    YYYY-MM-DD; the second date is the one compared against the period.

    Parameters:
        directory: folder whose '.tif' files are checked (sub-folders are not searched).
        start_date: earliest datetime to keep (inclusive).
        end_date: latest datetime to keep (inclusive).

    Returns:
        List of the paths that were deleted.

    Raises:
        OSError: if the directory cannot be listed or a file cannot be removed.
    """
    removed = []
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.tif'):
            continue
        parts = filename.split('_')
        try:
            file_end_date = datetime.strptime(parts[1], '%Y-%m-%d')
        except (IndexError, ValueError):
            # Leave files with an unexpected name untouched instead of aborting half-way
            # through a destructive clean-up.
            print(f'Skipping {filename}: name does not match <date1>_<date2>_... pattern')
            continue
        if file_end_date < start_date or file_end_date > end_date:
            file_path = os.path.join(directory, filename)
            os.remove(file_path)
            removed.append(file_path)
    return removed


# Apply the filter to every tile folder written under output_dir, rather than to a single
# hardcoded absolute path, so all downloaded tiles are treated the same way.
filtered_files = []  # paths of the files that were deleted
for tile_name in fn_list:
    directory = os.path.join(output_dir, tile_name)
    filtered_files.extend(remove_tifs_outside_period(directory, start_date, end_date))

print("Deletion of tiff files outside the desired study period complete.")

Deletion of tiff files outside the desired study period complete.
