In [30]:
from ccdutils import monitoringutils
import geopandas as gpd 
import pandas as pd
import os
import glob
import ee
import tqdm
from tqdm.contrib.concurrent import process_map
from itertools import repeat
import json
from datetime import datetime

# Suppress TIFF warnings from GDAL
from osgeo import gdal
# ... and suppress errors by pointing GDAL's CPL_LOG option at /dev/null
gdal.SetConfigOption('CPL_LOG', '/dev/null')
# NOTE(review): tqdm has already been imported above. tqdm appears to read its
# TQDM_* environment overrides at import time, in which case setting the
# variable here has no effect in this kernel — confirm, and move it above the
# tqdm import if so.
os.environ["TQDM_DISABLE"] = "True"

def read_processed_cells(log_file):
    """Return the entries stored in a cell log file, one entry per line.

    Parameters
    ----------
    log_file : str
        Path to a plain-text log file.

    Returns
    -------
    list of str
        Every line of the file, in order, with surrounding whitespace
        stripped. An empty list is returned when ``log_file`` does not exist.
    """
    if not os.path.exists(log_file):
        return []

    entries = []
    with open(log_file, 'r') as handle:
        for raw_line in handle:
            entries.append(raw_line.strip())
    return entries

def log_cell(cell_id, log_file):
    """Append ``cell_id`` to ``log_file`` on a line of its own.

    Opening in append mode creates the file when it does not exist yet, so no
    separate "create" path is needed. Every entry is terminated with a newline
    so that consecutive calls never run two ids together on one line.

    Parameters
    ----------
    cell_id : object
        Identifier to record; it is formatted with ``str()`` semantics.
    log_file : str
        Path of the log file to append to.

    Raises
    ------
    FileNotFoundError
        If the directory that should contain ``log_file`` does not exist.
    """
    with open(log_file, 'a') as file:
        file.write(f"{cell_id}\n")

In [None]:
### perform change detection ###
# Every entry matched under data/HR6 is treated as one cell directory.
cell_directories = glob.glob("data/HR6/*")

# Load the entries that earlier runs logged as completed / failed
# (read_processed_cells returns an empty list when a log does not exist yet).
cells_completed, cells_failed = map(
    read_processed_cells,
    (".CD_completed_cells.log", ".CD_failed_cells.log"),
)

# NOTE: the filter that drops cells still waiting on downloads is disabled;
# `remaining_downloads` is not defined anywhere in the visible notebook.
#cell_directories = [i for i in cell_directories if i[9:] not in remaining_downloads]

def batch_iterator(iterable, batch_size):
    """Yield consecutive slices of ``iterable`` of at most ``batch_size`` items.

    Parameters
    ----------
    iterable : sequence
        Anything that supports ``len()`` and slicing (list, tuple, str, ...).
    batch_size : int
        Step between slice starts, and the maximum length of each slice.

    Yields
    ------
    sequence
        ``iterable[start:start + batch_size]`` for each start offset; the
        final slice may be shorter than ``batch_size``.
    """
    starts = range(0, len(iterable), batch_size)
    yield from (iterable[start:start + batch_size] for start in starts)

# def process_cells(folder):
#     img_dir = glob.glob(f"{folder}/images/*.tif")
#     for img in img_dir:
#         monitoringutils.convert_image(img)

        
# for dir in tqdm.tqdm(batch_iterator(cell_directories, 50)):
#     process_map(process_cells, dir)



set()

In [6]:
def check_images(folder):
    """Run the input-image check on every ``.kea`` image of one cell folder.

    Each file matching ``{folder}/images/*.kea`` is passed, together with the
    cell's ``aoi_mask.kea`` path, to ``monitoringutils.check_input_image``.
    Afterwards ``monitoringutils.remove_images_from_metadata`` is called once
    for the folder.

    Parameters
    ----------
    folder : str
        Path of the cell directory; it is also printed as a progress marker.
    """
    print(folder)
    aoi_mask_path = f"{folder}/aoi_mask.kea"
    for image_path in glob.glob(f"{folder}/images/*.kea"):
        # Presumably this removes images that fail the check — confirm in ccdutils.
        monitoringutils.check_input_image(image_path, aoi_mask_path)
    monitoringutils.remove_images_from_metadata(folder)

In [6]:
def check_metadata_record(folder):
    """Check that a cell's image files and metadata records agree in number.

    The function first calls ``monitoringutils.check_for_duplicate_images`` on
    the cell's image directory and ``monitoringutils.remove_images_from_metadata``
    on the cell folder. It then loads ``image_metadata.json``, drops records
    that share an ``image_date``, and compares the number of remaining records
    with the number of ``*.kea`` files in ``{folder}/images``. On a mismatch a
    message is printed and the cell id is appended to
    ``.metadata_record_unmatched.log`` in the current working directory.

    Parameters
    ----------
    folder : str
        Path of the cell directory. The cell id is its last path component.

    Returns
    -------
    None
    """
    print(folder)
    monitoringutils.check_for_duplicate_images(f"{folder}/images")
    monitoringutils.remove_images_from_metadata(folder)  # remove metadata records

    # Take the cell id from the last path component rather than slicing off a
    # fixed 9-character prefix: the slice only works for "data/HR6/<cell>" and
    # yields garbage for e.g. "data/.backup/HR6/<cell>".
    cell_id = os.path.basename(os.path.normpath(folder))

    # Read the metadata as a DataFrame and count one record per image date.
    metadata_fp = f"{folder}/image_metadata.json"
    with open(metadata_fp, 'r') as file:
        metadata = json.load(file)
    meta_df = pd.DataFrame.from_dict(metadata, orient='index').transpose()
    meta_df = meta_df.drop_duplicates(subset=['image_date'], keep='first')

    img_files = glob.glob(f"{folder}/images/*.kea")
    if len(img_files) == len(meta_df):
        return

    print(f"image files {len(img_files)} and metadata record {len(meta_df)} inconsistent.")
    # Append mode creates the log if needed; always end the entry with a
    # newline so consecutive entries stay on separate lines.
    with open(".metadata_record_unmatched.log", 'a') as file:
        file.write(f"{cell_id}\n")

# Run the image/metadata consistency check on every entry of `cell_directories`.
for i in cell_directories:
    check_metadata_record(i)

data/HR6/86bb58547ffffff
data/HR6/86da8c687ffffff
data/HR6/86da85cf7ffffff
data/HR6/86bb29a8fffffff
data/HR6/86daaa09fffffff
data/HR6/86da8348fffffff
data/HR6/86bb088d7ffffff
data/HR6/86ba24b8fffffff
image files 212 and metadata record 211 inconsistent.
data/HR6/86bb5e357ffffff
image files 92 and metadata record 89 inconsistent.
data/HR6/86bb5ea47ffffff
image files 84 and metadata record 83 inconsistent.
data/HR6/86bb50b9fffffff
data/HR6/86da83667ffffff
image files 105 and metadata record 104 inconsistent.
data/HR6/86bb29817ffffff
data/HR6/86da86447ffffff
data/HR6/86bb5e0afffffff
image files 124 and metadata record 123 inconsistent.
data/HR6/86bb76597ffffff
image files 208 and metadata record 207 inconsistent.
data/HR6/86da860c7ffffff
image files 152 and metadata record 151 inconsistent.
data/HR6/86bb50adfffffff
data/HR6/86da832afffffff
data/HR6/86bb7430fffffff
data/HR6/86bb52877ffffff
data/HR6/86bb74d67ffffff
data/HR6/86bb50ad7ffffff
data/HR6/86bb5e18fffffff
image files 240 and metada

In [2]:
def remove_images_from_metadata(folder):
    """Drop metadata records whose image date has no ``.kea`` file on disk.

    The acquisition date of each image is parsed from the last ``_``-separated
    token of its file name (``..._YYYYMMDD.kea``). ``image_metadata.json`` is
    then reduced to one record per ``image_date`` (first occurrence kept),
    restricted to the dates found on disk, and written back in place.

    Parameters
    ----------
    folder : str
        Cell directory containing ``images/*.kea`` and ``image_metadata.json``.

    Returns
    -------
    pandas.DataFrame
        The filtered metadata table that was written back to disk.

    Raises
    ------
    ValueError
        If an image file name does not end in a ``YYYYMMDD`` date token.
    """
    image_dates = set()
    for image_path in glob.glob(f"{folder}/images/*.kea"):
        # Parse the file name only, so underscores in directory names cannot
        # shift which token is read as the date.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        date_token = stem.split('_')[-1]
        image_dates.add(datetime.strptime(date_token, "%Y%m%d").strftime("%Y-%m-%d"))

    metadata_fp = f"{folder}/image_metadata.json"
    with open(metadata_fp, 'r') as file:
        metadata = json.load(file)

    meta_df = pd.DataFrame.from_dict(metadata, orient='index').transpose()  # metadata as a table
    meta_df = meta_df.drop_duplicates(subset=['image_date'], keep='first')  # one record per date
    meta_df = meta_df[meta_df['image_date'].isin(image_dates)]  # keep dates present on disk

    # Serialise before opening for writing: opening with 'w' truncates the
    # file, so a serialisation error must not be able to wipe the metadata.
    serialized = json.dumps(meta_df.to_dict(orient='list'), indent=4)
    with open(metadata_fp, 'w') as file:
        file.write(serialized)
    return meta_df

In [7]:
# Scratch check: list the unique image dates found for one backed-up cell.
f = "data/.backup/HR6/86da8348fffffff"

# Parse the YYYYMMDD token of each .kea file name into YYYY-MM-DD; building a
# set first drops duplicate dates before the list is created.
img_date_list = list({
    datetime.strptime(image_path.split('_')[-1][:-4], "%Y%m%d").strftime("%Y-%m-%d")
    for image_path in glob.glob(f"{f}/images/*.kea")
})
img_date_list

# Disabled step-by-step walk-through of remove_images_from_metadata:
# metadata_fp = f"{f}/image_metadata.json"
# with open(metadata_fp, 'r') as file:
#     metadata = json.load(file)
# meta_df = pd.DataFrame.from_dict(metadata, orient='index').transpose() # read image_metadata as pandas df
# print(len(meta_df))
# meta_df.drop_duplicates(subset=['image_date'], keep='first', inplace=True) # drop duplicates based on date
# print(len(meta_df))
# meta_df = meta_df[meta_df['image_date'].isin(img_date_list)] # drop row based on date
# meta_df

#remove_images_from_metadata(f)

['2022-12-02',
 '2023-02-13',
 '2019-04-15',
 '2022-10-21',
 '2019-09-07',
 '2020-02-06',
 '2022-01-26',
 '2019-01-07',
 '2021-02-20',
 '2020-02-29',
 '2003-02-24',
 '2001-01-26',
 '2019-02-14',
 '2019-09-02',
 '2006-08-12',
 '2021-09-21',
 '2004-08-22',
 '2021-10-13',
 '1999-12-14',
 '2021-03-30',
 '2022-12-30',
 '2022-12-25',
 '2018-09-21',
 '2022-10-02',
 '2005-11-13',
 '2021-03-15',
 '2023-02-25',
 '2013-09-23',
 '2022-04-01',
 '2022-02-05',
 '2019-10-04',
 '2015-04-06',
 '2021-03-10',
 '2007-02-11',
 '2021-06-02',
 '2020-12-12',
 '2021-09-29',
 '2020-02-09',
 '2022-01-19',
 '2023-06-16',
 '2020-11-10',
 '2019-01-10',
 '2019-02-09',
 '2020-02-24',
 '2003-03-12',
 '2020-03-30',
 '2022-03-02',
 '2019-04-07',
 '2022-09-09',
 '2021-04-16',
 '2019-04-05',
 '2024-02-20',
 '2024-07-12',
 '2018-12-26',
 '2023-11-06',
 '2021-04-06',
 '2005-08-09',
 '2021-02-17',
 '2019-12-08',
 '2002-02-05',
 '2011-02-06',
 '2019-03-01',
 '2023-03-05',
 '2023-11-10',
 '2023-09-03',
 '2024-01-04',
 '2023-10-