# Annotation curation

First, adjust the image directory location in the cell below. It needs to be an absolute path.

Start the interface by clicking the ">" (run) button in the top menu bar. Use the interface that appears below to correct any mistakes, then click "Save". If no corrections are required, click "Next".

In [None]:
# Absolute path of the directory that holds the image and transcription
# sub-directories; the next cell changes into it with os.chdir().
IMAGES_ROOT_PATH = '/tmp/global2/ibezrukov2/herbarium/all_images'

In [None]:
import folium
from IPython.display import IFrame
import json
import glob
import os
import base64
from PIL import Image
import io
import ipywidgets as widgets
from IPython.display import display, clear_output

#----- Configuration -----

# IMAGES_ROOT_PATH must already be defined (it is set in an earlier cell).
# Every directory name below is relative to it.
os.chdir(IMAGES_ROOT_PATH)

image_dir = 'processed_images_orientated'
json_dir = 'geocoded_transcriptions'
processed_json_dir = 'corrected_transcriptions'  # Directory to save processed JSON files

# Invariant: image_files[i] and json_files[i] describe the same specimen.
# show() and save_json() address both lists with one shared index, so the
# lists must never drift apart.
image_files = []
json_files = []

# Sorted so the browsing order is reproducible; os.listdir() returns entries
# in arbitrary order.
for filename in sorted(os.listdir(image_dir)):
    if not filename.endswith(('.png', '.jpg', '.jpeg')):
        continue
    image_files.append(os.path.join(image_dir, filename))

    # Find the corresponding JSON file
    image_base_name = os.path.basename(filename).split('.')[0]  # Image name up to the first dot
    # Escape the name so characters such as '[' are not read as glob syntax;
    # sort the hits so the same file is picked on every run.
    matches = sorted(
        glob.glob(os.path.join(json_dir, f"{glob.escape(image_base_name)}*.json"))
    )
    if matches:
        json_files.append(matches[0])
    else:
        # No transcription exists for this image. Append a placeholder path
        # anyway so the two lists stay index-aligned; opening it fails and
        # show() reports the record as a missing file.
        json_files.append(os.path.join(json_dir, f"{image_base_name}.json"))

processed_json_files = [
    os.path.join(processed_json_dir, os.path.basename(f)) for f in json_files
]
os.makedirs(processed_json_dir, exist_ok=True)

#----- Widgets setup -----
# Index (into image_files / json_files) of the record currently on screen
current_index = 0

# Output areas: one for the map, one for the specimen image
out_map, out_image = widgets.Output(), widgets.Output()

# Editable text box holding the JSON metadata of the current record
json_text = widgets.Textarea(
    layout=widgets.Layout(width='100%', height='400px'),
    description='',
    placeholder='JSON metadata...',
    value='',
)

# Navigation and save controls
prev_btn = widgets.Button(description='Previous')
next_btn = widgets.Button(description='Next')
save_btn = widgets.Button(description='Save')

# When unchecked, save_json() drops the "geocoding" field before writing
keep_geoloc_checkbox = widgets.Checkbox(description='Keep geoloc', value=True)

# Single-line area for error and status messages
warning_label = widgets.Label(
    layout=widgets.Layout(height='30px', width='100%'),
    value='',
)

# "Jump to" controls: a substring to search for and the button that triggers it
jump_to_text = widgets.Text(description='', placeholder='Enter substring...', value='')
jump_to_btn = widgets.Button(description='Jump to')

def _build_map_frame(data):
    """Return an IFrame holding a folium map for *data*, or None if there is nothing to map.

    None is returned when *data* has no "geocoding"/"results" entry or when
    the result list is empty. Only the first result is drawn.
    """
    if "geocoding" not in data or "results" not in data["geocoding"]:
        return None
    results = data["geocoding"]["results"]
    if not results:
        return None

    geometry = results[0]["geometry"]
    location = geometry["location"]
    m = folium.Map(location=[location["lat"], location["lng"]], zoom_start=6)

    # NOTE(review): the record layout looks like Google Geocoding API output,
    # where "bounds" is optional - confirm. Without it, the map is still
    # shown, just without the bounding box.
    bounds = geometry.get("bounds")
    if bounds:
        southwest = bounds["southwest"]
        northeast = bounds["northeast"]
        folium.Rectangle(
            bounds=[[southwest["lat"], southwest["lng"]], [northeast["lat"], northeast["lng"]]],
            color='blue', fill=True, fill_color='blue', fill_opacity=0.1
        ).add_to(m)

    # Embed the rendered map as a base64 data URL so the IFrame needs no file on disk.
    map_html = m._repr_html_().encode('utf-8')
    map_base64 = base64.b64encode(map_html).decode('utf-8')
    html_data_url = f'data:text/html;base64,{map_base64}'
    return IFrame(html_data_url, width='100%', height='600px')


def show(index):
    """Render record *index*: map, image and editable JSON text.

    Reads json_files[index] and image_files[index], fills out_map, out_image
    and json_text, and clears warning_label. Problems with one of the three
    parts are reported in that part and do not stop the others from rendering.
    """
    # Parse the transcription once; both the map and the text box use it.
    try:
        with open(json_files[index], 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, ValueError, IndexError):
        # Missing/unreadable file, invalid JSON, or no JSON entry for this index.
        data = None

    # Display map
    out_map.clear_output()
    with out_map:
        if data is None:
            display(widgets.Label(value='Error displaying map'))
        else:
            try:
                map_frame = _build_map_frame(data)
            except Exception as e:
                # UI boundary: malformed geocoding data must not break
                # navigation, but the cause is shown instead of being hidden.
                display(widgets.Label(value=f'Error displaying map: {type(e).__name__}: {e}'))
            else:
                if map_frame is not None:
                    display(map_frame)

    # Display Image
    out_image.clear_output()
    with out_image:
        try:
            # Context manager closes the underlying file once the thumbnail is shown.
            with Image.open(image_files[index]) as im:
                im.thumbnail((800, 800), Image.LANCZOS)
                display(im)
        except OSError as e:
            display(widgets.Label(value=f'Error displaying image: {e}'))

    # Load JSON
    if data is None:
        text = "// INVALID JSON or missing file\n{}"
    else:
        text = json.dumps(data, indent=2, ensure_ascii=False)
    json_text.value = text
    warning_label.value = ""

def save_json(index):
    """Write the edited JSON text for record *index* to processed_json_files[index].

    The text in json_text is parsed first; when the "Keep geoloc" checkbox is
    unchecked, the "geocoding" field is removed before writing. The outcome is
    reported through save_btn.description and warning_label.
    """
    try:
        payload = json.loads(json_text.value)

        # Drop the geocoding field only when the user asked for it
        if not keep_geoloc_checkbox.value and "geocoding" in payload:
            del payload["geocoding"]

        target_path = processed_json_files[index]
        with open(target_path, 'w', encoding='utf-8') as out:
            json.dump(payload, out, indent=2, ensure_ascii=False)

        save_btn.description = "Saved!"
        warning_label.value = ""
    except json.JSONDecodeError as err:
        # The edited text is not valid JSON: point the user at the position
        save_btn.description = "Save"
        warning_label.value = (
            f'❌ Error: Invalid JSON! {err.msg} '
            f'(line {err.lineno}, column {err.colno})'
        )
    except Exception as err:
        # Anything else (e.g. the file could not be written)
        save_btn.description = "Save"
        warning_label.value = f'❌ Save failed: {err!s}'

def on_prev(_):
    """Button callback: step back one record; do nothing on the first record."""
    global current_index
    if current_index <= 0:
        return
    current_index = current_index - 1
    # Each newly shown record starts with "Keep geoloc" checked
    keep_geoloc_checkbox.value = True
    show(current_index)
    # Clear a possible "Saved!" caption left over from the previous record
    save_btn.description = "Save"

def on_next(_):
    """Button callback: advance one record; do nothing on the last image."""
    global current_index
    last_index = len(image_files) - 1
    if current_index >= last_index:
        return
    current_index = current_index + 1
    # Each newly shown record starts with "Keep geoloc" checked
    keep_geoloc_checkbox.value = True
    show(current_index)
    # Clear a possible "Saved!" caption left over from the previous record
    save_btn.description = "Save"

def on_save(_):
    """Button callback: save the record that is currently displayed."""
    index_to_save = current_index
    save_json(index_to_save)

def on_jump_to(_):
    """Button callback: show the first record whose JSON file name contains the search text.

    The text is taken from jump_to_text with surrounding whitespace removed.
    Empty text does nothing; when no file name matches, a message is placed in
    warning_label and the current record stays on screen.
    """
    global current_index
    needle = jump_to_text.value.strip()
    if not needle:
        return

    # Position of the first JSON file whose base name contains the text
    hit = next(
        (pos for pos, path in enumerate(json_files) if needle in os.path.basename(path)),
        None,
    )
    if hit is None:
        warning_label.value = f'❌ No match found for "{needle}"'
        return

    current_index = hit
    keep_geoloc_checkbox.value = True
    show(current_index)
    save_btn.description = "Save"
    warning_label.value = ""




# Connect every button to its callback
jump_to_btn.on_click(on_jump_to)
save_btn.on_click(on_save)
next_btn.on_click(on_next)
prev_btn.on_click(on_prev)

# Layout: map | image | editor column (JSON text, messages, controls)
ui = widgets.HBox(children=(
    out_map,
    out_image,
    widgets.VBox(
        children=(
            json_text,
            warning_label,
            widgets.HBox(children=(keep_geoloc_checkbox, prev_btn, next_btn, save_btn)),
            widgets.HBox(children=(jump_to_text, jump_to_btn)),
        ),
        layout=widgets.Layout(width="50%"),
    ),
))

# Fill the widgets with the first record, then put the interface on screen
show(current_index)
display(ui)

HBox(children=(Output(), Output(), VBox(children=(Textarea(value='{\n  "label": {\n    "district": "Gokwe",\n …