**Install dependencies**

In [1]:
# %%capture
# !pip install -U pillow gradio numpy pandas
# !pip install -U cjm_pil_utils

**Import dependencies**

In [2]:
import hashlib
from pathlib import Path

from PIL import Image
import numpy as np
import gradio as gr

import pandas as pd
pd.set_option('max_colwidth', None)  # Do not truncate the contents of cells in the DataFrame
pd.set_option('display.max_rows', None)  # Display all rows in the DataFrame
pd.set_option('display.max_columns', None)  # Display all columns in the DataFrame

In [3]:
from cjm_pil_utils.core import stack_imgs, get_img_files

**Set path to dataset**

In [4]:
# Root folder of the source image dataset (swap in the commented line to use the 768p variant)
dataset_path = Path("/mnt/980_1TB_2/Datasets/Pexels_Full_512p/")
# dataset_path = Path("/mnt/980_1TB_2/Datasets/Pexels_Full_768p/")

**Set path to image folder**

In [5]:
# Folder inside the dataset root that holds the source images
img_dir = dataset_path / "images/"

**Get image paths**

In [6]:
# Get a list of the paths to the image files in the source images directory (`img_dir`)
img_paths = get_img_files(img_dir)

# Print the number of source image paths found
print(len(img_paths))

109971


**Map source image IDs to file paths**

In [7]:
# Key each source image path by the integer that follows the last '-' in its
# file stem (the whole stem when it contains no '-')
img_path_dict = {int(img_path.stem.rsplit('-', 1)[-1]): img_path for img_path in img_paths}

# Preview the first few ID -> path entries as a Pandas DataFrame
pd.DataFrame.from_dict(img_path_dict, orient='index').head()

Unnamed: 0,0
1000026,/mnt/980_1TB_2/Datasets/Pexels_Full_512p/images/1000026.jpeg
1000037,/mnt/980_1TB_2/Datasets/Pexels_Full_512p/images/1000037.jpeg
1000044,/mnt/980_1TB_2/Datasets/Pexels_Full_512p/images/1000044.jpeg
1000054,/mnt/980_1TB_2/Datasets/Pexels_Full_512p/images/1000054.jpeg
1000056,/mnt/980_1TB_2/Datasets/Pexels_Full_512p/images/1000056.jpeg


**Set path to style dataset**

In [8]:
# Root folder of the stylized image dataset
style_dir = Path("/mnt/980_1TB_2/Datasets/cp2077-steelbook-768-depth-annotated-tyger_claws/")

# Folder inside the style dataset root that holds the stylized images
style_img_dir = style_dir / "images/"

**Get style image paths**

In [9]:
# Get a list of the paths to the image files in the style images directory (`style_img_dir`)
style_img_paths = get_img_files(style_img_dir)

# Print the number of style image paths found
print(len(style_img_paths))

10000


**Map style image IDs to file paths**

In [10]:
# Key each style image path by the integer that follows the last '-' in its
# file stem (the whole stem when it contains no '-')
style_img_path_dict = {int(img_path.stem.rsplit('-', 1)[-1]): img_path for img_path in style_img_paths}

# Preview the first few ID -> path entries as a Pandas DataFrame
pd.DataFrame.from_dict(style_img_path_dict, orient='index').head()

Unnamed: 0,0
2894298,/mnt/980_1TB_2/Datasets/cp2077-steelbook-768-depth-annotated-tyger_claws/images/8e504a1da54d9e13bd23ceb3-2894298.png
2908928,/mnt/980_1TB_2/Datasets/cp2077-steelbook-768-depth-annotated-tyger_claws/images/000ab45b1d7f8bef5e63efcb-2908928.png
1532702,/mnt/980_1TB_2/Datasets/cp2077-steelbook-768-depth-annotated-tyger_claws/images/002abb56a8ff8f2e00ce3295-1532702.png
1629019,/mnt/980_1TB_2/Datasets/cp2077-steelbook-768-depth-annotated-tyger_claws/images/003f8f292ac9cdef091ad932-1629019.png
1943572,/mnt/980_1TB_2/Datasets/cp2077-steelbook-768-depth-annotated-tyger_claws/images/00424caa43a31361a4ad17d1-1943572.png


**Store list of style image IDs**

In [11]:
# Ordered list of the style image IDs (iterating a dict yields its keys in insertion order)
style_img_ids = list(style_img_path_dict)

**Initialize list of images to delete**

In [12]:
# IDs of style images flagged for deletion; toggled by the "Mark to Delete" button
# handler and consumed by the delete cell at the end of the notebook
marked_img_ids = []

**Define functions for gradio interface**

In [13]:
# Position within `style_img_ids` of the image currently shown in the viewer;
# updated by prev_image/next_image and read by mark_to_delete
index = 0

def get_img_stack(img_id):
    """Build one combined image from a source image and its stylized counterpart.

    Args:
        img_id: Image ID used as the key into both `img_path_dict` and
            `style_img_path_dict`.

    Returns:
        The result of `stack_imgs([source_image, style_image])`.

    Raises:
        KeyError: If `img_id` is missing from either path dictionary.
    """
    # Open both files inside a `with` block so their file handles are released
    # as soon as the stack is built; otherwise every button click leaves two
    # handles open until the garbage collector gets to them.
    # NOTE(review): assumes stack_imgs returns a new image that does not need
    # the inputs to stay open afterwards — confirm against cjm_pil_utils.
    with Image.open(img_path_dict[img_id]) as src_img, \
            Image.open(style_img_path_dict[img_id]) as style_img:
        return stack_imgs([src_img, style_img])

# Step back to the previous image, wrapping around to the last one
def prev_image():
    """Move `index` one position back and describe the image now selected.

    Returns:
        A tuple of (stacked image from `get_img_stack`, button label), where
        the label is "Unmark" if the image is already in `marked_img_ids`
        and "Mark to Delete" otherwise.
    """
    global index
    if index > 0:
        index -= 1
    else:
        # Already at the first image: jump to the end of the list
        index = len(style_img_ids) - 1
    current_id = style_img_ids[index]
    if current_id in marked_img_ids:
        label = "Unmark"
    else:
        label = "Mark to Delete"
    return get_img_stack(current_id), label

# Step forward to the next image, wrapping around to the first one
def next_image():
    """Move `index` one position forward and describe the image now selected.

    Returns:
        A tuple of (stacked image from `get_img_stack`, button label), where
        the label is "Unmark" if the image is already in `marked_img_ids`
        and "Mark to Delete" otherwise.
    """
    global index
    last_position = len(style_img_ids) - 1
    # At (or past) the final position: restart from the beginning
    index = 0 if index >= last_position else index + 1
    current_id = style_img_ids[index]
    if current_id in marked_img_ids:
        label = "Unmark"
    else:
        label = "Mark to Delete"
    return get_img_stack(current_id), label

def mark_to_delete():
    """Toggle the deletion mark on the image at the current `index`.

    Returns:
        The label the toggle button should show next: "Mark to Delete" when
        the image was just unmarked, "Unmark" when it was just marked.
    """
    current_id = style_img_ids[index]
    was_marked = current_id in marked_img_ids
    if was_marked:
        marked_img_ids.remove(current_id)
    else:
        marked_img_ids.append(current_id)
    return "Mark to Delete" if was_marked else "Unmark"

**Create gradio interface**

In [14]:
# Build the review UI: navigation buttons, a mark/unmark toggle button, and the stacked image preview
with gr.Blocks() as demo:
    # Navigation buttons are created together inside one row
    with gr.Row():
        prev_button = gr.Button('Previous')
        next_button = gr.Button('Next')
    with gr.Row():
        # Toggle button; it is listed as an output of every click handler below,
        # so the label string each handler returns is sent back to it
        mark_del_button = gr.Button('Mark to Delete')
#         mark_del_check = gr.Checkbox(value=False, label='Mark to Delete')
    # Initial preview: the stack for the image at the current `index`
    image_output = gr.Image(get_img_stack(style_img_ids[index]))

    # prev_image/next_image return (image, label), matched in order to these two outputs
    prev_button.click(prev_image, outputs=[image_output, mark_del_button])
    next_button.click(next_image, outputs=[image_output, mark_del_button])
    # mark_to_delete returns only the new button label
    mark_del_button.click(mark_to_delete, outputs=[mark_del_button])
#     mark_del_check.change(mark_to_delete, outputs=None)
        
# Start the app. NOTE(review): `height` presumably sets the height in pixels of the
# view embedded in the notebook — confirm against the Gradio `launch()` docs
demo.launch(height=1600)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




In [None]:
# Number of images currently marked for deletion
len(marked_img_ids)

**Delete marked images**

In [17]:
# Delete every marked style image from disk and drop it from the viewer's
# bookkeeping, so the (possibly still running) Gradio app never tries to open
# a file that no longer exists.
for img_id in marked_img_ids:
    # pop(..., None) + the FileNotFoundError guard keep this cell re-runnable
    # if an earlier run was interrupted after removing some of the files
    marked_img_path = style_img_path_dict.pop(img_id, None)
    if marked_img_path is not None:
        try:
            marked_img_path.unlink()
        except FileNotFoundError:
            pass  # the file is already gone, which is the desired end state

# Keep only IDs that still have a file, updating the list in place so the
# click handlers (which read this same list object) see the change
style_img_ids[:] = [img_id for img_id in style_img_ids if img_id in style_img_path_dict]

# The list may have shrunk below the current position; clamp it back into range
index = min(index, max(len(style_img_ids) - 1, 0))

marked_img_ids = []