In [None]:
import os
import ipysheet

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

from tqdm.auto import tqdm
from functools import partial
from ipywidgets import Label, IntProgress, Image, Button, HBox, VBox, Layout
from IPython.display import display, clear_output

# Register tqdm with pandas, which adds the progress_apply() family of methods.
tqdm.pandas()

In [None]:
class Batcher:
    """Cursor for paging through ``count`` items in batches of ``limit``.

    State, updated in place by ``increment`` and ``decrement``:

    * ``skip``    -- offset of the first item of the current batch.
    * ``limit``   -- number of items per batch.
    * ``count``   -- total number of items.
    * ``current`` -- 1-based number of the current batch.
    * ``end``     -- number of the last batch.
    """

    def __init__(self, skip=0, limit=10, count=100):
        self.skip = skip
        self.limit = limit
        self.count = count
        self.current = 1
        # Ceiling division: a trailing partial batch still needs its own page.
        # round() lost it whenever the remainder was at most half a batch
        # (104 items / 10 -> 10 batches, 5 items / 10 -> 0 batches).
        self.end = -(-count // limit)

    def increment(self):
        """Move to the next batch.

        Returns:
            True if the cursor moved, False if it was already on the last batch.
        """
        if self.current < self.end:
            self.current += 1
            self.skip += self.limit
            return True
        return False

    def decrement(self):
        """Move to the previous batch.

        Returns:
            True if the cursor moved, False if it was already on the first batch.
        """
        if self.current > 1:
            self.current -= 1
            self.skip -= self.limit
            return True
        return False

    def __iter__(self, sentinel=False):
        """Iterate by repeatedly calling ``increment`` until it returns ``sentinel``.

        One item is produced per successful advance, so a ``for`` loop body
        runs once for every batch after the one the cursor started on. With the
        default sentinel the loop ends when the last batch has been reached.
        """
        return iter(self.increment, sentinel)

def generate_preview(file: str) -> Image:
    """Load the image at ``file`` into an ``Image`` widget created with ``width=80``.

    The widget's ``format`` is the text after the last dot in ``file``.
    """
    # The context manager closes the handle immediately; the bare open().read()
    # left one open file per previewed row until garbage collection.
    with open(file, "rb") as handle:
        payload = handle.read()
    return Image(value=payload, format=file.split(".")[-1], width=80)

def generate_sheet_with_preview(df: pd.DataFrame, path: str="path", skip: int=0, limit: int=10) -> ipysheet.sheet:
    """Build a sheet showing one batch of ``df`` with an image preview per row.

    Args:
        df: Frame with one row per image.
        path: Name of the column holding the image file paths.
        skip: Position of the first row to show.
        limit: Maximum number of rows to show.

    Returns:
        The sheet created by ``ipysheet.sheet``: a "preview" column followed by
        every other column of ``df``, for the rows from ``skip`` up to (not
        including) ``skip + limit``, clipped to the end of ``df``.
    """
    # Everything except the path column is shown as data; the path column is
    # represented by the rendered preview. Selecting by name (rather than
    # df.columns[1:]) keeps this correct when the path column is not first.
    data_columns = [name for name in df.columns if name != path]
    end = min(skip + limit, len(df))
    # Size the sheet to the rows actually shown so a short final batch has no
    # blank trailing rows.
    n_rows = max(end - skip, 0)

    sheet = ipysheet.sheet(rows=n_rows, columns=len(data_columns) + 1,
                           column_headers=["preview"] + data_columns)
    if n_rows:
        # column() and cell_range() attach their cells to the sheet created
        # just above (ipysheet's "current" sheet). They are skipped for an
        # empty batch because cell_range() cannot take an empty 2-D value.
        ipysheet.column(0, [generate_preview(file) for file in df[path].iloc[skip:end]])
        ipysheet.cell_range(df[data_columns].iloc[skip:end].to_numpy(), column_start=1)

    return sheet

def prev_callback(counter, w):
    """Button handler: step back one batch and redraw the browser.

    Args:
        counter: The ``Batcher`` bound in with ``functools.partial`` when the
            button is wired up.
        w: The clicked button, passed by ``on_click``; unused.
    """
    # Act on the batcher bound to this button instead of reaching for the global.
    counter.decrement()
    # wait=True keeps the old output until the new widgets arrive, so the
    # browser does not flash blank between pages.
    clear_output(wait=True)
    display(render_widgets())
    
def next_callback(counter, w):
    """Button handler: advance one batch and redraw the browser.

    Args:
        counter: The ``Batcher`` bound in with ``functools.partial`` when the
            button is wired up.
        w: The clicked button, passed by ``on_click``; unused.
    """
    # Act on the batcher bound to this button instead of reaching for the global.
    counter.increment()
    # wait=True keeps the old output until the new widgets arrive, so the
    # browser does not flash blank between pages.
    clear_output(wait=True)
    display(render_widgets())

def render_widgets():
    """Assemble the batch browser for the module-level ``batcher`` and ``df``.

    Returns:
        A ``VBox`` holding a navigation bar (progress bar, "Batch i of n"
        label, Previous/Next buttons) above the sheet for the current batch.
    """
    info = Label(value='Batch {} of {}'.format(batcher.current, batcher.end))
    counter = IntProgress(value=batcher.current, min=0, max=batcher.end, description='',
                          bar_style='success', orientation='horizontal')
    progress = HBox([counter, info])

    prev_button = Button(
        description='Previous',
        # Nothing to go back to on the first batch.
        disabled=batcher.current <= 1,
        button_style='', # 'success', 'info', 'warning', 'danger' or ''
        tooltip='Previous',
    )
    prev_button.on_click(partial(prev_callback, batcher))

    next_button = Button(
        description='Next',
        # Nothing to advance to on the last batch.
        disabled=batcher.current >= batcher.end,
        button_style='success', # 'success', 'info', 'warning', 'danger' or ''
        tooltip='Next',
    )
    next_button.on_click(partial(next_callback, batcher))

    buttons = HBox([prev_button, next_button])
    # flex_flow is the Layout trait that sets the flex direction; the previous
    # "flex_row" keyword is not a Layout trait and so was never applied as CSS.
    nav_bar = HBox([progress, buttons],
                   layout=Layout(display='flex', flex_flow='row', justify_content='space-between'))
    spreadsheet = generate_sheet_with_preview(df, skip=batcher.skip, limit=batcher.limit)

    return VBox([nav_bar, spreadsheet])

In [None]:
# Index every file found at <data_dir>/<split>/<label>/<file> as one DataFrame row.
data_dir = os.path.join('data','raw')
folders = sorted([folder for folder in os.listdir(data_dir) if '.DS_Store' not in folder])

# One frame per non-empty category, concatenated once after the walk. This
# replaces growing df with pd.concat inside the loop and using NameError to
# detect the first iteration.
frames = []
for folder in folders:
    categories = [label for label in os.listdir(os.path.join(data_dir, folder)) if '.DS_Store' not in label]
    for category in categories:
        category_dir = os.path.join(data_dir, folder, category)
        # List the directory once and derive the paths from that listing, so
        # "file" and "path" are guaranteed to line up row for row.
        file = [name for name in os.listdir(category_dir) if '.DS_Store' not in name]
        path = [os.path.join(category_dir, name) for name in file]
        label = [category] * len(path)
        split = [folder] * len(path)
        if file:
            frames.append(pd.DataFrame({"path": path, "file": file, "split": split, "label": label}))

# With no files at all, fall back to an empty frame that still has the expected
# columns, so the assignments below work instead of raising NameError.
df = (pd.concat(frames, ignore_index=True) if frames
      else pd.DataFrame(columns=["path", "file", "split", "label"]))

# Annotation columns, initialised here: "verified" starts as a copy of the
# folder-derived label; "legible" and "translated" start at 1 and 0.
df['verified'] = df['label']
df['legible'] = 1
df['translated'] = 0
print("Count:", len(df))
df.head()

In [None]:
# Page through the whole frame with Batcher's default batch size, then build the
# browser; as the cell's last expression, the returned widget is the cell output.
batcher = Batcher(count=len(df))
render_widgets()