In [None]:
# Shell command (IPython `!` syntax): report the NVIDIA GPU(s) visible to this runtime.
!nvidia-smi

In [2]:
import fastcore.all as FC


In [3]:
# On Colab only: install the third-party packages in quiet mode (-q).
if FC.IN_COLAB:
  # flash-attn is installed on its own, with pip's build isolation turned off.
  !pip install -q flash-attn --no-build-isolation
  !pip install -q transformers accelerate datasets peft bitsandbytes pyngrok

import transformers
# Last expression of the cell: shows the installed transformers version.
transformers.__version__

'4.40.2'

# Testing `Idefics` OCR for Comics
> Accuracy Enhancements for OCR in `PanelCleaner`


## Settings for Google Colab

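We will install the most up-to-date version of PanelCleaner: an editable install from your Google Drive copy when `DEV_INSTALL` is `True`, or the `testbed` branch from GitHub otherwise. This only affects Colab notebooks.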

In [4]:
# True:  editable install of PanelCleaner from the copy stored on Google Drive (needs MOUNT_DRIVE).
# False: install the `testbed` branch from GitHub.
# Only consulted when the notebook runs on Colab.
DEV_INSTALL = True

The easiest way to provide the source images for the experiments is to mount your Google Drive.


In [5]:
# Mount Google Drive whenever the development install is requested; set to True to mount it regardless.
MOUNT_DRIVE = DEV_INSTALL
# Directory name under /content where the Drive gets mounted.
GDRIVE_MOUNT_POINT = 'drive'


# install

In [6]:
import os
from pathlib import Path
from rich import print as cprint
from rich.text import Text

def info(msg: str):
    """Print `msg` as a banner line framed by two runs of ten underscores.

    The first six characters of the message are rendered in bold red.
    """
    rule = "_" * 10
    banner = Text(msg)
    banner.stylize("bold red", 0, 6)
    cprint(rule, banner, rule)


# Colab only: mount Google Drive under /content unless the mount point is already there.
if FC.IN_COLAB and MOUNT_DRIVE:
    mnt_point = "/content/{}".format(GDRIVE_MOUNT_POINT)
    if not Path(mnt_point).exists():
        info("Mounting Google Drive")
        # Imported lazily: google.colab only exists inside a Colab runtime.
        from google.colab import drive

        drive.mount(mnt_point, force_remount=True)


In [7]:
# Colab only: install PanelCleaner, either from Google Drive (development) or from GitHub.
if FC.IN_COLAB:
    info('Installing PanelCleaner')
    if DEV_INSTALL:
        # Development install: editable (`-e`) install of the copy kept on Google Drive.
        assert MOUNT_DRIVE, "DEV_INSTALL need a mounted google drive"
        info('Installing PanelCleaner from Google Drive')
        # pip is run from inside the project folder, so the working directory changes here.
        os.chdir(f"/content/{GDRIVE_MOUNT_POINT}/MyDrive/Shared/PanelCleaner/")
        !pip install -e .
    else:
        # Regular install: the `testbed` branch straight from GitHub.
        info('Installing PanelCleaner from Github')
        !pip install -q git+https://github.com/civvic/PanelCleaner.git@testbed


**PanelCleaner** is a heavyweight package, and **Colab** sometimes refuses (*silently*) to install it. If the cell below gives an error, re-run the cell above. That usually fixes the problem.

In [8]:
import importlib.resources

# Locate the installed `pcleaner` package, sanity-check the result, and make
# its `_testbed` folder the current working directory.
package_path = importlib.resources.files('pcleaner')
assert package_path.name == 'pcleaner'
os.chdir(package_path.joinpath('_testbed'))

In [9]:
from pcleaner._testbed.testbed.experiments import ExperimentsVisor, CropMethod
from pcleaner._testbed.testbed.ocr_idefics import IdeficsExperimentContext


----
# Idefics experiments

## Experiment directory

Directory where the source images reside (`EXP_DIR/source/`), where the auxiliary images are cached (`EXP_DIR/cache/`), and where the experiment results are saved. You can change the default location here.


In [10]:
# Root folder of the experiment data. The path is relative, so it is resolved
# against the current working directory.
EXP_DIR = Path('./experiment')
cprint(EXP_DIR)


# Setup ngrok (Colab)

The experiments can generate hundreds of images, and maintaining the **PIL** images in memory is not efficient. All the generated images are cached and visualized on demand through a URL pointing to the local cache. This approach prevents the kernel from being overloaded with **PIL** images, with the front-end responsible for fetching the image and the backend web server (not the kernel) for serving the image in another process. This method is quick and efficient. As an added bonus, the saved notebook remains lean and fit; it doesn't store the Base64 versions of all the output cell images.

Unfortunately, this approach does not work as is in **Colab**. Google Colab runs on an older Ubuntu 18.04 VM, so all the usual networking challenges with Docker, or whatever VMs Google is using, apply. Google also goes to great lengths to avoid exposing its internal architecture. We have two options:
- Let the Jupyter kernel serve the images itself, which is slow and memory-consuming.
- Use a tunnel to map localhost (server) to whatever IP and port the front-end (the browser you're currently using) is running on. We can use **ngrok** for this, but *ngrok* is a commercial service that has been abused and now requires confirmation the first time the tunnel connects, which can be inconvenient for the user. It also requires the user to open a free account and obtain an auth token.

You choose.

If the notebook is running in Colab and ngrok has been successfully installed and the tunnel has been created, the default setting is USE_PIL=False. You can set the environment variable USE_PIL=True to force the use of PIL images, but note that in certain circumstances, Colab will complain because the free tiers are usually memory constrained.

If you don't change the default settings and
- the notebook is running locally, it'll serve the images directly without any additional setup.
- the notebook is running in Colab, it'll serve the images through a web server and ngrok.


In [11]:
# Default image-serving mode: use the tunnel on Colab, no tunnel elsewhere.
if FC.IN_COLAB:
    os.environ['USE_TUNNEL'] = 'True'
else:
    os.environ['USE_TUNNEL'] = 'False'

# PIL images are only the default on Colab when the tunnel is switched off.
if FC.IN_COLAB and os.environ['USE_TUNNEL'] == 'False':
    os.environ['USE_PIL'] = 'True'
else:
    os.environ['USE_PIL'] = 'False'


In [12]:
# Start the ngrok-backed web server only when PIL serving is off and the tunnel is requested;
# otherwise SERVER stays None.
SERVER = None
if os.environ['USE_PIL'].lower() == 'false':
    if os.environ['USE_TUNNEL'].lower() == 'true':
        import pcleaner._testbed.testbed.web_server as web_server

        SERVER = web_server.setup_ngrok(
            web_server.WebServerBottle,
            Path(EXP_DIR),
        )


Creates the `IdeficsExperimentContext` object we'll use to manage the experiments.


In [None]:
# Keep exactly ONE of the configurations below active, matching your hardware:
# CONTEXT = IdeficsExperimentContext(EXP_DIR, server=SERVER)  # quantization 'bfloat16'  # Colab pro with A100 or L4, bfloat16 and FlashAttention
# CONTEXT = IdeficsExperimentContext(EXP_DIR, '4bits', server=SERVER)  # Linux, Ampere
# NOTE(review): the positional `False` presumably switches FlashAttention off — confirm against IdeficsExperimentContext.
CONTEXT = IdeficsExperimentContext(EXP_DIR, '4bits', False, server=SERVER)  # Colab Free tier, T4 GPUs don't support FlashAttention
CONTEXT.show()


# Test images


Copy your images to the source directory:


In [None]:
# Print the absolute path of the `source` folder inside EXP_DIR.
cprint((EXP_DIR/'source').resolve())

or download the standard set:


In [None]:
# !gdown --id 18TSXLCYAPxAlUsdHmgAe6FZM5d8K6gcT -O experiment.zip

In [None]:
# !unzip -qn experiment.zip -d .

Check the images are in place

In [14]:
# Numbered listing (zero-padded, two-digit index) of the image file names held by the context.
[
    f"{idx:02}: {img_path.name}"
    for idx, img_path in enumerate(CONTEXT.image_paths)
]


['00: Action_Comics_1960-01-00_(262).JPG',
 '01: Adolf_Cap_01_008.jpg',
 '02: Barnaby_v1-028.png',
 '03: Barnaby_v1-029.png',
 '04: Buck_Danny_-_12_-_Avions_Sans_Pilotes_-_013.jpg',
 '05: Cannon-292.jpg',
 '06: Contrato_con_Dios_028.jpg',
 '07: Erase_una_vez_en_Francia_02_88.jpg',
 '08: FOX_CHILLINTALES_T17_012.jpg',
 '09: Furari_-_Jiro_Taniguchi_selma_056.jpg',
 '10: Galactus_12.jpg',
 '11: INOUE_KYOUMEN_002.png',
 '12: MCCALL_ROBINHOOD_T31_010.jpg',
 '13: MCCAY_LITTLENEMO_090.jpg',
 '14: Mary_Perkins_On_Stage_v2006_1_-_P00068.jpg',
 '15: PIKE_BOYLOVEGIRLS_T41_012.jpg',
 '16: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1.png',
 '17: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_1_K.png',
 '18: Sal_Buscema_Spaceknights_&_Superheroes_Ocular_Edition_1_2.png',
 '19: Spirou_Et_Fantasio_Integrale_06_1958_1959_0025_0024.jpg',
 '20: Strange_Tales_172005.jpg',
 '21: Strange_Tales_172021.jpg',
 '22: Tarzan_014-21.JPG',
 '23: Tintin_21_Les_Bijoux_de_la_Castafiore_page_39.jp

In [None]:
# Create the 'Idefics' experiments visor, initially selecting one image, one box
# and the grey-padded default crop method; the last line shows it as the cell output.
idefics_experiment = ExperimentsVisor(
    CONTEXT,
    'Idefics',
    image_idx='Strange_Tales_172005.jpg',
    box_idx=1,
    method=CropMethod.DEFAULT_GREY_PAD,
)
idefics_experiment


----

In [None]:
# Tear everything down: release the model, then stop the web server (if one was started).
# The server shutdown sits in `finally` so it still runs when the model cleanup raises;
# any such exception continues to propagate afterwards.
try:
    CONTEXT.cleanup_model()
finally:
    if SERVER is not None:
        SERVER.stop()
        SERVER = None
        # Record that the tunnel is no longer available.
        os.environ['USE_TUNNEL'] = 'False'
