# Installation

In [None]:
# Clone OCR-SAM only when it is not already present: `git clone` aborts if the
# target directory exists, which would break re-running the notebook.
!test -d OCR-SAM || git clone https://github.com/yeungchenwa/OCR-SAM.git

In [None]:
# Install torch/torchvision pinned to the cu113 builds from the PyTorch wheel index
!pip -qq install torch==1.12.1+cu113 torchvision==0.13.1+cu113 --extra-index-url https://download.pytorch.org/whl/cu113

# Install openmim, then use `mim` to install mmengine and mmocr
!pip -qq install -U openmim
!mim install mmengine
!mim install mmocr

# On Windows, replace the single quotes (') around the package specs below with double quotes (")
!mim install 'mmcv==2.0.0rc4'
!mim install 'mmdet==3.0.0rc5'
!mim install 'mmcls==1.0.0rc5'

# Install sam (segment-anything, straight from the GitHub repository)
!pip -qq install git+https://github.com/facebookresearch/segment-anything.git

# Install the packages listed in the cloned repository's requirements.txt
!pip -qq install -r ./OCR-SAM/requirements.txt

# Install Gradio
# NOTE(review): gradio and diffusers are not version-pinned, so a fresh run may pull newer releases
!pip -qq install gradio

# Install diffusers
!pip -qq install diffusers

# Install pytorch-lightning for ldm
!pip -qq install pytorch-lightning==2.0.1.post0

# Get model checkpoints

In [None]:
# Download every model checkpoint straight into OCR-SAM/checkpoints/<model>/.
# The cell is safe to re-run: `mkdir -p` tolerates existing directories and each
# `wget -O` overwrites its target, so no stray top-level `checkpoints/` folder or
# failing `mv` is left behind on a second run.

# configure directories
!mkdir -p OCR-SAM/checkpoints/mmocr OCR-SAM/checkpoints/sam OCR-SAM/checkpoints/ldm

# download checkpoint for DBNet++
# (Google Drive: the inner wget saves the session cookies and extracts the
# `confirm=` token that the outer wget then sends to download the file)
!wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1r3B1xhkyKYcQ9SR7o9hw9zhNJinRiHD-' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1r3B1xhkyKYcQ9SR7o9hw9zhNJinRiHD-" -O OCR-SAM/checkpoints/mmocr/db_swin_mix_pretrain.pth && rm -rf /tmp/cookies.txt

# mmocr recognizer ckpt
!wget -O OCR-SAM/checkpoints/mmocr/abinet_20e_st-an_mj_20221005_012617-ead8c139.pth https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_20e_st-an_mj/abinet_20e_st-an_mj_20221005_012617-ead8c139.pth

# sam ckpt, more details: https://github.com/facebookresearch/segment-anything#model-checkpoints
!wget -O OCR-SAM/checkpoints/sam/sam_vit_h_4b8939.pth https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth

# ldm ckpt (URL quoted so the shell never treats `?` as a glob character)
!wget -O OCR-SAM/checkpoints/ldm/last.ckpt "https://heibox.uni-heidelberg.de/f/4d9ac7ea40c64582b7c9/?dl=1"

# Install latent-diffusion module

In [None]:
# Make the latent-diffusion sources importable. This only extends sys.path;
# the package itself is pip-installed by the %%shell cell that follows.
import sys

LATENT_DIFFUSION_DIR = '/content/OCR-SAM/latent_diffusion'

# Guard against duplicates so re-running the cell does not keep growing sys.path.
if LATENT_DIFFUSION_DIR not in sys.path:
  sys.path.append(LATENT_DIFFUSION_DIR)

In [None]:
%%shell
# Fetch the upstream latent-diffusion setup.py and install the local
# latent_diffusion directory in editable mode.
# Abort on the first failing command so that a failed `cd` cannot cause the
# download and `pip install -e .` to run in the wrong directory.
set -e

cd /content/OCR-SAM/latent_diffusion
# -O overwrites setup.py on re-runs instead of saving a stray setup.py.1 next to it
wget -O setup.py https://github.com/CompVis/latent-diffusion/raw/main/setup.py
pip -qq install -e .

In [None]:
# configure huggingface
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): presumably needed to download the Stable Diffusion weights
# referenced by the erase cells below — confirm.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# configure wandb
!pip -qq install wandb
import wandb

# Start the tracking run; the wandb.log calls further down write to it.
wandb_run_options = dict(project="text2image-fine-tune", notes="text removal")
run = wandb.init(**wandb_run_options)

In [None]:
# Mount Google Drive at /content/drive; later cells read and write under
# /content/drive/MyDrive/TextRemoval
from google.colab import drive
drive.mount('/content/drive')

# Run Text Removal on Synthetic Images

Run the `mmocr_sam_erase.py` Python script on every image in `results_2024_02_17/`.

In [None]:
%%shell
# Run OCR-SAM text removal on every entry of results_2024_02_17/.
# Stop immediately if the repository directory is missing.
cd /content/OCR-SAM/ || exit 1
for filepath in results_2024_02_17/*
do
  # An unmatched glob leaves the literal pattern in $filepath; skip it.
  [ -e "$filepath" ] || continue

  # Paths are quoted so names containing spaces or glob characters stay a single argument.
  # NOTE(review): ${filepath} keeps the "results_2024_02_17/" prefix, so each output
  # directory is nested under .../text-removal-2024_02_17/results_2024_02_17/ — confirm
  # that the post-processing cell expects this layout.
  python mmocr_sam_erase.py --inputs "/content/OCR-SAM/$filepath" \
                    --outdir "/content/drive/MyDrive/TextRemoval/text-removal-2024_02_17/${filepath}" \
                    --device cuda --use_sam True --dilate_iteration 2 --diffusion_model stable-diffusion \
                    --sd_ckpt "stabilityai/stable-diffusion-2-inpainting"
done

In [None]:
import os
import glob
import shutil
from tqdm import tqdm

In [None]:
# Collect every successfully cleaned image into clean_images/ and record which
# inputs succeeded in successful_removal.txt.
removal_root = '/content/drive/MyDrive/TextRemoval/text-removal-2024_02_17'
clean_dir = '/content/drive/MyDrive/TextRemoval/clean_images'

# shutil.copy does not create missing directories, so make sure the target exists
os.makedirs(clean_dir, exist_ok=True)

success = []

# loop through output directory (sorted so the log written below is deterministic)
for imagefolder in tqdm(sorted(glob.glob(os.path.join(removal_root, '*')))):

  erase_output = os.path.join(imagefolder, 'erase_output.jpg')

  # if erase output exists
  if os.path.isfile(erase_output):

    # record successful text removal
    success.append(os.path.basename(imagefolder))

    # copy erase output (the per-image folder keeps its original file)
    # NOTE(review): the bytes are copied unchanged, so the resulting ".png" file
    # still contains JPEG data — confirm downstream readers sniff the content.
    shutil.copy(erase_output, os.path.join(clean_dir, f'{os.path.basename(imagefolder)}.png'))

# write successful filenames to .txt; the context manager closes (and flushes) the file
with open('/content/drive/MyDrive/TextRemoval/successful_removal.txt', 'w') as success_log:
  success_log.writelines(f'{name}\n' for name in success)

Compile the final directory of images: the text-removed image where removal succeeded, and the original synthetic image where it failed.

In [None]:
# loop through clean images directory
clean_images = os.listdir('/content/drive/MyDrive/TextRemoval/clean_images/')

for i in tqdm(range(0, 1000)):

  # check if text removal was successful
  if f'{i:04d}.png' in clean_images:

    # move erase output
    shutil.copy(os.path.join('/content/drive/MyDrive/TextRemoval/clean_images', f'{i:04d}.png'), os.path.join('/content/drive/MyDrive/TextRemoval/merged_output', f'{i:04d}.png'))

  else:

    # move original synth image
    shutil.copy(os.path.join('/content/results_2023_06_28', f'synth_{i:04d}.png'), os.path.join('/content/drive/MyDrive/TextRemoval/merged_output', f'{i:04d}.png'))

Log the images in the merged output directory to the wandb run.

In [None]:
# Log every merged image as one row (id, image) of a wandb table.
table = wandb.Table(columns=["id", "image"])

# sorted so the table rows come out in a deterministic order
for img in sorted(glob.glob('/content/drive/MyDrive/TextRemoval/merged_output/*')):
  # os.path.splitext removes only the extension; str.rstrip('.png') would strip
  # any run of trailing '.', 'p', 'n' or 'g' characters and can eat part of the name.
  table.add_data(os.path.splitext(img)[0], wandb.Image(img))

wandb.log({"images" : table})

In [None]:
# Log each merged image to the run as its own wandb.log call.
# A plain loop replaces the side-effect list comprehension, which displayed a
# list of None as the cell output; sorting makes the logging order deterministic.
for filepath in sorted(glob.glob('/content/drive/MyDrive/TextRemoval/merged_output/*')):
  wandb.log({"images" : wandb.Image(filepath)})

In [None]:
# pydicom provides dcmread, which a later cell uses to load a DICOM file
# NOTE(review): pydicom is installed without a version pin
!pip install pydicom
from pydicom import dcmread

In [None]:
from PIL import Image
import numpy as np

# Read the sample DICOM file into a dataset object
dicom_path = '/content/1-1.dcm'
ds = dcmread(dicom_path)

In [None]:
def dicom_to_gray(ds):
    """Convert a dataset's pixel data to an 8-bit grayscale array.

    Negative values are clipped to 0 and the result is scaled so that the
    largest pixel maps to 255. Fractional values are truncated (not rounded)
    by the final cast to uint8.

    Args:
        ds: Object exposing a ``pixel_array`` attribute holding a numeric
            numpy array (e.g. the dataset returned by ``dcmread``).

    Returns:
        numpy.ndarray of dtype uint8 with the same shape as ``ds.pixel_array``.
        An image with no positive pixel (or an empty array) yields all zeros
        instead of dividing by zero.
    """
    pixels = ds.pixel_array.astype(float)  # convert to float
    pixels = np.maximum(pixels, 0)  # clip negative values to 0

    # The largest pixel sets the scale; an empty array has no maximum.
    peak = pixels.max() if pixels.size else 0.0
    if peak <= 0:
        # Nothing to scale: avoid 0/0 -> NaN being cast to uint8.
        return np.zeros(pixels.shape, dtype=np.uint8)

    scaled = (pixels / peak) * 255.0  # scale between 0-255
    return np.uint8(scaled)  # convert to uint

In [None]:
# Convert the loaded DICOM dataset to 8-bit grayscale and write it out as a PNG
gray_pixels = dicom_to_gray(ds)
im = Image.fromarray(gray_pixels)
im.save('/content/test.png')

In [None]:
%%shell
# Erase text from /content/test.png with SAM disabled (--use_sam False);
# results are written to /content/output_without_sam.
cd /content/OCR-SAM/
python mmocr_sam_erase.py \
      --inputs /content/test.png \
      --outdir /content/output_without_sam \
      --device cuda \
      --use_sam False \
      --dilate_iteration 2 \
      --diffusion_model stable-diffusion \
      --sd_ckpt "stabilityai/stable-diffusion-2-inpainting"

# Web app

In [None]:
%%shell
# Launch the text-erasing web app; the script is started from the repository root.
cd /content/OCR-SAM/
python mmocr_sam_erase_app.py

In [None]:
# !python /content/OCR-SAM/mmocr_sam_inpainting_app.py