In [1]:
# GPU check
!nvidia-smi                                             # should list a Tesla card

# === 1.1  Pick ONE of the two blocks below ===

# A) Use a compatible Torch that still contains _accumulate (simplest)
!pip install -q torch==2.2.0+cu118 torchvision==0.17.0+cu118 \
               --extra-index-url https://download.pytorch.org/whl/cu118  # :contentReference[oaicite:0]{index=0}

Mon Jun  2 23:44:49 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off |   00000000:00:04.0 Off |                    0 |
| N/A   42C    P0             49W /  400W |       0MiB /  40960MiB |      0%      Default |
|                                         |                        |             Disabled |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
from google.colab import drive

# Attach Google Drive to the Colab file system at a fixed mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import zipfile

# Unpack the crops archive stored on Drive into ./crops (relative to the
# current working directory).
CROPS_ARCHIVE = '/content/drive/MyDrive/crops.zip'
with zipfile.ZipFile(CROPS_ARCHIVE) as archive:  # default mode is 'r'
    archive.extractall(path='crops')

In [4]:
!git clone --depth 1 https://github.com/clovaai/deep-text-recognition-benchmark.git
%cd /content/deep-text-recognition-benchmark            # %cd persists in Colab sessions :contentReference[oaicite:2]{index=2}

# If you kept the newest Torch, replace the broken import:
!sed -i "s/from torch._utils import _accumulate/from itertools import accumulate as _accumulate/" dataset.py  # :contentReference[oaicite:3]{index=3}

fatal: destination path 'deep-text-recognition-benchmark' already exists and is not an empty directory.
[Errno 2] No such file or directory: '/content/deep-text-recognition-benchmark # %cd persists in Colab sessions :contentReference[oaicite:2]{index=2}'
/content
sed: can't read dataset.py: No such file or directory


In [5]:
import pandas as pd, pathlib, random, math, os, textwrap

DATA_ROOT = 'crops/crops'                # folder with images + ocr_data.csv
CSV       = f'{DATA_ROOT}/ocr_data.csv'
WORKDIR   = '/content/my_lmdb_work'
os.makedirs(WORKDIR, exist_ok=True)

# needs columns: filename, words
# Read both columns as text. With dtype inference a digits-only label such as
# "0042" would be parsed as the number 42 (and a column containing blanks would
# become float, e.g. "42.0"), silently corrupting the ground truth.
df = pd.read_csv(CSV, dtype={'filename': str, 'words': str})
df['path'] = df['filename'].apply(lambda x: f'{DATA_ROOT}/{x}')
df = df[df['path'].apply(lambda p: pathlib.Path(p).exists())]  # drop missing images
df = df[df['words'].notna()]             # drop rows with no label (would be written as "nan")

# Shuffle with a fixed seed, then split 90 / 10.
paths = df.sample(frac=1, random_state=42)
cut   = math.floor(len(paths) * 0.9)
train_df, val_df = paths[:cut], paths[cut:]

# One "<image path>\t<label>" line per sample.
for name, subset in [('train', train_df), ('val', val_df)]:
    with open(f'{WORKDIR}/{name}.txt', 'w', encoding='utf-8') as f:
        for img_path, label in zip(subset['path'], subset['words']):
            f.write(f"{img_path}\t{label}\n")

print(textwrap.dedent(f"""
    ✔️  wrote {len(train_df)} lines  -> train.txt
    ✔️  wrote {len(val_df)} lines    -> val.txt
"""))


✔️  wrote 517 lines  -> train.txt
✔️  wrote 58 lines    -> val.txt



In [6]:
!pip install -q fire lmdb

# build training LMDB
!python deep-text-recognition-benchmark/create_lmdb_dataset.py \
        --inputPath /content \
        --gtFile    /content/my_lmdb_work/train.txt \
        --outputPath /content/my_lmdb_work/lmdb_train               # :contentReference[oaicite:6]{index=6}

# build validation LMDB
!python deep-text-recognition-benchmark/create_lmdb_dataset.py \
        --inputPath /content \
        --gtFile    /content/my_lmdb_work/val.txt \
        --outputPath /content/my_lmdb_work/lmdb_val


Created dataset with 517 samples
Created dataset with 58 samples


In [7]:
!wget https://github.com/JaidedAI/EasyOCR/releases/download/pre-v1.1.6/craft_mlt_25k.zip
!unzip craft_mlt_25k.zip

--2025-06-02 23:44:59--  https://github.com/clovaai/deep-text-recognition-benchmark/releases/download/0.1.0/TPS-ResNet-BiLSTM-Attn.pth
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2025-06-02 23:45:00 ERROR 404: Not Found.



In [12]:
!pip install numpy==1.26.4
!python deep-text-recognition-benchmark/train.py --valInterval 100 --num_iter 500 \
  --exp_name 'crops_run' \
  --train_data ./my_lmdb_work/lmdb_train --valid_data ./my_lmdb_work/lmdb_val \
  --select_data 'train' --batch_ratio 1 \
  --Transformation TPS --FeatureExtraction ResNet --SequenceModeling BiLSTM --Prediction Attn \
  --batch_size 192 --workers 2 \
  --imgH 32 --imgW 100 \
  --batch_max_length 25 \
  --character "0123456789" \
  --saved_model craft_mlt_25k.pth --FT


Filtering the images containing characters which are not in opt.character
Filtering the images whose label is longer than opt.batch_max_length
--------------------------------------------------------------------------------
dataset_root: ./my_lmdb_work/lmdb_train
opt.select_data: ['train']
opt.batch_ratio: ['1']
--------------------------------------------------------------------------------
dataset_root:    ./my_lmdb_work/lmdb_train	 dataset: train
sub-directory:	/.	 num samples: 517
num total samples of train: 517 x 1.0 (total_data_usage_ratio) = 517
num samples of train per batch: 192 x 1.0 (batch_ratio) = 192
--------------------------------------------------------------------------------
Total_batch_size: 192 = 192
--------------------------------------------------------------------------------
dataset_root:    ./my_lmdb_work/lmdb_val	 dataset: /
sub-directory:	/.	 num samples: 58
--------------------------------------------------------------------------------
model input paramete