# dronio-custom-barcode-model — Colab training

This notebook:
- clones the repo
- installs dependencies
- loads a dataset zip from Google Drive and extracts it into the repo
- starts TensorBoard and configures MLflow tracking (local `mlruns` directory)
- starts training


In [None]:
# --- Clone repo ---
!rm -rf dronio-custom-barcode-model
!git clone https://github.com/gitGut01/dronio-custom-barcode-model.git
%cd dronio-custom-barcode-model

# --- Install deps ---
!pip -q install -r requirements.txt

# Colab already includes torch/torchvision, but ensure tensorboard is present
!pip -q install tensorboard

!pip -q install flashlight-text
!pip -q install kenlm

import os
os.environ.setdefault('MLFLOW_TRACKING_URI', 'file:' + os.path.abspath('mlruns'))
print('MLFLOW_TRACKING_URI=', os.environ['MLFLOW_TRACKING_URI'])


In [None]:
# --- Option A: Load dataset zip from Google Drive ---
# 1) Upload your dataset zip to Google Drive
# 2) Set DRIVE_ZIP_PATH to that file
# 3) This will unzip it into DATA_DIR so you end up with:
#    my_dataset/train/labels.csv, my_dataset/train/images/...

from google.colab import drive
import os
import shutil

# Mount Google Drive under /content/drive
if not os.path.exists('/content/drive'):
    os.makedirs('/content/drive', exist_ok=True)
drive.mount('/content/drive')

# Path to your zip in Drive (edit this)
DRIVE_ZIP_PATH = '/content/drive/MyDrive/WarehouseDrone/Datasets/Custom_Barcode_value_dataset/1M.zip'

# Where the dataset should be available in the repo after extraction
DATA_DIR = 'my_dataset'

# Copy zip into local runtime (faster unzip) and extract
LOCAL_ZIP = 'dataset.zip'

if DRIVE_ZIP_PATH.endswith('/'):
    raise ValueError('DRIVE_ZIP_PATH must be a file path to a .zip, not a folder')

print('Copying from Drive:', DRIVE_ZIP_PATH)
shutil.copyfile(DRIVE_ZIP_PATH, LOCAL_ZIP)

# Clean target dir to avoid mixing datasets
!rm -rf "{DATA_DIR}"
!mkdir -p "{DATA_DIR}"

# Unzip
!unzip -q "{LOCAL_ZIP}" -d "{DATA_DIR}"

# Many zips contain a top-level folder; if so, flatten it.
entries = [e for e in os.listdir(DATA_DIR) if not e.startswith('.')]
if len(entries) == 1 and os.path.isdir(os.path.join(DATA_DIR, entries[0])):
    inner = os.path.join(DATA_DIR, entries[0])
    print('Detected nested folder, flattening:', inner)
    for name in os.listdir(inner):
        shutil.move(os.path.join(inner, name), os.path.join(DATA_DIR, name))
    shutil.rmtree(inner)

print('Dataset ready at:', os.path.abspath(DATA_DIR))
print('Contents:', os.listdir(DATA_DIR))


In [None]:
# --- TensorBoard ---
# Training will write events to TB_LOGDIR.
TB_LOGDIR = 'runs/barcode-transformer-ctc'
%load_ext tensorboard
%tensorboard --logdir {TB_LOGDIR}


In [None]:
# --- Train ---
# Enable MLflow + TensorBoard.
!python transformer_model/train_transformer_ctc.py \
  --data {DATA_DIR} \
  --device cuda \
  --epochs 10 \
  --batch 128 \
  --lr 3e-4 \
  --amp \
  --tb --tb-logdir {TB_LOGDIR} \
  --mlflow --mlflow-tracking-uri file:./mlruns --mlflow-experiment barcode-transformer-ctc


In [None]:
!zip -r mlruns.zip mlruns