# Notebook 03 - Preprocessing + Augmentation Strategy (Detection + OCR)

This notebook demonstrates:
- sample visualizations from the dataset
- scenario augmentations (blur, low light, glare, rotation)
- OCR preprocessing steps (CLAHE, thresholding, denoise)
These steps improve robustness for real-world ALPR use cases.


In [7]:
!pip install -q roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="ATUHTf9DF0DoAlA3Tfeg")
project = rf.workspace("nivu").project("indian-license-plate-knte7")
version = project.version(1)
dataset = version.download("yolov8")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/91.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━[0m [32m81.9/91.8 kB[0m [31m2.6 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.8/91.8 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.8/66.8 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.9/49.9 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m66.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m81.1 MB/s[0m eta [36m0:00:00[0m
loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Indian-License-Plate-1 to yolov8:: 100%|██████████| 43537/43537 [00:00<00:00, 60701.14it/s]





Extracting Dataset Version Zip to Indian-License-Plate-1 in yolov8:: 100%|██████████| 3312/3312 [00:00<00:00, 8314.59it/s]


In [8]:
import os
DATASET_DIR = dataset.location
print("DATASET_DIR =", DATASET_DIR)
print(os.listdir(DATASET_DIR))


DATASET_DIR = /content/Indian-License-Plate-1
['README.dataset.txt', 'train', 'valid', 'README.roboflow.txt', 'test', 'data.yaml']


In [9]:
import glob,random,os

train_img_dir=os.path.join(DATASET_DIR,"train","images")
imgs=glob.glob(os.path.join(train_img_dir,"*.jpg"))+glob.glob(os.path.join(train_img_dir,"*.png"))+glob.glob(os.path.join(train_img_dir,"*.jpeg"))
random.shuffle(imgs)

print("Total train images:",len(imgs))
sample=imgs[:3]
sample


Total train images: 1156


['/content/Indian-License-Plate-1/train/images/HP5_jpg.rf.43109b11c9387578aba08c6c8751fda7.jpg',
 '/content/Indian-License-Plate-1/train/images/video11_1120_jpg.rf.efd4c3ed104c2920b85b38e124dfba3f.jpg',
 '/content/Indian-License-Plate-1/train/images/28fc10e3-681b-4086-9c15-28934ae86f7e___3e7fd381-0ae5-4421-8a70-279ee0ec1c61_nissan-terrano-amt_827x510_71478504527_jpg.rf.480b5c6067a182f1b8868cc78d9c3ed2.jpg']

In [5]:
train_img_dir=os.path.join(DATASET_DIR,"train","images")
imgs=glob.glob(os.path.join(train_img_dir,"*.jpg"))+glob.glob(os.path.join(train_img_dir,"*.png"))+glob.glob(os.path.join(train_img_dir,"*.jpeg"))
random.shuffle(imgs)

print("Total train images:",len(imgs))
sample=imgs[:3]
sample


Total train images: 0


[]

In [10]:
from google.colab import drive
drive.mount("/content/drive")


Mounted at /content/drive


Once again from first using Drive Connected , not using dataset to be download all the time:-

In [11]:
!pip install -q roboflow
from roboflow import Roboflow

rf=Roboflow(api_key="ATUHTf9DF0DoAlA3Tfeg")
project=rf.workspace("nivu").project("indian-license-plate-knte7")
version=project.version(1)
dataset=version.download("yolov8")

DATASET_DIR=dataset.location
print("DATASET_DIR =",DATASET_DIR)


loading Roboflow workspace...
loading Roboflow project...
DATASET_DIR = /content/Indian-License-Plate-1


In [12]:
import os
DRIVE_SAVE_DIR="/content/drive/MyDrive/ALPR_DATASET"
os.makedirs(DRIVE_SAVE_DIR,exist_ok=True)

!cp -r "{DATASET_DIR}" "{DRIVE_SAVE_DIR}/"
print("Copied to:",DRIVE_SAVE_DIR)


Copied to: /content/drive/MyDrive/ALPR_DATASET


In [13]:
import os
print(os.listdir("/content/drive/MyDrive/ALPR_DATASET"))


['Indian-License-Plate-1']


In [14]:
print(os.listdir("/content/drive/MyDrive/ALPR_DATASET/Indian-License-Plate-1"))


['data.yaml', 'README.dataset.txt', 'README.roboflow.txt', 'test', 'train', 'valid']


Check : -

In [15]:
import glob
print("Train images:",len(glob.glob("/content/drive/MyDrive/ALPR_DATASET/Indian-License-Plate-1/train/images/*")))


Train images: 1156


In [16]:
DATASET_DIR="/content/drive/MyDrive/ALPR_DATASET/Indian-License-Plate-1"
print("DATASET_DIR =",DATASET_DIR)


DATASET_DIR = /content/drive/MyDrive/ALPR_DATASET/Indian-License-Plate-1


In [17]:
import os,glob,random

train_img_dir=os.path.join(DATASET_DIR,"train","images")
imgs=glob.glob(os.path.join(train_img_dir,"*.jpg"))+glob.glob(os.path.join(train_img_dir,"*.png"))+glob.glob(os.path.join(train_img_dir,"*.jpeg"))
random.shuffle(imgs)

print("Total train images:",len(imgs))
sample=imgs[:3]
sample


Total train images: 1156


['/content/drive/MyDrive/ALPR_DATASET/Indian-License-Plate-1/train/images/KA3_jpg.rf.be6dc420793579cbdf7f5c9a21893af0.jpg',
 '/content/drive/MyDrive/ALPR_DATASET/Indian-License-Plate-1/train/images/BR6_jpg.rf.68371dd4f13f48c18b5222b51dd88b38.jpg',
 '/content/drive/MyDrive/ALPR_DATASET/Indian-License-Plate-1/train/images/car-wbs-KL10AW2814_00000_jpeg.rf.5944671bad395d448e6128294bcd9b20.jpg']