# Enhanced License Plate Detection and Classification

This notebook implements an improved two-stage approach for license plate recognition:
1. License plate detection - extracting the plate from the full image
2. License plate classification - classifying the state code

This solves the problem of losing important details when resizing full car images to 128x128.

In [1]:
# Import required libraries
import os
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, Add, Input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from PIL import Image
import cv2
import imutils
from sklearn.model_selection import train_test_split
import shutil

ModuleNotFoundError: No module named 'tensorflow'

## 1. License Plate Detection

First, we'll implement functions to detect and extract license plates from car images.

In [None]:
def detect_license_plate(image_path):
    """Locate the license plate in a car photo and crop it out.

    Candidates are the ten largest contours of a Canny edge map. The first
    candidate whose polygon approximation has exactly four vertices wins;
    failing that, the first candidate whose bounding box is wider than
    100 px, taller than 20 px and has a width/height ratio in [1.5, 5] is
    used. When neither search succeeds, the bottom third of the frame is
    returned instead.

    Args:
        image_path: Path of the image, read with ``cv2.imread``.

    Returns:
        ``(plate_image, annotated_image)``: the cropped region and a copy of
        the input with a green rectangle drawn around that region. Both are
        ``None`` when the file cannot be read.
    """
    frame = cv2.imread(image_path)
    if frame is None:
        return None, None

    # The rectangle is drawn on a copy so it never ends up inside the crop.
    annotated = frame.copy()

    # Edge map: grayscale -> edge-preserving smoothing -> Canny.
    smoothed = cv2.bilateralFilter(
        cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY), 11, 17, 17
    )
    edge_map = cv2.Canny(smoothed, 30, 200)

    found = imutils.grab_contours(
        cv2.findContours(edge_map.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    )
    candidates = sorted(found, key=cv2.contourArea, reverse=True)[:10]

    # Pass 1: first candidate that simplifies to a quadrilateral. The
    # generators are lazy, so no contour past the first match is examined.
    polygons = (
        cv2.approxPolyDP(cand, 0.02 * cv2.arcLength(cand, True), True)
        for cand in candidates
    )
    region = next((poly for poly in polygons if len(poly) == 4), None)

    # Pass 2: first candidate whose bounding box is shaped like a plate.
    if region is None:
        for cand in candidates:
            _, _, box_w, box_h = cv2.boundingRect(cand)
            ratio = float(box_w) / box_h
            if box_w > 100 and box_h > 20 and 1.5 <= ratio <= 5:
                region = cand
                break

    if region is not None:
        left, top, box_w, box_h = cv2.boundingRect(region)
        right, bottom = left + box_w, top + box_h
    else:
        # Last resort: assume the plate sits in the bottom third of the frame.
        rows, cols = frame.shape[:2]
        left, top, right, bottom = 0, int(rows * 2 / 3), cols, rows

    crop = frame[top:bottom, left:right]
    cv2.rectangle(annotated, (left, top), (right, bottom), (0, 255, 0), 2)

    return crop, annotated

## 2. Improved Preprocessing with Plate Detection

Now we'll create a preprocessing pipeline that detects the license plate before resizing.

In [None]:
def basic_preprocess_image(img_path):
    """Preprocess an image the baseline way: whole frame, grayscale, 128x128.

    Args:
        img_path: Path of the image to open with PIL.

    Returns:
        Array of shape (1, 128, 128, 1) holding the pixel values divided
        by 255.
    """
    gray = Image.open(img_path).convert("L").resize((128, 128))
    pixels = np.array(gray) / 255.0
    # Wrap as a one-image batch with a single channel: (1, H, W, 1).
    return pixels[np.newaxis, :, :, np.newaxis]

def improved_preprocess_image(img_path):
    """Preprocess an image by cropping the plate before downscaling.

    Args:
        img_path: Path of the car image.

    Returns:
        ``(tensor, marked_image)``. ``tensor`` has shape (1, 128, 128, 1)
        with values divided by 255. ``marked_image`` is the annotated frame
        from ``detect_license_plate``, or ``None`` when detection returned
        nothing and the baseline preprocessing was used instead.
    """
    plate_bgr, annotated = detect_license_plate(img_path)
    if plate_bgr is None:
        return basic_preprocess_image(img_path), None

    # OpenCV delivers BGR; PIL expects RGB before the grayscale conversion.
    plate_rgb = cv2.cvtColor(plate_bgr, cv2.COLOR_BGR2RGB)

    # Only the plate region is squeezed to 128x128, not the whole car.
    gray_plate = Image.fromarray(plate_rgb).convert("L").resize((128, 128))
    scaled = np.array(gray_plate) / 255.0

    # Add the batch axis in front and the channel axis at the back.
    tensor = np.expand_dims(np.expand_dims(scaled, axis=0), axis=-1)
    return tensor, annotated

## 3. Enhanced CNN Architecture

Let's implement an improved CNN architecture with batch normalization and a residual connection in the second convolutional block.

In [None]:
def create_improved_model(input_shape=(128, 128, 1), num_classes=29):
    """Build and compile the CNN used to classify plate crops.

    Three convolutional stages (32, 64 and 128 filters), each made of two
    3x3 ReLU convolutions with batch normalization and closed by 2x2 max
    pooling. The second stage adds a 1x1-projected copy of its input to its
    output. A 256-unit dense layer with batch normalization and 50% dropout
    feeds the softmax output.

    Args:
        input_shape: Shape of one input image.
        num_classes: Number of softmax outputs.

    Returns:
        A Keras ``Model`` compiled with Adam, categorical cross-entropy and
        the accuracy metric.
    """

    def conv_bn(tensor, filters):
        """Apply a 3x3 same-padded ReLU convolution, then batch norm."""
        tensor = Conv2D(filters, (3, 3), padding='same', activation='relu')(tensor)
        return BatchNormalization()(tensor)

    inputs = Input(shape=input_shape)

    # Stage 1
    x = conv_bn(inputs, 32)
    x = conv_bn(x, 32)
    x = MaxPooling2D((2, 2))(x)

    # Stage 2: the 1x1 convolution lifts the stage input to 64 channels so
    # it can be summed with the stage output.
    shortcut = Conv2D(64, (1, 1), padding='same')(x)
    x = conv_bn(x, 64)
    x = conv_bn(x, 64)
    x = Add()([x, shortcut])
    x = MaxPooling2D((2, 2))(x)

    # Stage 3
    x = conv_bn(x, 128)
    x = conv_bn(x, 128)
    x = MaxPooling2D((2, 2))(x)

    # Classifier head
    x = Flatten()(x)
    x = Dense(256, activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

## 4. License Plate-Specific Data Augmentation

Standard data augmentation may distort license plates too much. Let's create specialized augmentation for plates.

In [None]:
# Validation/test images are only rescaled; they are never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

# Training-time augmentation, deliberately mild so the plate stays legible.
plate_datagen = ImageDataGenerator(
    rescale=1./255,
    # Photometric jitter
    brightness_range=[0.8, 1.2],
    # Small geometric jitter (plates are rarely rotated much)
    rotation_range=5,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    # Areas uncovered by a shift or rotation are filled with constant 0 (black)
    fill_mode='constant',
    cval=0,
)

## 5. Prepare and Process the Dataset

Let's set up a function to process our dataset with the improved preprocessing pipeline.

In [None]:
def prepare_improved_dataset(input_dir, output_dir, detect_plates=True):
    """Write a 128x128 grayscale copy of a class-per-folder image dataset.

    Every ``.png``/``.jpg``/``.jpeg`` file found in a sub-folder of
    ``input_dir`` is written to the same relative location under
    ``output_dir``. With ``detect_plates`` enabled the image is first
    cropped by ``detect_license_plate``; if that returns no crop, the whole
    image is used.

    Note that ``detect_license_plate`` falls back to the bottom third of the
    frame when it finds no contour, so the "detected" count includes those
    fallback crops.

    Args:
        input_dir: Directory containing one sub-folder per class.
        output_dir: Destination directory (created if missing).
        detect_plates: Crop to the plate before resizing when True.

    Returns:
        ``output_dir``, unchanged.
    """
    os.makedirs(output_dir, exist_ok=True)

    total_images = 0
    detected_plates = 0
    failed_detections = 0

    for class_folder in os.listdir(input_dir):
        class_path = os.path.join(input_dir, class_folder)

        # Stray files next to the class folders are ignored.
        if not os.path.isdir(class_path):
            continue

        output_class_path = os.path.join(output_dir, class_folder)
        os.makedirs(output_class_path, exist_ok=True)

        for img_file in os.listdir(class_path):
            if not img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue

            total_images += 1
            img_path = os.path.join(class_path, img_file)

            try:
                plate_img = None
                if detect_plates:
                    plate_img, _ = detect_license_plate(img_path)

                if plate_img is not None:
                    # OpenCV crop (BGR) -> PIL grayscale, 128x128
                    plate_rgb = cv2.cvtColor(plate_img, cv2.COLOR_BGR2RGB)
                    pil_img = Image.fromarray(plate_rgb).convert("L").resize((128, 128))
                else:
                    # No detection requested or no crop returned: whole image.
                    pil_img = Image.open(img_path).convert("L").resize((128, 128))

                pil_img.save(os.path.join(output_class_path, img_file))
            except Exception as e:
                # Best effort: one unreadable file must not abort the run.
                print(f"Error processing {img_path}: {e}")
                failed_detections += 1
                continue

            # Counted only after a successful save, so every image lands in
            # exactly one of the two buckets.
            if detect_plates:
                if plate_img is not None:
                    detected_plates += 1
                else:
                    failed_detections += 1

    print(f"Processed {total_images} images")
    if detect_plates:
        # Guard the percentages against an empty dataset.
        detected_pct = detected_plates / total_images * 100 if total_images else 0.0
        failed_pct = failed_detections / total_images * 100 if total_images else 0.0
        print(f"Successfully detected {detected_plates} plates ({detected_pct:.1f}%)")
        print(f"Failed to detect {failed_detections} plates ({failed_pct:.1f}%)")

    return output_dir

## 6. Complete End-to-End Pipeline

Now let's combine everything into a complete end-to-end pipeline.

In [None]:
def classify_license_plate(img_path, model, class_labels):
    """Detect, classify and plot the plate found in a single image.

    Args:
        img_path: Path of the car image.
        model: Object whose ``predict`` returns one row of class scores for
            a one-image batch.
        class_labels: Sequence mapping class index to label.

    Returns:
        ``(predicted_class, confidence)`` for the highest-scoring class.
    """
    # `marked_image` is None when detection returned nothing and the
    # baseline preprocessing was used instead.
    img_tensor, marked_image = improved_preprocess_image(img_path)

    scores = model.predict(img_tensor)
    top_index = np.argmax(scores, axis=1)[0]
    predicted_class = class_labels[top_index]
    confidence = scores[0][top_index]

    plt.figure(figsize=(12, 6))

    # Left panel: the full frame, annotated when a detection is available.
    plt.subplot(1, 2, 1)
    plt.title("Original with Detected Plate")
    if marked_image is None:
        plt.imshow(plt.imread(img_path))
    else:
        plt.imshow(cv2.cvtColor(marked_image, cv2.COLOR_BGR2RGB))
    plt.axis('off')

    # Right panel: exactly what the network saw.
    plt.subplot(1, 2, 2)
    plt.title(f"Processed Plate\nPredicted: {predicted_class} ({confidence:.2f})")
    plt.imshow(img_tensor[0, :, :, 0], cmap='gray')
    plt.axis('off')

    plt.tight_layout()
    plt.show()

    return predicted_class, confidence

## 7. Train the Improved Model

Now let's set up the training process for our improved model.

In [None]:
# Source datasets, one sub-folder per class. Adjust to your data location.
# NOTE(review): a later cell feeds `processed/train` and `processed/test`
# to the same function -- confirm which source is intended here.
train_data_path, test_data_path = 'final/train', 'final/test'

# Destinations for the plate-cropped copies.
improved_train_path, improved_test_path = 'improved_data/train', 'improved_data/test'

# Uncomment the two calls below once the data files are available; leave
# them disabled if you are only reading through the code.
# prepare_improved_dataset(train_data_path, improved_train_path, detect_plates=True)
# prepare_improved_dataset(test_data_path, improved_test_path, detect_plates=True)

In [None]:
# Set up the data generators
# Note: Only run this cell if you've prepared the improved data directories

# Image dimensions and batch size passed to the generators below
img_size = (128, 128)
batch_size = 32

# The generator setup is kept disabled inside a string literal; an enabled
# copy of the same code appears later in the notebook.
'''
# Training data generator with augmentation
train_gen = plate_datagen.flow_from_directory(
    improved_train_path,
    target_size=img_size,
    color_mode='grayscale',
    batch_size=batch_size,
    class_mode='categorical'
)

# Test data generator (no augmentation needed for validation)
test_gen = val_datagen.flow_from_directory(
    improved_test_path,
    target_size=img_size,
    color_mode='grayscale',
    batch_size=batch_size,
    class_mode='categorical'
)

# Get class indices and number of classes
class_indices = train_gen.class_indices
class_labels = list(class_indices.keys())
num_classes = len(class_indices)

print(f"Found {num_classes} classes: {class_labels}")
'''

In [None]:
# Create and train the improved model
# Note: This cell is disabled (wrapped in a string literal) to avoid running
# intensive training unintentionally. It relies on `num_classes`, `train_gen`
# and `test_gen` from the data generator cell.

'''
# Create the model
improved_model = create_improved_model(input_shape=(128, 128, 1), num_classes=num_classes)

# Display model summary
improved_model.summary()

# Set up callbacks
model_checkpoint = ModelCheckpoint(
    "improved_plate_model.h5",
    monitor="val_accuracy",
    verbose=1,
    save_best_only=True,
    mode="max"
)

early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True
)

# Train the model
history = improved_model.fit(
    train_gen,
    epochs=50,  # Adjust as needed
    validation_data=test_gen,
    callbacks=[model_checkpoint, early_stopping]
)
'''

## 8. Load and Test the Model

Now let's test our model on some sample images.

In [None]:
# Load the original model for comparison
# Note: This assumes you have trained models available. The cell is disabled
# (wrapped in a string literal); remove the quotes to run it.
# NOTE(review): the hard-coded label order below must match the class order
# the model was trained with -- confirm against the training class indices.

'''
# Load original model
original_model = load_model("final_model.h5")

# Load improved model (if you've trained it)
# improved_model = load_model("improved_plate_model.h5")

# Define class labels (these should match your actual model's training classes)
class_labels = ['AP', 'AR', 'AS', 'BR', 'CG', 'DL', 'GA', 'GJ', 'HR', 'HP', 
                 'JH', 'KA', 'KL', 'MP', 'MH', 'MN', 'ML', 'MZ', 'NL', 'OD', 
                 'PB', 'RJ', 'SK', 'TN', 'TS', 'TR', 'UP', 'UK', 'WB']
'''

In [None]:
# Test on a sample image
# Replace the placeholder with your actual test image path. The cell is
# disabled (wrapped in a string literal) and needs `original_model` and
# `class_labels` to be defined first.

'''
test_image_path = "path/to/your/test/image.jpg"
predicted_class, confidence = classify_license_plate(test_image_path, original_model, class_labels)
print(f"Predicted state: {predicted_class} with confidence: {confidence:.2f}")
'''

## 9. Compare Original vs. Improved Performance

Let's compare the performance of the original and improved methods.

In [None]:
def compare_models(image_path, original_model, improved_model, class_labels):
    """Run both pipelines on one image, plot them side by side and print results.

    The original pipeline resizes the whole image; the improved pipeline
    crops the plate first. Each timing covers preprocessing plus prediction.

    Args:
        image_path: Path of the car image.
        original_model: Model fed by ``basic_preprocess_image``.
        improved_model: Model fed by ``improved_preprocess_image``.
        class_labels: Sequence mapping class index to label (shared by both
            models).

    Returns:
        None. Results are plotted and printed.
    """
    # Imported locally: the notebook only imports `time` in a later cell, so
    # relying on the global would raise NameError if this runs first.
    import time

    def top_prediction(model, batch):
        """Return (label, confidence) of the best class for a one-image batch."""
        scores = model.predict(batch)
        best = np.argmax(scores, axis=1)[0]
        return class_labels[best], scores[0][best]

    # Original method. perf_counter is monotonic, so it suits interval timing.
    # NOTE(review): the first predict() call may include one-off warm-up
    # cost, which would bias this timing -- confirm before quoting numbers.
    start_time = time.perf_counter()
    orig_img = basic_preprocess_image(image_path)
    orig_class, orig_confidence = top_prediction(original_model, orig_img)
    orig_time = time.perf_counter() - start_time

    # Improved method
    start_time = time.perf_counter()
    img_tensor, marked_image = improved_preprocess_image(image_path)
    improved_class, improved_confidence = top_prediction(improved_model, img_tensor)
    improved_time = time.perf_counter() - start_time

    # Display results in a 2x2 grid: inputs on the left, network views on the right
    fig, axes = plt.subplots(2, 2, figsize=(12, 10))

    # Original image
    axes[0, 0].imshow(plt.imread(image_path))
    axes[0, 0].set_title("Original Image")
    axes[0, 0].axis('off')

    # Original processed
    axes[0, 1].imshow(orig_img[0, :, :, 0], cmap='gray')
    axes[0, 1].set_title(f"Original Method\nPredicted: {orig_class} ({orig_confidence:.2f})")
    axes[0, 1].axis('off')

    # Improved with plate detection; falls back to the raw image when the
    # detector returned nothing.
    if marked_image is not None:
        axes[1, 0].imshow(cv2.cvtColor(marked_image, cv2.COLOR_BGR2RGB))
    else:
        axes[1, 0].imshow(plt.imread(image_path))
    axes[1, 0].set_title("Detected License Plate")
    axes[1, 0].axis('off')

    # Improved processed
    axes[1, 1].imshow(img_tensor[0, :, :, 0], cmap='gray')
    axes[1, 1].set_title(f"Improved Method\nPredicted: {improved_class} ({improved_confidence:.2f})")
    axes[1, 1].axis('off')

    plt.tight_layout()
    plt.show()

    print(f"Original method: {orig_class} (confidence: {orig_confidence:.2f}, time: {orig_time:.3f}s)")
    print(f"Improved method: {improved_class} (confidence: {improved_confidence:.2f}, time: {improved_time:.3f}s)")

In [None]:
# `time` is used by compare_models() for its timing measurements.
import time

# Test comparison on a sample image
# Note: This requires both models to be loaded. The call is disabled
# (wrapped in a string literal); remove the quotes to run it.

'''
test_image_path = "path/to/your/test/image.jpg"
compare_models(test_image_path, original_model, improved_model, class_labels)
'''

## 10. Integration with Mobile App

To integrate this improved model with your React Native app, you'll need to convert it to TensorFlow.js format.

In [None]:
# Convert model to TensorFlow.js format for mobile app
# Note: This requires the tensorflowjs package to be installed. The cell is
# disabled (wrapped in a string literal) and contains a notebook `!pip`
# command, so it only works inside IPython/Jupyter once the quotes are removed.

'''
!pip install tensorflowjs

import tensorflowjs as tfjs

# Path for the exported model
export_path = "mobile_app_model"

# Convert model to TensorFlow.js format
tfjs.converters.save_keras_model(improved_model, export_path)
print(f"Model exported to {export_path} for use in React Native app")
'''

## 11. Conclusion

This improved approach solves the problem of losing important details when resizing full car images by:
1. First detecting and extracting just the license plate region
2. Then resizing only the plate to 128x128
3. Using an improved CNN with batch normalization and a residual connection
4. Applying specialized data augmentation for license plates

This is expected to result in higher accuracy, especially for images where the license plate is a small part of the overall image or is located at the bottom of the frame; use the comparison in section 9 to confirm the gain on your own data.

## Data Preparation and Extraction

First, we need to extract and set up our dataset from the zip file.

In [None]:
# Unpack num+plate.zip into a local working folder
import zipfile
import os
import shutil

# Archive location (Colab upload path) and extraction target
zip_path = '/content/num+plate.zip'
extract_to = 'dataset'

# Make sure the target exists, then unpack everything into it
os.makedirs(extract_to, exist_ok=True)
with zipfile.ZipFile(zip_path) as archive:
    archive.extractall(extract_to)

print(f"Extracted {zip_path} to {extract_to}/")

In [None]:
# Working directories, in pipeline order:
#   aug_data      originals plus augmented copies
#   processed/*   train/test split of aug_data
#   final/*       baseline preprocessing (whole image, grayscale, 128x128)
#   improved_data/*  plate-cropped preprocessing
aug_data_dir = "aug_data"
processed_train_dir = "processed/train"
processed_test_dir = "processed/test"
final_train_dir = "final/train"
final_test_dir = "final/test"
improved_train_dir = "improved_data/train"
improved_test_dir = "improved_data/test"

# Create every directory up front; already existing ones are left untouched.
for _dir in (
    aug_data_dir,
    processed_train_dir,
    processed_test_dir,
    final_train_dir,
    final_test_dir,
    improved_train_dir,
    improved_test_dir,
):
    os.makedirs(_dir, exist_ok=True)

In [None]:
# Report how many images each class folder of the extracted dataset holds
num_plate_folder = os.path.join(extract_to, "num plate")

if not os.path.exists(num_plate_folder):
    print(f"Error: Could not find folder {num_plate_folder}")
    print("Please check the structure of your zip file.")
else:
    # Only sub-directories count as classes
    class_folders = [
        entry
        for entry in os.listdir(num_plate_folder)
        if os.path.isdir(os.path.join(num_plate_folder, entry))
    ]

    print(f"Found {len(class_folders)} classes in the dataset:")
    for position, folder in enumerate(class_folders, start=1):
        class_path = os.path.join(num_plate_folder, folder)
        num_images = sum(
            1
            for name in os.listdir(class_path)
            if name.lower().endswith(('.png', '.jpg', '.jpeg'))
        )
        print(f"  {position}. {folder}: {num_images} images")

## Data Augmentation

Now let's augment the data to increase our sample size.

In [None]:
# Augment the dataset similar to the original notebook: copy every original
# image into aug_data/<class>/ and add 100 randomly transformed variants.
import itertools

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# NOTE(review): horizontal_flip mirrors the plate text; it is kept to match
# the original notebook, but confirm that mirrored plates are wanted.
datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest'
)

num_plate_folder = os.path.join(extract_to, "num plate")
target_aug_count = 100  # augmented images to generate per class

# Process each class folder
for class_folder in os.listdir(num_plate_folder):
    class_path = os.path.join(num_plate_folder, class_folder)

    # Skip stray files before creating an output folder for them.
    if not os.path.isdir(class_path):
        continue

    aug_class_path = os.path.join(aug_data_dir, class_folder)
    os.makedirs(aug_class_path, exist_ok=True)

    # Copy original images
    images = [f for f in os.listdir(class_path) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
    for img_name in images:
        try:
            shutil.copy(os.path.join(class_path, img_name), os.path.join(aug_class_path, img_name))
        except Exception as e:
            print(f"Error copying {img_name}: {e}")

    # Nothing to augment: cycling over an empty list would never yield.
    if not images:
        print(f"No images found for class {class_folder}. Skipping augmentation...")
        continue

    # Add augmented images, cycling through the originals as often as needed.
    aug_count = 0
    consecutive_failures = 0
    for img_name in itertools.cycle(images):
        if aug_count >= target_aug_count:
            break

        try:
            img_path = os.path.join(class_path, img_name)
            img = Image.open(img_path).convert("RGB").resize((256, 256))
            img_array = np.expand_dims(np.array(img), 0)
            aug_iter = datagen.flow(img_array, batch_size=1)
            aug_img = next(aug_iter)[0].astype(np.uint8)
            aug_pil = Image.fromarray(aug_img)
            # splitext keeps dotted base names intact ("car.1.jpg" -> "car.1").
            base_name = os.path.splitext(img_name)[0]
            aug_pil.save(os.path.join(aug_class_path, f"{base_name}_extra{aug_count}.jpg"))
            aug_count += 1
            consecutive_failures = 0
        except Exception as e:
            print(f"Error augmenting {img_name}: {e}")
            consecutive_failures += 1
            # A full pass without a single success means no image is usable;
            # stop instead of looping forever.
            if consecutive_failures >= len(images):
                print(f"Giving up on {class_folder}: no image could be augmented")
                break

    print(f"Processed {class_folder}: {len(images)} original + {aug_count} augmented images")

In [None]:
# Split every class of aug_data into processed/train and processed/test
# (95% / 5%, fixed seed).
# NOTE(review): originals and their "_extra" augmented copies are shuffled
# together, so variants of the same photo can land on both sides of the split.
from sklearn.model_selection import train_test_split

for class_folder in os.listdir(aug_data_dir):
    class_path = os.path.join(aug_data_dir, class_folder)

    # Only directories are classes
    if not os.path.isdir(class_path):
        continue

    images = [
        name
        for name in os.listdir(class_path)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]
    if not images:
        print(f"No images found for class {class_folder}. Skipping...")
        continue

    train_imgs, test_imgs = train_test_split(images, test_size=0.05, random_state=42)

    # Copy each subset into its own class folder under the matching root
    for subset, subset_root in ((train_imgs, processed_train_dir), (test_imgs, processed_test_dir)):
        subset_dir = os.path.join(subset_root, class_folder)
        os.makedirs(subset_dir, exist_ok=True)
        for img in subset:
            shutil.copy(os.path.join(class_path, img), os.path.join(subset_dir, img))

    print(f"{class_folder}: {len(train_imgs)} training images, {len(test_imgs)} test images")

In [None]:
# Baseline preprocessing (no plate detection), kept for comparison
def preprocess_basic_dataset(input_dir, output_dir):
    """Write a grayscale 128x128 copy of every image in a class-per-folder dataset.

    Args:
        input_dir: Directory containing one sub-folder per class.
        output_dir: Destination directory; the class sub-folders are
            recreated inside it.

    Files that cannot be processed are reported and skipped.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')
    os.makedirs(output_dir, exist_ok=True)

    for class_folder in os.listdir(input_dir):
        class_path = os.path.join(input_dir, class_folder)
        if not os.path.isdir(class_path):
            continue

        save_path = os.path.join(output_dir, class_folder)
        os.makedirs(save_path, exist_ok=True)

        image_files = [
            name for name in os.listdir(class_path)
            if name.lower().endswith(image_suffixes)
        ]
        for img_file in image_files:
            img_path = os.path.join(class_path, img_file)
            try:
                resized = Image.open(img_path).convert("L").resize((128, 128))
                resized.save(os.path.join(save_path, img_file))
            except Exception as e:
                print(f"Error processing {img_path}: {e}")

# Apply basic preprocessing to create the final dataset:
# processed/train -> final/train and processed/test -> final/test
preprocess_basic_dataset(processed_train_dir, final_train_dir)
preprocess_basic_dataset(processed_test_dir, final_test_dir)

print("Basic preprocessing completed")

## Now Apply the Enhanced License Plate Detection

With our dataset prepared, let's apply our improved license plate detection preprocessing.

In [None]:
# Process the datasets with license plate detection. The source is the
# split in processed/*, not the downscaled copy in final/*.
print("Starting to process train data with license plate detection...")
prepare_improved_dataset(processed_train_dir, improved_train_dir, detect_plates=True)

print("\nStarting to process test data with license plate detection...")
prepare_improved_dataset(processed_test_dir, improved_test_dir, detect_plates=True)

## Now Set Up the Data Generators

With both basic and improved datasets ready, we can set up our data generators.

In [None]:
# Input resolution and batch size shared by both generators
img_size = (128, 128)
batch_size = 32

# Options common to the training and test directory iterators
_flow_options = dict(
    target_size=img_size,
    color_mode='grayscale',
    batch_size=batch_size,
    class_mode='categorical',
)

# Training batches come from the plate-cropped data, with augmentation
train_gen = plate_datagen.flow_from_directory(improved_train_dir, **_flow_options)

# Test batches come from the plate-cropped data, rescaled only
test_gen = val_datagen.flow_from_directory(improved_test_dir, **_flow_options)

# Label bookkeeping derived from the training folders
class_indices = train_gen.class_indices
class_labels = list(class_indices)
num_classes = len(class_labels)

print(f"Found {num_classes} classes: {class_labels}")

In [None]:
# Build the improved network for the classes found in the training folders
improved_model = create_improved_model(input_shape=(128, 128, 1), num_classes=num_classes)
improved_model.summary()

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=10,
    restore_best_weights=True,
)

# Keep only the checkpoint with the highest validation accuracy on disk.
model_checkpoint = ModelCheckpoint(
    "improved_plate_model.h5",
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    verbose=1,
)

# NOTE(review): the test generator doubles as validation data, so it also
# drives early stopping and checkpoint selection.
history = improved_model.fit(
    train_gen,
    validation_data=test_gen,
    epochs=50,  # Adjust as needed
    callbacks=[model_checkpoint, early_stopping],
)

## Importing Data from Original MLDEV_02 Notebook

Let's load the trained model and data from the original notebook to use with our improved pipeline.

In [None]:
# Load the original model from MLDEV_02 notebook
original_model_path = "final_model.h5"  # Path to the model trained in MLDEV_02

print(f"Loading original model from {original_model_path}...")
try:
    original_model = load_model(original_model_path)
    print("Original model loaded successfully!")
    original_model.summary()
except Exception as e:
    # On failure `original_model` stays undefined; later cells test for it
    # with `'original_model' in globals()`.
    print(f"Error loading original model: {e}")
    print("Make sure the model file exists and is accessible.")

In [None]:
# Get class mappings from original data and store them in class_labels.json
try:
    # Load original training data to get class indices
    original_train_path = 'final/train'
    
    # The generator is only used to read the folder-to-index mapping;
    # no batches are drawn from it in this cell.
    temp_datagen = ImageDataGenerator(rescale=1./255)
    original_data = temp_datagen.flow_from_directory(
        original_train_path,
        target_size=(128, 128),
        color_mode='grayscale',
        batch_size=1,
        class_mode='categorical'
    )
    
    # Get original class indices and labels
    # NOTE(review): taking the dict keys in order assumes class_indices is
    # ordered by index -- confirm against the Keras version in use.
    original_class_indices = original_data.class_indices
    original_class_labels = list(original_class_indices.keys())
    
    print(f"Found {len(original_class_labels)} classes in original data:")
    for i, label in enumerate(original_class_labels):
        print(f"  {i}: {label}")
        
    # Save these for later use with our improved model
    import json
    with open('class_labels.json', 'w') as f:
        json.dump(original_class_labels, f)
    print("Class labels saved to class_labels.json")
    
except Exception as e:
    # On failure `original_class_labels` may be left undefined.
    print(f"Error accessing original training data: {e}")
    print("Make sure the original dataset paths are correct.")

In [None]:
# Test original model on a sample image
def test_original_model(img_path):
    """Classify one image with the original model and display the result.

    Relies on the notebook globals ``original_model`` and
    ``original_class_labels``. If either is missing, a hint is printed and
    nothing else happens.

    Args:
        img_path: Path of the image to classify.

    Returns:
        ``(predicted_class, confidence)``, or ``None`` when a required
        global is missing.
    """
    if 'original_model' not in globals():
        print("Original model not loaded. Please run the previous cell first.")
        return None
    # The label list comes from a separate cell that can fail on its own.
    if 'original_class_labels' not in globals():
        print("Original class labels not loaded. Please run the class-mapping cell first.")
        return None

    # Use the original preprocessing function from MLDEV_02
    img = basic_preprocess_image(img_path)

    # Make prediction with original model
    prediction = original_model.predict(img)
    predicted_class_index = np.argmax(prediction, axis=1)[0]
    predicted_class = original_class_labels[predicted_class_index]
    confidence = prediction[0][predicted_class_index]

    # Display the untouched image with the prediction as its title
    img_display = Image.open(img_path)
    plt.figure(figsize=(8, 6))
    plt.imshow(img_display)
    plt.title(f"Original Model Prediction: {predicted_class} ({confidence:.2f})")
    plt.axis('off')
    plt.show()

    print(f"Original model predicts: {predicted_class} with {confidence:.2f} confidence")
    return predicted_class, confidence

# You can test with any image from your dataset
# Example: test_original_model('path/to/test/image.jpg')

## Transfer Learning: Using the Original Model as Base

We can leverage the weights from the original model as a starting point for our improved model.

In [None]:
def create_transfer_model(original_model, num_classes=29):
    """Build a classifier that reuses the original model's layers as a frozen base.

    All layers of ``original_model`` except the last three are placed in a
    new ``Sequential`` and frozen. The layer objects are shared, not copied,
    so they become non-trainable in ``original_model`` as well. A new head
    (Dense 256 + batch norm + dropout + softmax) is stacked on top.

    NOTE(review): this assumes the last three layers of the original model
    are its dense head and that the remaining layers end in a flattened
    output -- confirm against the original architecture.

    Args:
        original_model: Trained Keras model to take the base layers from.
        num_classes: Number of softmax outputs of the new head.

    Returns:
        A Keras ``Model`` for 128x128x1 inputs, compiled with Adam,
        categorical cross-entropy and the accuracy metric.
    """
    # Base: everything but the final three layers of the original model
    base_model = tf.keras.models.Sequential(original_model.layers[:-3])

    # Only the new head below will be trained
    for reused_layer in base_model.layers:
        reused_layer.trainable = False

    inputs = Input(shape=(128, 128, 1))
    features = base_model(inputs)

    # New classification head
    head = Dense(256, activation='relu')(features)
    head = BatchNormalization()(head)
    head = Dropout(0.5)(head)
    outputs = Dense(num_classes, activation='softmax')(head)

    transfer_model = Model(inputs=inputs, outputs=outputs)
    transfer_model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return transfer_model

# Create the transfer learning model.
# Both the loaded model and the class list are needed; each comes from its
# own cell and either can be missing if that cell failed.
if 'original_model' in globals() and 'original_class_labels' in globals():
    transfer_model = create_transfer_model(original_model, num_classes=len(original_class_labels))
    transfer_model.summary()
    print("Transfer learning model created successfully!")
else:
    print("Original model or class labels not loaded. Please run the previous cells first.")

## Upload and Extract Dataset in Google Colab

First, let's make sure we can get the data from the uploaded zip file.

In [None]:
# For Google Colab: Mount Google Drive (if needed)
try:
    # google.colab is only importable inside a Colab runtime
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive mounted.")
except ImportError:
    # Reached when google.colab cannot be imported
    print("Not running in Google Colab or drive already mounted.")

In [None]:
# Check if the zip file exists or prompt the user to upload it
import os
import shutil
import zipfile

zip_path = '/content/num+plate.zip'
extract_to = 'dataset'

# Check if the zip file exists; if not, ask for an upload (Colab only)
if not os.path.exists(zip_path):
    try:
        from google.colab import files
    except ImportError:
        # Outside Colab there is no upload widget; fall through to the
        # "not found" message below instead of crashing.
        files = None
        print("Colab upload helper is not available in this environment.")

    if files is not None:
        print("Please upload the num+plate.zip file.")
        uploaded = files.upload()
        for filename in uploaded:
            print(f"Uploaded {filename}")
            if filename.endswith('.zip'):
                # Move the uploaded file to the expected location. shutil
                # avoids building a shell command from the file name.
                shutil.move(filename, zip_path)

# Now extract the zip file
if os.path.exists(zip_path):
    print(f"Extracting {zip_path}...")
    os.makedirs(extract_to, exist_ok=True)

    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_to)

    print(f"Extracted {zip_path} to {extract_to}/")
else:
    print(f"Error: {zip_path} not found. Please upload the zip file.")

In [None]:
# Verify the dataset structure: the archive is expected to contain a
# "num plate" folder with one sub-folder per class.
num_plate_folder = os.path.join(extract_to, "num plate")

if not os.path.exists(num_plate_folder):
    print(f"Error: Could not find folder {num_plate_folder}")
    print("Please check the structure of your zip file.")
    print("The expected structure is: num+plate.zip → num plate → [class folders]")
else:
    # Every sub-directory counts as one class
    class_folders = [
        entry
        for entry in os.listdir(num_plate_folder)
        if os.path.isdir(os.path.join(num_plate_folder, entry))
    ]

    print(f"Found {len(class_folders)} classes in the dataset:")
    for i, folder in enumerate(class_folders):
        class_path = os.path.join(num_plate_folder, folder)
        # Count files with an image extension (case-insensitive)
        num_images = sum(
            name.lower().endswith(('.png', '.jpg', '.jpeg'))
            for name in os.listdir(class_path)
        )
        print(f"  {i+1}. {folder}: {num_images} images")

In [None]:
# Working directories used by the notebook (paths relative to the current directory)
aug_data_dir = "aug_data"
processed_train_dir, processed_test_dir = "processed/train", "processed/test"
final_train_dir, final_test_dir = "final/train", "final/test"
improved_train_dir, improved_test_dir = "improved_data/train", "improved_data/test"

# Make sure each directory exists; already-existing ones are left untouched
for directory in (
    aug_data_dir,
    processed_train_dir,
    processed_test_dir,
    final_train_dir,
    final_test_dir,
    improved_train_dir,
    improved_test_dir,
):
    os.makedirs(directory, exist_ok=True)
    print(f"Created directory: {directory}")

## Install Required Packages

Make sure we have all the required packages installed in Colab.

In [None]:
# Install required packages (for Google Colab)
!pip install -q opencv-python
!pip install -q scikit-image
!pip install -q imutils

# Verify installations
import cv2
import skimage
import imutils

print(f"OpenCV version: {cv2.__version__}")
print(f"scikit-image version: {skimage.__version__}")
print(f"imutils version: {imutils.__version__}")

## Export the Model for Android App

Let's convert our trained model to TensorFlow Lite format for use in our Android app.

In [None]:
# Convert model to TFLite for Android
# The export (conversion + writing files) and the Colab download are handled
# separately, so a missing google.colab module or a failed download is not
# reported as a conversion error.
if 'improved_model' not in globals():
    print("Model not found. Please train the model first.")
else:
    tflite_path = "android_model.tflite"
    label_path = "android_labels.txt"

    try:
        # Convert model to TFLite format
        converter = tf.lite.TFLiteConverter.from_keras_model(improved_model)
        tflite_model = converter.convert()

        # Save the TFLite model
        with open(tflite_path, 'wb') as f:
            f.write(tflite_model)

        print(f"Model converted and saved to {tflite_path}")
        print(f"Model size: {os.path.getsize(tflite_path) / (1024 * 1024):.2f} MB")

        # Save class labels, one per line, with an explicit encoding
        with open(label_path, 'w', encoding='utf-8') as f:
            f.writelines(f"{label}\n" for label in class_labels)

        print(f"Class labels saved to {label_path}")
    except Exception as e:
        # Best-effort cell: report the problem instead of stopping the notebook.
        print(f"Error converting model: {e}")
    else:
        # Download files to local machine (only possible inside Colab)
        try:
            from google.colab import files
        except ImportError:
            print("Not running in Google Colab; files were saved locally and not downloaded.")
        else:
            try:
                files.download(tflite_path)
                files.download(label_path)
            except Exception as e:
                print(f"Error downloading files: {e}")