Data augmentation

In [None]:
import cv2
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import regularizers
import os
import random
import matplotlib.pyplot as plt
# ------------------------------
# Step 1: Dataset Augmentation
# ------------------------------

# Keras generator that supplies the randomly transformed images written by
# augment_and_save.
# NOTE(review): left/right mirroring alters asymmetric letters (e.g. "S") -
# confirm that this is intended for the letter classes.
datagen = ImageDataGenerator(
    # --- intensity ---
    rescale=1.0 / 255,             # multiply every pixel value by 1/255
    brightness_range=[0.7, 1.3],   # range the random brightness factor is drawn from
    # --- geometry ---
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.1,
    horizontal_flip=True,          # random left/right mirroring on
    vertical_flip=False,           # up/down mirroring off
    # --- border handling for pixels exposed by a transform ---
    fill_mode='nearest',
)
# NOTE: the triple-quoted string below is an inert expression statement. It holds
# an earlier, disabled version of the augmentation code (add_shadow and a
# manual_augmentation with a -15..15 degree rotation); none of it is executed.
"""
def add_shadow(img):
    rows, cols = img.shape
    shadow = np.ones_like(img) * 255  # White base canvas

    shadow_type = random.choice(["ellipse", "rectangle", "polygon"])
    if shadow_type == "ellipse":
        center = (random.randint(0, cols), random.randint(0, rows))
        axes = (random.randint(20, cols // 3), random.randint(20, rows // 3))
        angle = random.randint(0, 360)
        cv2.ellipse(shadow, center, axes, angle, 0, 360, (random.randint(50, 100)), -1)
    elif shadow_type == "rectangle":
        x1, y1 = random.randint(0, cols // 2), random.randint(0, rows // 2)
        x2, y2 = random.randint(cols // 2, cols), random.randint(rows // 2, rows)
        cv2.rectangle(shadow, (x1, y1), (x2, y2), (random.randint(50, 100)), -1)
    elif shadow_type == "polygon":
        num_points = random.randint(4, 8)
        points = np.array([[(random.randint(0, cols), random.randint(0, rows)) for _ in range(num_points)]], dtype=np.int32)
        cv2.fillPoly(shadow, points, (random.randint(50, 100)))

    shadow = cv2.GaussianBlur(shadow, (25, 25), 0)
    alpha = random.uniform(0.5, 0.7)
    shadow_overlay = cv2.addWeighted(img, 1 - alpha, shadow, alpha, 0)

    noise = np.random.normal(0, 10, img.shape).astype(np.float32)
    noise = np.clip(noise, 0, 255).astype(np.uint8)
    shadow_with_noise = cv2.add(shadow_overlay, noise)

    return np.clip(shadow_with_noise, 0, 255).astype(np.uint8)

def manual_augmentation(img, output_dir, prefix, start_index, num_images, is_blank=False):

    def center_and_fit(rotated_img, target_size=(64, 64)):
        # Center the rotated letter and resize moderately to fit the target canvas.
        coords = cv2.findNonZero(255 - rotated_img)
        x, y, w, h = cv2.boundingRect(coords)

        # Extract letter and compute scaling factor
        letter = rotated_img[y:y+h, x:x+w]
        scale = min(target_size[0] / h, target_size[1] / w) * 0.95

        # Resize and center the letter on a blank canvas
        resized = cv2.resize(letter, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)
        canvas = np.ones(target_size, dtype=np.uint8) * 255
        y_offset = (target_size[0] - resized.shape[0]) // 2
        x_offset = (target_size[1] - resized.shape[1]) // 2
        canvas[y_offset:y_offset+resized.shape[0], x_offset:x_offset+resized.shape[1]] = resized

        return canvas

    rows, cols = img.shape
    for i in range(num_images):
        # Start with a larger canvas
        canvas_size = 128
        canvas = np.ones((canvas_size, canvas_size), dtype=np.uint8) * 255
        y_offset = (canvas_size - rows) // 2
        x_offset = (canvas_size - cols) // 2
        canvas[y_offset:y_offset+rows, x_offset:x_offset+cols] = img

        # Moderate random rotation (-15° to 15°)
        angle = random.randint(-15, 15)
        M = cv2.getRotationMatrix2D((canvas_size // 2, canvas_size // 2), angle, 1.0)
        rotated_img = cv2.warpAffine(canvas, M, (canvas_size, canvas_size), borderMode=cv2.BORDER_CONSTANT, borderValue=255)

        # Fit and center the letter
        final_img = center_and_fit(rotated_img, target_size=(64, 64))

        # Add shadows only for "white" class
        if is_blank:
            final_img = add_shadow(final_img)

        save_path = os.path.join(output_dir, f"{prefix}_{start_index + i}.jpg")
        cv2.imwrite(save_path, final_img)
"""

def manual_augmentation(img, output_dir, prefix, start_index, num_images, is_blank=False):
    """
    Write `num_images` rotated, re-lit and noised variants of `img` as JPEG files.

    Each variant is produced by pasting `img` onto a white 128x128 canvas,
    rotating it by a random angle in [-60, 60] degrees, applying a random
    contrast/brightness change, adding Gaussian noise, optionally blending in
    a blurred random texture, and finally cropping to the content's bounding
    box and centering it on a white 64x64 canvas.

    Args:
        img: 2-D grayscale image (dark content on a 255 background) that fits
            inside the 128x128 working canvas.
        output_dir: Directory the files are written to.
        prefix: File-name prefix; files are named "{prefix}_{index}.jpg".
        start_index: Index of the first file; following files count upwards.
        num_images: Number of variants to write.
        is_blank: When True, a blurred random texture is blended into every
            variant.

    Raises:
        ValueError: If `img` is larger than the working canvas.
    """
    canvas_size = 128
    target_size = (64, 64)

    rows, cols = img.shape
    if rows > canvas_size or cols > canvas_size:
        raise ValueError(
            f"Image of shape {img.shape} does not fit the "
            f"{canvas_size}x{canvas_size} working canvas"
        )

    def center_and_fit(image, bbox):
        """ Crop `image` to `bbox`, scale the crop to fit the target canvas and center it. """
        x, y, w, h = bbox
        region = image[y:y+h, x:x+w]
        scale = min(target_size[0] / h, target_size[1] / w) * 0.9
        # Explicit output size, never below one pixel, so a very thin region
        # cannot round down to an empty resize target.
        new_w = max(1, int(round(w * scale)))
        new_h = max(1, int(round(h * scale)))
        resized = cv2.resize(region, (new_w, new_h), interpolation=cv2.INTER_AREA)
        fitted = np.ones(target_size, dtype=np.uint8) * 255
        y_off = (target_size[0] - new_h) // 2
        x_off = (target_size[1] - new_w) // 2
        fitted[y_off:y_off+new_h, x_off:x_off+new_w] = resized
        return fitted

    # The padded source canvas is identical for every variant, so build it once.
    base_canvas = np.ones((canvas_size, canvas_size), dtype=np.uint8) * 255
    y_offset = (canvas_size - rows) // 2
    x_offset = (canvas_size - cols) // 2
    base_canvas[y_offset:y_offset+rows, x_offset:x_offset+cols] = img
    center = (canvas_size // 2, canvas_size // 2)

    for i in range(num_images):
        # Apply rotation
        angle = random.randint(-60, 60)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)
        rotated_img = cv2.warpAffine(base_canvas, M, (canvas_size, canvas_size), borderMode=cv2.BORDER_CONSTANT, borderValue=255)

        # Measure the content's bounding box on the clean rotated image. After
        # lighting and noise the background is no longer exactly 255, so a box
        # measured later would cover the whole canvas.
        coords = cv2.findNonZero(255 - rotated_img)
        if coords is None:
            # Completely white image: nothing to crop to, keep the full canvas.
            bbox = (0, 0, canvas_size, canvas_size)
        else:
            bbox = cv2.boundingRect(coords)

        # Add lighting variations
        alpha = random.uniform(0.8, 1.2)  # Contrast control
        beta = random.randint(-40, 40)    # Brightness control
        lighting_adjusted = cv2.convertScaleAbs(rotated_img, alpha=alpha, beta=beta)

        # Add Gaussian noise in floating point and clip before converting back:
        # casting the signed noise straight to uint8 would wrap negative
        # samples around to large positive values.
        noise = np.random.normal(0, 10, lighting_adjusted.shape)
        noisy_img = np.clip(lighting_adjusted.astype(np.float32) + noise, 0, 255).astype(np.uint8)

        # Add textures for "white" class (or as noise)
        if is_blank:
            texture = np.random.randint(200, 255, noisy_img.shape, dtype=np.uint8)
            texture = cv2.GaussianBlur(texture, (11, 11), 5)
            noisy_img = cv2.addWeighted(noisy_img, 0.8, texture, 0.2, 0)

        # Center and fit the letter
        final_img = center_and_fit(noisy_img, bbox)

        # Save augmented image; cv2.imwrite signals failure through its return value
        save_path = os.path.join(output_dir, f"{prefix}_{start_index + i}.jpg")
        if not cv2.imwrite(save_path, final_img):
            print(f"Error: Could not write image to {save_path}")

def augment_and_save(image_path, output_dir, prefix, total_images=500, is_blank=False):
    """
    Write `total_images` augmented 64x64 grayscale variants of one source image.

    The first `total_images // 2` files come from the module-level `datagen`
    generator; the remaining ones are produced by `manual_augmentation`, so
    an odd total is honoured as well. Files are written to
    `<output_dir>/<PREFIX>/` (prefix upper-cased) and named
    "{prefix}_{index}.jpg" with consecutive indices starting at 0.

    Args:
        image_path: Path of the source image.
        output_dir: Root directory of the augmented dataset.
        prefix: Class/file prefix; its upper-cased form names the sub-folder.
        total_images: Total number of files to produce (values below 1
            produce none).
        is_blank: Forwarded to `manual_augmentation`.

    Returns:
        None. If the image cannot be read, a message is printed and nothing
        is written.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Error: Image not found at {image_path}")
        return

    img = cv2.resize(img, (64, 64))
    letter_folder = os.path.join(output_dir, prefix.upper())
    os.makedirs(letter_folder, exist_ok=True)

    total_images = max(total_images, 0)
    generated_count = total_images // 2
    manual_count = total_images - generated_count  # takes the extra one when the total is odd

    # The generator expects a (batch, height, width, channels) array.
    batch_input = img.reshape((1,) + img.shape + (1,))
    flow = datagen.flow(batch_input, batch_size=1, save_prefix=None, save_format='jpg')
    # `flow` never ends on its own; pairing it with range() bounds the loop and
    # writes nothing when generated_count is 0.
    for i, batch in zip(range(generated_count), flow):
        save_path = os.path.join(letter_folder, f"{prefix}_{i}.jpg")
        # The generator output is rescaled by 1/255; scale back and clip so the
        # uint8 cast cannot wrap around.
        pixels = np.clip(batch[0, :, :, 0] * 255, 0, 255).astype(np.uint8)
        cv2.imwrite(save_path, pixels)

    manual_augmentation(img, letter_folder, prefix, start_index=generated_count, num_images=manual_count, is_blank=is_blank)

# Generate Dataset: augment every image file found in the source folder
original_image_dir = "original_images"
augmented_image_dir = "augmented_images_2000"
os.makedirs(augmented_image_dir, exist_ok=True)

for filename in os.listdir(original_image_dir):
    lowered_name = filename.lower()
    # Skip anything that is not a JPEG/PNG file
    if not lowered_name.endswith(('.jpg', '.png', '.jpeg')):
        continue
    # Files with "white" in their name are handled as the blank class
    is_blank = "white" in lowered_name
    source_path = os.path.join(original_image_dir, filename)
    # The part of the file name before the first dot is used as the prefix
    augment_and_save(source_path, augmented_image_dir, filename.split('.')[0], 500, is_blank)

print("Dataset augmentation completed successfully!")


Make the model

In [None]:
import os
import numpy as np
import tensorflow as tf
from tflite_model_maker import image_classifier, model_spec
from tflite_model_maker.config import QuantizationConfig, ExportFormat
from tflite_model_maker.image_classifier import DataLoader
import matplotlib.pyplot as plt

# Stop early when the installed TensorFlow is not a 2.x release
assert tf.__version__.startswith('2')

# ------------------------------
# Step 1: Dataset Preparation
# ------------------------------

# Settings
data_dir = "augmented_images_2000"  # Replace with your dataset path
img_size = 64  # Side length used when predict_image loads an image for display
batch_size = 16  # Batch size for training

# Load the image folder and split it: element 0 is the 80% training part,
# element 1 the remaining 20% used for validation
train_data = DataLoader.from_folder(data_dir).split(0.8)
training_split = train_data[0]
validation_split = train_data[1]

# ------------------------------
# Step 2: Train the Model Using TFLite Model Maker
# ------------------------------

# Model architecture: EfficientNet-Lite0
spec = model_spec.get("efficientnet_lite0")

# Train the model
model = image_classifier.create(
    train_data=training_split,
    model_spec=spec,
    validation_data=validation_split,
    batch_size=batch_size,
    epochs=30,
    learning_rate=0.001,
)

# ------------------------------
# Step 3: Evaluate the Model
# ------------------------------

# Evaluate on the validation split
loss, accuracy = model.evaluate(validation_split)
print(f"Validation Accuracy: {accuracy:.2%}")

# ------------------------------
# Step 4: Export the Model
# ------------------------------

# Save the TFLite model
tflite_filename = "letter_classification_model.tflite"
model.export(
    export_dir=".",
    # Pass the file name explicitly: without it export() writes the model under
    # its default name and the message below would name a file that was never
    # created.
    tflite_filename=tflite_filename,
    export_format=[ExportFormat.TFLITE, ExportFormat.LABEL]
)
print(f"Model exported as {tflite_filename}")

# ------------------------------
# Step 5: Visualize Predictions
# ------------------------------

# Function to display predictions
def predict_image(image_path, model):
    """
    Show the image at `image_path` and print the result of `model.predict_top_k`.

    Args:
        image_path: Path of the image to display and classify.
        model: Object exposing `predict_top_k`; the call site passes the
            classifier returned by `image_classifier.create`.
    """
    # Loaded (at img_size x img_size) for display only; this image object is
    # not what gets passed to the model below.
    img = tf.keras.preprocessing.image.load_img(image_path, target_size=(img_size, img_size))
    plt.imshow(img)
    plt.axis("off")
    plt.show()

    # NOTE(review): Model Maker documents the first argument of predict_top_k
    # as a DataLoader or raw tensor/array data, while this passes a file path
    # string; the loop below also assumes a flat list of (label, prob) pairs.
    # Confirm both against the installed tflite_model_maker version.
    predictions = model.predict_top_k(image_path, k=4)  # Show top 4 predictions
    for i, (label, prob) in enumerate(predictions):
        print(f"{i+1}. {label} ({prob:.2%})")

# Run the display/prediction helper on one sample image
test_image_path = "H_test.JPG"  # Replace with your test image path
predict_image(image_path=test_image_path, model=model)

Test the model

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt

# ------------------------------
# Step 1: Load the Trained PyTorch Model
# ------------------------------
# Run on the GPU when CUDA is available, otherwise on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Define the CNN Model Architecture (must match the trained model)
# Adjusted Model Definition
class LetterClassificationModel(nn.Module):
    """
    Small CNN: two conv/batch-norm/pool stages followed by a two-layer classifier.

    Takes single-channel image batches and returns 4 raw class scores per
    image. The first linear layer expects 64 * 16 * 16 features, which is what
    a 64x64 input yields after the two 2x2 poolings.

    The position of each layer inside the two `nn.Sequential` containers
    determines its state-dict key, so the positions of the parameterised
    layers are noted next to them.
    NOTE(review): the linear layers sit at fc_layers.1 and fc_layers.4; a
    checkpoint that stores them under other indices will not populate them
    when loaded non-strictly - verify against the checkpoint's keys.
    """

    def __init__(self):
        super().__init__()

        feature_stages = [
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),   # conv_layers.0
            nn.ReLU(),
            nn.BatchNorm2d(32),                                      # conv_layers.2
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # conv_layers.4
            nn.ReLU(),
            nn.BatchNorm2d(64),                                      # conv_layers.6
            nn.MaxPool2d(2, 2),
            nn.Dropout(0.5),
        ]
        self.conv_layers = nn.Sequential(*feature_stages)

        classifier_stages = [
            nn.Flatten(),                   # fc_layers.0 (no parameters)
            nn.Linear(64 * 16 * 16, 128),   # fc_layers.1
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 4),              # fc_layers.4
        ]
        self.fc_layers = nn.Sequential(*classifier_stages)

    def forward(self, x):
        """Run the convolutional stages, then the classifier, and return the scores."""
        return self.fc_layers(self.conv_layers(x))

# Instantiate the Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LetterClassificationModel().to(device)

# Load State Dict
# NOTE: torch.load unpickles the file - only load checkpoints from a trusted source.
model_path = "letter_classification_with_white.pth"
state_dict = torch.load(model_path, map_location=device)

# strict=False tolerates key mismatches, but every layer listed under
# missing_keys keeps its random initial weights. Report mismatches instead of
# discarding them, so a partially loaded model is not mistaken for a trained one.
load_result = model.load_state_dict(state_dict, strict=False)
model.eval()

if load_result.missing_keys or load_result.unexpected_keys:
    print("Warning: checkpoint does not fully match the model definition.")
    print(f"  Missing keys (left at random initialization): {list(load_result.missing_keys)}")
    print(f"  Unexpected keys (ignored): {list(load_result.unexpected_keys)}")
else:
    print("Model loaded successfully!")

# ------------------------------
# Step 2: Preprocess the Image
# ------------------------------
def preprocess_image(image_path):
    """
    Read an image as grayscale and prepare it as a model input batch.

    The image is resized to 64x64, converted to a tensor, normalized with
    mean 0.5 / std 0.5, given a leading batch dimension and moved to `device`.

    Returns:
        A tuple `(input_tensor, resized_image)`; the second element is the
        resized grayscale array, returned so the caller can display it.

    Raises:
        ValueError: If OpenCV could not read the file.
    """
    grayscale = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if grayscale is None:
        raise ValueError(f"Error: Image not found at {image_path}")

    # Fixed 64x64 model input size
    resized = cv2.resize(grayscale, (64, 64))

    # Tensor conversion followed by (x - 0.5) / 0.5 normalization
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=[0.5], std=[0.5])
    batch = normalize(to_tensor(resized)).unsqueeze(0)

    return batch.to(device), resized

# ------------------------------
# Step 3: Make Prediction
# ------------------------------
def predict_image(image_path, model, class_labels):
    """
    Classify one image, show it with the winning label, and print all scores.

    Args:
        image_path: Path of the image to classify.
        model: Callable model returning one row of raw scores per input image.
        class_labels: Class names, indexed in the same order as the model's
            outputs.
    """
    batch, preview = preprocess_image(image_path)

    # Inference only, so gradient tracking is switched off
    with torch.no_grad():
        raw_scores = model(batch)
        probabilities = torch.softmax(raw_scores[0], dim=0).cpu().numpy()

    # Highest-probability class
    best_index = np.argmax(probabilities)
    predicted_class = class_labels[best_index]
    confidence = probabilities[best_index] * 100

    # Show the resized grayscale input with the prediction as its title
    plt.imshow(preview, cmap="gray")
    plt.title(f"Predicted: {predicted_class} ({confidence:.2f}%)")
    plt.axis("off")
    plt.show()

    # One line per class, in label order
    print("Class Confidence Percentages:")
    for position, name in enumerate(class_labels):
        print(f"{name}: {probabilities[position] * 100:.2f}%")

    print(f"\nFinal Prediction: {predicted_class} with {confidence:.2f}% confidence.")

# ------------------------------
# Step 4: Test the Model with a Photo
# ------------------------------
# Class names, listed in the order of the model's four outputs
# (must match the order used for training); "White" is the fourth class
class_labels = ["H", "S", "U", "White"]

# Image to classify
test_image_path = "S.png"  # Replace with your actual image path

# Classify the image and show the result
predict_image(image_path=test_image_path, model=model, class_labels=class_labels)