In [None]:
# The data is taken from Kaggle: https://www.kaggle.com/datasets/jcprogjava/handwritten-digits-dataset-not-in-mnist

In [1]:
!pip install potracer

Collecting potracer
  Downloading potracer-0.0.4-py2.py3-none-any.whl (24 kB)
Installing collected packages: potracer
Successfully installed potracer-0.0.4


In [2]:
import zipfile
import os
import pandas as pd
import numpy as np
import glob
import cv2
import matplotlib.pyplot as plt
import potrace
import xml.etree.ElementTree as ET
import re
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models, optimizers, callbacks
from sklearn.utils.class_weight import compute_class_weight

In [3]:
# Function to get SVG path of a raster image
def get_vector(filename, nudge = 0.33, bilateral_filter=True, use_l2_gradient=True):
    """Trace the alpha channel of an image file into SVG path strings.

    The alpha channel is binarised (values above 180 become 255, the rest 0),
    traced with potrace, and every traced curve is serialised as one SVG
    path-data string.

    Args:
        filename: Path of the image to read; it must contain a 4th (alpha) channel.
        nudge: Accepted for backward compatibility; not used by this function.
        bilateral_filter: Accepted for backward compatibility; not used by this function.
        use_l2_gradient: Accepted for backward compatibility; not used by this function.

    Returns:
        A list of strings, one per traced curve. Each string starts with
        ``M x,y`` and continues with ``L x1,y1 x2,y2`` for corner segments or
        ``C x1,y1 x2,y2 x3,y3`` for Bezier segments.

    Raises:
        ValueError: If the file cannot be read or has no alpha channel.
    """
    # IMREAD_UNCHANGED keeps the alpha channel; imread signals failure by returning None, not by raising
    image = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
    if image is None:
        raise ValueError(f'Could not read image file: {filename}')
    if image.ndim != 3 or image.shape[2] < 4:
        raise ValueError(f'Image has no alpha channel: {filename}')

    alpha_channel = image[:, :, 3]
    # Binarise the alpha channel around the 180 threshold
    alpha_channel[alpha_channel > 180] = 255  # White
    alpha_channel[alpha_channel <= 180] = 0

    # Only the alpha channel is traced
    bitmap = potrace.Bitmap(alpha_channel)
    path = bitmap.trace(turdsize=1, turnpolicy=potrace.POTRACE_TURNPOLICY_MINORITY, alphamax=1, opticurve=0, opttolerance=0.2)

    # Generate SVG path data, one string per curve
    svg_path_data = []
    for curve in path.curves:
        start = curve.start_point
        parts = [f'M {start.x},{start.y}']  # Move to the start point
        for segment in curve.segments:
            if segment.is_corner:
                # Line to the corner vertex, then on to the end point
                parts.append(f'L {segment.c.x},{segment.c.y} {segment.end_point.x},{segment.end_point.y}')
            else:
                # Cubic Bezier: two control points followed by the end point
                parts.append(
                    f'C {segment.c1.x},{segment.c1.y} {segment.c2.x},{segment.c2.y} '
                    f'{segment.end_point.x},{segment.end_point.y}'
                )
        svg_path_data.append(' '.join(parts))
    return svg_path_data

In [4]:
# Function to get a .svg file from the path
def create_svg_file(svg_paths, filename, width=500, height=500):
    """Write SVG path-data strings to an SVG file.

    Args:
        svg_paths: Iterable of SVG path-data strings (the ``d`` attribute values).
        filename: Destination file path (or file object) passed to ``ElementTree.write``.
        width: Value written to the root element's ``width`` attribute.
        height: Value written to the root element's ``height`` attribute.
    """
    # Create the root element
    svg = ET.Element('svg', xmlns="http://www.w3.org/2000/svg", version="1.1", width=str(width), height=str(height))

    # Add each path to the SVG. The attributes go in a dict because the SVG
    # attribute is spelled "stroke-width"; a Python keyword argument can only
    # produce "stroke_width", which SVG renderers do not recognise.
    for path_data in svg_paths:
        ET.SubElement(svg, 'path', {
            'd': path_data,
            'fill': 'none',
            'stroke': 'black',
            'stroke-width': '1',
        })

    # Create the tree structure and write to file
    ET.ElementTree(svg).write(filename)

In [5]:
zip_file_name = '/content/drive/MyDrive/archive.zip'  # Change this to the name of your ZIP file if different
unzip_dir = '/content/unzipped_archive/'  # Extract to your local environment

# The extraction target must exist before the archive is unpacked into it
os.makedirs(unzip_dir, exist_ok=True)

# Unpack every member of the archive; the context manager closes the file afterwards
with zipfile.ZipFile(zip_file_name) as archive:
    archive.extractall(path=unzip_dir)

print(f'Files extracted to {unzip_dir}')

Files extracted to /content/unzipped_archive/


In [6]:
def load_images_from_folder(folder_path):
    """Collect PNG paths, their labels and traced vectors from a dataset tree.

    The expected layout is ``folder_path/<label>/<subfolder>/*.png``; the name
    of the first-level directory is used as the label. Entries that are not
    directories are ignored at both levels.

    Args:
        folder_path: Root directory of the dataset.

    Returns:
        A tuple ``(images, labels, vectors)`` of three equally long lists:
        image file paths, label directory names, and the ``get_vector`` result
        for each image. Images whose tracing fails are reported on stdout and
        left out of all three lists.
    """
    images = []
    labels = []
    vectors = []
    # Directory listings are sorted so the returned order is reproducible across runs
    for label in sorted(os.listdir(folder_path)):
        label_path = os.path.join(folder_path, label)
        if not os.path.isdir(label_path):
            continue
        for subfolder in sorted(os.listdir(label_path)):
            subfolder_path = os.path.join(label_path, subfolder)
            if not os.path.isdir(subfolder_path):
                continue
            for image_file in sorted(glob.glob(os.path.join(subfolder_path, '*.png'))):
                # Trace first and append afterwards: a failure must not leave an
                # image/label entry without its matching vector.
                try:
                    vector = get_vector(image_file)
                except Exception as e:
                    print(f'Error loading image {image_file}: {e}')
                    continue
                images.append(image_file)
                labels.append(label)
                vectors.append(vector)
    return images, labels , vectors


In [7]:
# Path to the dataset folder.
# The second component must be relative: os.path.join discards everything before an absolute component,
# so the previous absolute literal silently ignored unzip_dir.
dataset_path = os.path.join(unzip_dir, 'dataset')
images, labels, vectors = load_images_from_folder(dataset_path)

In [8]:
# Build the frame (one row per image), order it by label and renumber the index
df = (
    pd.DataFrame({'image': images, 'label': labels, 'vector': vectors})
    .sort_values(by=['label'])
    .reset_index(drop=True)
)

In [9]:
# SVG path commands emitted by get_vector, and their integer ids (position in the list)
commands = ['M', 'L', 'C']
commands_dict = dict(zip(commands, range(len(commands))))

# Tokenize SVG paths
def tokenize_svg_path(svg_path):
    """Flatten SVG path-data strings into ``(command, "x,y")`` tokens.

    Args:
        svg_path: Iterable of path-data strings made of whitespace-separated
            words, e.g. ``"M 0,0 L 1,1 2,2"``.

    Returns:
        A list of ``(command, word)`` tuples: one token for ``M``, two for
        ``L`` and three for ``C``, each pairing the command letter with one of
        the words that follow it. Words that are not one of these commands are
        skipped.
    """
    # How many coordinate words follow each command letter
    arg_counts = {'M': 1, 'L': 2, 'C': 3}

    tokens = []
    for path in svg_path:
        words = path.split()
        pos = 0
        while pos < len(words):
            command = words[pos]
            pos += 1
            # Unknown words have an argument count of 0, so only the word itself is consumed
            for _ in range(arg_counts.get(command, 0)):
                tokens.append((command, words[pos]))
                pos += 1
    return tokens

# Separate commands and coordinates
def separate_tokens(tokens):
    """Split ``(command, coordinate)`` tokens into two parallel lists.

    Args:
        tokens: Sequence of indexable tokens; element 0 is the command and
            element 1 the coordinate word.

    Returns:
        ``(command_tokens, coordinate_tokens)``, two lists in the input order.
    """
    command_tokens = list(map(lambda token: token[0], tokens))
    coordinate_tokens = list(map(lambda token: token[1], tokens))
    return command_tokens, coordinate_tokens

# Encode commands
def encode_commands(command_tokens):
    """Map command letters to their integer ids from ``commands_dict``.

    Args:
        command_tokens: Iterable of command letters that are keys of ``commands_dict``.

    Returns:
        An int32 ``tf.constant`` holding one id per input token, in order.

    Raises:
        KeyError: If a token is not a key of ``commands_dict``.
    """
    command_indices = []
    for cmd in command_tokens:
        command_indices.append(commands_dict[cmd])
    return tf.constant(command_indices, dtype=tf.int32)

# Convert coordinate words to an array of float (x, y) pairs
def process_coordinates(coordinate_tokens):
    """Parse comma-separated coordinate words into an ``(n, 2)`` float array.

    A word may hold 2, 4 or 6 comma-separated numbers and contributes one row
    per consecutive (x, y) pair. Words with any other field count, or with a
    field that is not a valid float, are reported on stdout and contribute no
    rows.

    Args:
        coordinate_tokens: Iterable of strings such as ``"1.0,2.5"``.

    Returns:
        A NumPy array with one ``[x, y]`` row per parsed pair; an empty
        ``(0, 2)`` array when nothing could be parsed.
    """
    pairs = []
    for coord in coordinate_tokens:
        fields = coord.split(',')

        if len(fields) not in (2, 4, 6):
            print(f"Unexpected number of coordinates in '{coord}'")
            continue

        # Parse the whole word before storing anything, so a bad field drops the word entirely
        try:
            values = [float(field) for field in fields]
        except ValueError as e:
            print(f"Error parsing coordinates '{coord}': {e}")
            continue

        pairs.extend([values[k], values[k + 1]] for k in range(0, len(values), 2))

    # Keep the 2-column shape even when there are no rows
    if not pairs:
        return np.empty((0, 2))
    return np.array(pairs)

In [10]:
vocab_size = len(commands)
embedding_dim = 2
# This layer is called directly by process_svg_paths and is never trained, so its
# initial weights *are* the command features. Seeding the initializer keeps those
# features identical after a kernel restart; RandomUniform's default range is kept.
embedding_layer = tf.keras.layers.Embedding(
    input_dim=vocab_size,
    output_dim=embedding_dim,
    embeddings_initializer=tf.keras.initializers.RandomUniform(seed=42),
)


In [11]:
# Padding the data to feed transformer
def process_svg_paths(df):
    """Turn each row's SVG paths into one zero-padded feature tensor.

    For every row of ``df`` the ``'vector'`` column is tokenised, the command
    letters are passed through ``embedding_layer`` and the coordinate words are
    parsed to floats. Both parts are zero-padded along the first axis to the
    longest token count found in ``df`` and concatenated along axis 1.

    Args:
        df: DataFrame with a ``'vector'`` column of SVG path-data string lists.

    Returns:
        A list of tensors, one per row that could be assembled. Rows whose
        command and coordinate parts end up with different lengths are
        reported on stdout and left out.
    """
    per_row_parts = []
    longest = 0

    # Pass 1: featurise every row and track the longest token sequence
    for _, record in df.iterrows():
        tokens = tokenize_svg_path(record['vector'])
        command_tokens, coordinate_tokens = separate_tokens(tokens)
        if not command_tokens:
            print(record)
        embedded_commands = embedding_layer(encode_commands(command_tokens))
        coordinates = process_coordinates(coordinate_tokens)

        longest = max(longest, len(command_tokens))
        per_row_parts.append((embedded_commands, coordinates))

    # Pass 2: pad both parts to the common length and join them column-wise
    padded_sequences = []
    for embedded_commands, coordinates in per_row_parts:
        commands_missing = longest - tf.shape(embedded_commands)[0]
        coordinates_missing = longest - coordinates.shape[0]

        command_embeddings_padded = (
            tf.pad(embedded_commands, [[0, commands_missing], [0, 0]])
            if commands_missing > 0
            else embedded_commands
        )

        if coordinates.ndim == 1:
            coordinates = np.expand_dims(coordinates, axis=0)
        if coordinates_missing > 0:
            coordinates = np.pad(coordinates, ((0, coordinates_missing), (0, 0)), 'constant')
        coordinate_array_padded_tensor = tf.convert_to_tensor(coordinates, dtype=tf.float32)

        # The two parts can only be concatenated when their row counts agree
        if command_embeddings_padded.shape[0] != coordinate_array_padded_tensor.shape[0]:
            print(f"Shape mismatch found: command_embeddings_padded shape: {command_embeddings_padded.shape}, coordinate_array_padded_tensor shape: {coordinate_array_padded_tensor.shape}")
            continue  # Skip this data point

        try:
            padded_sequences.append(
                tf.concat([command_embeddings_padded, coordinate_array_padded_tensor], axis=1)
            )
        except Exception as e:
            print(f"Error concatenating tensors: {e}")
            print(f"command_embeddings_padded shape: {command_embeddings_padded.shape}")
            print(f"coordinate_array_padded_tensor shape: {coordinate_array_padded_tensor.shape}")

    return padded_sequences

In [12]:
# Split the data: hold out 20% for testing, then 20% of the remainder for validation
train_val_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_val_df, test_size=0.2, random_state=42)

print(f"Training set size: {len(train_df)}")
print(f"Validation set size: {len(val_df)}")
print(f"Test set size: {len(test_df)}")

Training set size: 68947
Validation set size: 17237
Test set size: 21546


In [13]:
def prepare_data(df, max_sequence_length=90):
    """Build the model inputs and labels for one data split.

    Args:
        df: DataFrame with ``'vector'`` and ``'label'`` columns.
        max_sequence_length: Length every sequence is zero-padded to along its
            first axis. Defaults to 90, the value previously hard-coded here.

    Returns:
        ``(padded_sequences, labels)``: the stacked sequence tensor and the
        ``'label'`` column values, row-aligned with each other.

    Raises:
        ValueError: If ``process_svg_paths`` returned fewer sequences than
            ``df`` has rows (labels would no longer line up), or if a sequence
            is longer than ``max_sequence_length``.
    """
    sequences = process_svg_paths(df)

    # process_svg_paths skips rows it cannot assemble; labels are taken from df
    # as-is, so any skipped row would shift every later label.
    if len(sequences) != len(df):
        raise ValueError(
            f"process_svg_paths returned {len(sequences)} sequences for {len(df)} rows; "
            "sequences and labels would be misaligned"
        )

    padded_sequences = []
    for seq in sequences:
        seq_length = int(seq.shape[0])
        # tf.pad rejects negative padding, so fail early with an actionable message
        if seq_length > max_sequence_length:
            raise ValueError(
                f"Sequence length {seq_length} exceeds max_sequence_length={max_sequence_length}"
            )
        padded_sequences.append(tf.pad(seq, [[0, max_sequence_length - seq_length], [0, 0]]))

    padded_sequences = tf.stack(padded_sequences)
    labels = df['label'].values
    return padded_sequences, labels

In [14]:
# Prepare training, validation, and test data (processed in that order)
(
    (train_sequences, train_labels),
    (val_sequences, val_labels),
    (test_sequences, test_labels),
) = [prepare_data(split_df) for split_df in (train_df, val_df, test_df)]

In [15]:
# Cast the label arrays of all three splits to int32
train_labels, val_labels, test_labels = (
    split_labels.astype('int32')
    for split_labels in (train_labels, val_labels, test_labels)
)

In [16]:
# Stop once val_loss has not improved for 10 epochs and roll back to the best weights
early_stopping = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
# Write the model to disk only when val_loss improves
model_checkpoint = callbacks.ModelCheckpoint(
    filepath='detection_model.h5',
    monitor='val_loss',
    save_best_only=True,
)

In [17]:
def create_transformer_model(input_shape, num_classes, num_heads=4, ff_dim=128, num_layers=2):
    """Build a small attention-based sequence classifier.

    Args:
        input_shape: ``(sequence_length, feature_dim)`` of one input sample.
        num_classes: Size of the softmax output layer.
        num_heads: Number of heads in every ``MultiHeadAttention`` layer.
        ff_dim: Width of the hidden ``Dense`` layer in each feed-forward block.
        num_layers: Number of attention + feed-forward blocks to stack.

    Returns:
        An uncompiled Keras ``Model`` mapping inputs of ``input_shape`` to
        ``num_classes`` class probabilities.
    """
    seq_len = input_shape[0]
    feature_dim = input_shape[1]

    inputs = layers.Input(shape=input_shape)

    # Positional Encoding: an Embedding evaluated once on the constant range 0..seq_len-1.
    # NOTE(review): the Embedding is applied to a constant tensor rather than to `inputs`,
    # so its weights may not be tracked/trained by the returned Model — confirm.
    positions = tf.range(start=0, limit=seq_len, delta=1)
    position_embedding = layers.Embedding(input_dim=seq_len, output_dim=feature_dim)(positions)
    x = inputs + position_embedding

    for _ in range(num_layers):
        # Multi-head Self Attention (query and value are both x).
        # NOTE(review): the attention output replaces x with no residual add, unlike the
        # feed-forward block below — confirm this is intended.
        attended = layers.MultiHeadAttention(num_heads=num_heads, key_dim=feature_dim)(x, x)
        x = layers.LayerNormalization(epsilon=1e-6)(attended)
        # Feed Forward Network, projected back to feature_dim and added residually
        hidden = layers.Dense(ff_dim, activation='relu')(x)
        projected = layers.Dense(feature_dim)(hidden)
        x = layers.LayerNormalization(epsilon=1e-6)(x + projected)

    # Classification head: average over the sequence axis, then two Dense + Dropout stages
    x = layers.GlobalAveragePooling1D()(x)
    for units in (256, 128):
        x = layers.Dense(units, activation='relu')(x)
        x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    return models.Model(inputs, outputs)

In [18]:
# Multiply the learning rate by 0.2 after 5 epochs without val_loss improvement, never below 1e-4
reduce_lr = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.2,
    patience=5,
    min_lr=0.0001,
)

In [19]:
# Example hyperparameters
learning_rate = 0.001
batch_size = 16
dropout_rate = 0.3  # NOTE(review): not consumed below; create_transformer_model hard-codes Dropout(0.3)
input_shape = (90,4)
num_classes = 10

model = create_transformer_model(input_shape, num_classes)
# Use the learning_rate defined above instead of repeating the literal, so changing it takes effect
model.compile(optimizer=optimizers.Adam(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [20]:
# Train the model for up to 50 epochs, validating on the held-out validation split
history = model.fit(
    x=train_sequences,
    y=train_labels,
    validation_data=(val_sequences, val_labels),
    epochs=50,
    batch_size=batch_size,
    callbacks=[early_stopping, model_checkpoint, reduce_lr],
)

# Evaluate the model on the test split
test_loss, test_acc = model.evaluate(x=test_sequences, y=test_labels, verbose=2)
print(f"Test accuracy: {test_acc}")

Epoch 1/50
Epoch 2/50
   6/4310 [..............................] - ETA: 44s - loss: 1.3488 - accuracy: 0.4792

  saving_api.save_model(


Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
674/674 - 4s - loss: 0.1068 - accuracy: 0.9706 - 4s/epoch - 6ms/step
Test accuracy: 0.9705745577812195


In [21]:
# Replace the in-memory model with the one stored in the checkpoint file
model = models.load_model(filepath='detection_model.h5')

In [22]:
# Save the current model under its final file name
model.save(filepath='digit_classifier_model.h5')

In [None]:
predictions = model.predict(test_sequences)

# Convert probabilities to label indices
predicted_labels = np.argmax(predictions, axis=1)

# assign() returns a new frame, so the column is not written in place into a frame
# that was derived from df by train_test_split (avoids chained-assignment warnings).
test_df = test_df.assign(predicted_label=predicted_labels)

# Display a preview of the updated test_df instead of dumping the whole frame
print(test_df.head())