In [2]:
import os
import xml.etree.ElementTree as ET
import numpy as np
import cv2

In [3]:
# Define functions for data preprocessing and loading
def parse_inkml(inkml_file):
    tree = ET.parse(inkml_file)
    root = tree.getroot()
    
    # Extract strokes and label from the InkML file
    strokes = []
    label = None
    
    for trace_group in root.iter('{http://www.w3.org/2003/InkML}traceGroup'):
        strokes_data = []
        for trace in trace_group.iter('{http://www.w3.org/2003/InkML}trace'):
            trace_points = trace.text.strip().split(',')
            trace_points = [list(map(int, point.split())) for point in trace_points]
            strokes_data.append(trace_points)
        strokes.append(strokes_data)
    
    for annotation in root.iter('{http://www.w3.org/2003/InkML}annotation'):
        if annotation.attrib.get('type') == 'truth':
            label = annotation.text.strip()
    
    return strokes, label

def strokes_to_image(strokes, image_size=(128, 128)):
    # Create an empty canvas
    canvas = np.zeros((image_size[0], image_size[1], 3), dtype=np.uint8)
    
    # Draw strokes onto the canvas
    for stroke in strokes:
        for i in range(1, len(stroke)):
            cv2.line(canvas, tuple(stroke[i-1]), tuple(stroke[i]), (255, 255, 255), 2)
    
    # Resize the canvas to the desired image size
    canvas = cv2.resize(canvas, image_size)
    
    return canvas

# Specify the directory containing the InkML files
directory = 'c:\\Users\\DELL\\Desktop\\lab\\.vscode\\train'

# Create a list to store processed data and labels
data = []
labels = []

# Loop through all files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.inkml'):
        # Construct the full file path
        file_path = os.path.join(directory, filename)

        # Parse the InkML file
        strokes, label = parse_inkml(file_path)

        # Convert strokes to images
        image = strokes_to_image(strokes)

        # Append data and labels
        data.append(image)
        labels.append(label)

# Convert data and labels to NumPy arrays
data_array = np.array(data)
labels_array = np.array(labels)

# Save data and labels as NumPy arrays
np.save('data.npy', data_array)
np.save('labels.npy', labels_array)

print("Data and labels saved successfully.")

Data and labels saved successfully.


In [6]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import LabelEncoder
import joblib

# Load the processed data and labels
data = np.load('data.npy')
labels = np.load('labels.npy')

# Normalize the data to values between 0 and 1
data = data / 255.0

# Split the data into training and validation sets (you can adjust the split ratio)
split_ratio = 0.8
split_index = int(len(data) * split_ratio)

x_train, x_val = data[:split_index], data[split_index:]
y_train, y_val = labels[:split_index], labels[split_index:]

# Encode the labels
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded = label_encoder.transform(y_val)

num_classes = len(np.unique(labels))  # Number of unique equation classes

model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax')  # Adjust the number of output neurons
])


# Compile the model
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

# Train the model
batch_size = 32  # You can adjust the batch size
epochs = 10      # You can adjust the number of epochs

history = model.fit(x_train, y_train_encoded, batch_size=batch_size, epochs=epochs,
                    validation_data=(x_val, y_val_encoded))

# Save the trained model
model.save('trained_model.h5')

print("Training completed, and the model has been saved.")

# Save the label encoder
joblib.dump(label_encoder, 'label_encoder.pkl')


Epoch 1/10


  output, from_logits = _get_logits(


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


Training completed, and the model has been saved.


['label_encoder.pkl']

In [5]:
print("Number of unique equation classes:", len(np.unique(labels)))


Number of unique equation classes: 51
