In [1]:
import os
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt

def parse_inkml(file_path):
    """
    Parse an InkML file and extract its stroke data.

    Args:
        file_path: Path of the InkML file to read.

    Returns:
        A list with one entry per ``<trace>`` element that is a direct child
        of the document root, in document order. Each entry is a list of
        ``(x, y)`` float tuples; a trace with no usable points yields ``[]``.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        ValueError: If one of the first two values of a point is not a float.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()
    namespace = {'ns': 'http://www.w3.org/2003/InkML'}

    strokes = []
    for trace in root.findall('ns:trace', namespace):
        # An empty element (<trace/>) has text None; treat it as "no points"
        # instead of crashing on None.strip().
        raw_points = (trace.text or '').split(',')
        coords = []
        for point in raw_points:
            xy = point.split()
            # Points may carry extra channel values (e.g. "x y t"). Only the
            # first two are used; this assumes they are X and Y -- confirm
            # against the file's traceFormat if it declares another order.
            if len(xy) >= 2:
                coords.append((float(xy[0]), float(xy[1])))
        strokes.append(coords)

    return strokes

def render_strokes_to_image(strokes, save_path=None):
    """
    Render a list of strokes with matplotlib.

    Args:
        strokes: Iterable of strokes, each a sequence of ``(x, y)`` points.
            Strokes with fewer than two points are not drawn.
        save_path: When truthy, the figure is written to this path and a
            confirmation line is printed; otherwise the figure is shown.

    The figure is closed before returning, including when drawing or saving
    raises, so a failing file in a batch loop does not leave a figure open.
    """
    fig, ax = plt.subplots(figsize=(2, 2))
    try:
        for stroke in strokes:
            if len(stroke) >= 2:
                x, y = zip(*stroke)
                ax.plot(x, [-yi for yi in y], 'k', linewidth=2)  # invert y for handwriting

        ax.axis('off')
        ax.set_aspect('equal', adjustable='box')
        fig.tight_layout()

        if save_path:
            fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
            print(f"Saved image to {save_path}")
        else:
            plt.show()
    finally:
        # Close exactly this figure, whatever happened above.
        plt.close(fig)

In [3]:
# Smoke test: parse a single InkML file and render it to a PNG.
if __name__ == '__main__':
    # Sample file location -- point these at a file that exists locally.
    folder, example_file = 'archive/trainData_2012_part1', 'TrainData1_8_sub_12.inkml'
    file_path = os.path.join(folder, example_file)

    strokes = parse_inkml(file_path=file_path)
    render_strokes_to_image(strokes=strokes, save_path='example_output_1.png')

Saved image to example_output_1.png


In [7]:
import os
import xml.etree.ElementTree as ET

def parse_inkml(file_path):
    """
    Parse an InkML file into a list of traces.

    Args:
        file_path: Path of the InkML file to read.

    Returns:
        One list per ``<trace>`` element found anywhere in the document, in
        document order. Each list holds ``(x, y)`` float tuples; a trace with
        no usable points yields ``[]``.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        ValueError: If one of the first two values of a point is not a float.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()
    ns = {'ink': 'http://www.w3.org/2003/InkML'}

    traces_all = []
    for trace in root.findall('.//ink:trace', ns):
        # <trace/> has text None; treat it as an empty trace.
        coords = (trace.text or '').split(',')
        trace_points = []
        for point in coords:
            # split() already handles newlines; deleting them first would fuse
            # "x\ny" into one token and lose the point.
            values = point.split()
            # Extra channel values (e.g. "x y t") are ignored. Assumes the
            # first two values are X and Y -- confirm against traceFormat.
            if len(values) >= 2:
                trace_points.append((float(values[0]), float(values[1])))
        traces_all.append(trace_points)

    return traces_all

In [9]:
# Inspect the parsed structure of one sample file, then render it.
if __name__ == '__main__':
    folder, example_file = 'archive/TrainINKML_2013', '2_em_3.inkml'
    file_path = os.path.join(folder, example_file)

    strokes = parse_inkml(file_path=file_path)

    # Dump every stroke with a 1-based label to see what the parser returns.
    for i, stroke in enumerate(strokes):
        print(f"Stroke {i+1}: {stroke}")

    render_strokes_to_image(strokes=strokes, save_path='example_output.png')

Stroke 1: [(308.0, 52.0), (311.0, 59.0), (312.0, 62.0), (312.0, 65.0), (313.0, 68.0), (314.0, 72.0), (315.0, 77.0), (315.0, 82.0), (315.0, 87.0), (315.0, 92.0), (314.0, 97.0), (313.0, 102.0), (312.0, 106.0), (311.0, 110.0), (310.0, 114.0), (308.0, 118.0), (307.0, 121.0), (305.0, 124.0), (302.0, 126.0), (300.0, 128.0), (298.0, 129.0), (297.0, 130.0), (295.0, 130.0), (294.0, 130.0), (293.0, 129.0), (292.0, 128.0), (292.0, 127.0), (292.0, 126.0), (293.0, 125.0), (294.0, 123.0), (295.0, 122.0), (297.0, 120.0), (299.0, 119.0), (301.0, 119.0), (303.0, 118.0), (305.0, 118.0), (307.0, 118.0), (309.0, 118.0), (311.0, 119.0), (313.0, 121.0), (315.0, 123.0), (317.0, 125.0), (319.0, 128.0), (321.0, 131.0), (323.0, 135.0), (324.0, 139.0), (326.0, 143.0), (327.0, 147.0), (327.0, 152.0), (328.0, 157.0), (329.0, 162.0), (329.0, 167.0), (330.0, 172.0), (330.0, 176.0), (331.0, 180.0), (331.0, 183.0), (331.0, 187.0), (332.0, 189.0), (332.0, 192.0), (332.0, 193.0), (333.0, 195.0)]
Stroke 2: [(358.0, 72.0)

In [11]:
# Convert all strokes to images

In [13]:
import os
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt

def parse_inkml(file_path):
    """
    Read an InkML file and return its traces as lists of points.

    Args:
        file_path: Path of the InkML file to read.

    Returns:
        One list per ``<trace>`` element found anywhere in the document, in
        document order. Each list holds ``(x, y)`` float tuples; a trace with
        no usable points yields ``[]``.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        ValueError: If one of the first two values of a point is not a float.
    """
    tree = ET.parse(file_path)
    root = tree.getroot()
    ns = {'ink': 'http://www.w3.org/2003/InkML'}
    traces_all = []
    for trace in root.findall('.//ink:trace', ns):
        # Guard against <trace/>, whose text is None.
        coords = (trace.text or '').split(',')
        trace_points = []
        for point in coords:
            # Plain split() copes with embedded newlines; removing them first
            # would merge "x\ny" into a single token and drop the point.
            values = point.split()
            # Keep points that carry extra channels (e.g. "x y t") and use the
            # first two values. Assumes X, Y order -- confirm via traceFormat.
            if len(values) >= 2:
                trace_points.append((float(values[0]), float(values[1])))
        traces_all.append(trace_points)
    return traces_all

def render_strokes_to_image(strokes, save_path, figsize=(6,6)):
    """
    Draw strokes as black lines on a blank figure and save it to ``save_path``.

    Args:
        strokes: Iterable of traces, each a sequence of ``(x, y)`` points.
            Traces with fewer than two points are not drawn.
        save_path: Destination path passed to ``savefig``.
        figsize: Figure size forwarded to matplotlib.

    y values are negated before plotting, matching the other renderer in this
    notebook ("invert y for handwriting"); without it the ink is drawn
    vertically flipped when input y grows downwards. The figure is closed
    before returning, including when saving raises.
    """
    fig, ax = plt.subplots(figsize=figsize)
    try:
        for trace in strokes:
            if len(trace) > 1:
                xs, ys = zip(*trace)
                ax.plot(xs, [-y for y in ys], linewidth=2, color='black')
        ax.set_aspect('equal')
        ax.axis('off')
        fig.savefig(save_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Release the figure even if plotting or saving failed.
        plt.close(fig)

def convert_all_inkml_to_images(input_root_dir, output_root_dir):
    """
    Render every ``.inkml`` file below ``input_root_dir`` to a PNG, mirroring
    the directory layout under ``output_root_dir``.

    Args:
        input_root_dir: Directory tree that is walked for ``.inkml`` files.
        output_root_dir: Root under which the PNGs are written; subdirectories
            are created as needed, only for directories that contain at least
            one ``.inkml`` file.

    Conversion is best-effort: a failure on one file is printed and the walk
    continues with the next file.
    """
    for root, dirs, files in os.walk(input_root_dir):
        inkml_names = [name for name in files if name.endswith('.inkml')]
        if not inkml_names:
            continue

        # Create the mirrored output directory once per source directory.
        relative_path = os.path.relpath(root, input_root_dir)
        output_dir = os.path.join(output_root_dir, relative_path)
        os.makedirs(output_dir, exist_ok=True)

        for file in inkml_names:
            input_file = os.path.join(root, file)
            # splitext swaps only the final extension; str.replace would also
            # rewrite a '.inkml' that occurs earlier in the file name.
            output_file = os.path.join(output_dir, os.path.splitext(file)[0] + '.png')

            try:
                strokes = parse_inkml(input_file)
                render_strokes_to_image(strokes, output_file)
                print(f"Saved: {output_file}")
            except Exception as e:
                print(f"Error with file {input_file}: {e}")

In [None]:
# === Convert the whole dataset tree ===
if __name__ == '__main__':
    # Dataset root to walk (adjust to the local layout) and the root for the PNGs.
    input_root, output_root = 'archive', 'converted_images'
    convert_all_inkml_to_images(input_root_dir=input_root, output_root_dir=output_root)

Saved: converted_images/trainData_2012_part1/TrainData2_19_sub_98.png
Saved: converted_images/trainData_2012_part1/TrainData2_19_sub_20.png
Saved: converted_images/trainData_2012_part1/TrainData2_1_sub_33.png
Saved: converted_images/trainData_2012_part1/TrainData2_0_sub_11.png
Saved: converted_images/trainData_2012_part1/TrainData2_21_sub_95.png
Saved: converted_images/trainData_2012_part1/TrainData2_16_sub_88.png
Saved: converted_images/trainData_2012_part1/TrainData2_14_sub_46.png
Saved: converted_images/trainData_2012_part1/TrainData2_6_sub_33.png
Saved: converted_images/trainData_2012_part1/TrainData2_7_sub_11.png
Saved: converted_images/trainData_2012_part1/TrainData2_3_sub_6.png
Saved: converted_images/trainData_2012_part1/TrainData2_11_sub_88.png
Saved: converted_images/trainData_2012_part1/TrainData2_26_sub_95.png
Saved: converted_images/trainData_2012_part1/TrainData2_13_sub_46.png
Saved: converted_images/trainData_2012_part1/TrainData2_2_sub_43.png
Saved: converted_images/tra

In [5]:
import os

def convert_inkml_folder_to_images(input_folder, output_folder):
    """
    Convert all InkML files directly inside ``input_folder`` into PNG images
    saved in ``output_folder``.

    Args:
        input_folder: Directory listed (non-recursively) for ``.inkml`` files.
        output_folder: Directory that receives one ``<name>.png`` per input
            file; it is created, including parents, if it does not exist.

    Conversion is best-effort: a failure on one file is printed and the loop
    continues with the next file.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith('.inkml'):
            continue

        input_path = os.path.join(input_folder, filename)
        output_filename = os.path.splitext(filename)[0] + '.png'
        output_path = os.path.join(output_folder, output_filename)

        try:
            strokes = parse_inkml(input_path)
            render_strokes_to_image(strokes, save_path=output_path)
        except Exception as e:
            print(f"Failed to convert {filename}: {e}")

In [7]:
# Render the InkML files in archive/CROHME_test_2011 to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = 'archive/CROHME_test_2011', 'Images_CROHME_test_2011'

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_CROHME_test_2011/Inkdata_temp_InkFR_HPR_EQU_NOC_scc198_fi5_db144234.png
Saved image to Images_CROHME_test_2011/Inkdata_temp_InkFR_HPR_EQU_NOC_scc132_fi5_db141234.png
Saved image to Images_CROHME_test_2011/Inkdata_temp_InkFR_HPR_EQU_NOC_scc372_fi6_db141415.png
Saved image to Images_CROHME_test_2011/formulaire052-equation017.png
Saved image to Images_CROHME_test_2011/Inkdata_temp_InkFR_HPR_EQU_NOC_scc174_fi4_db143083.png
Saved image to Images_CROHME_test_2011/TestData2_1_sub_14.png
Saved image to Images_CROHME_test_2011/formulaire054-equation055.png
Saved image to Images_CROHME_test_2011/Inkdata_temp_InkFR_HPR_EQU_NOC_scc576_fi6_db139525.png
Saved image to Images_CROHME_test_2011/TestData2_3_sub_27.png
Saved image to Images_CROHME_test_2011/TestData2_3_sub_62.png
Saved image to Images_CROHME_test_2011/TestData2_2_sub_40.png
Saved image to Images_CROHME_test_2011/Inkdata_temp_InkFR_HPR_EQU_NOC_scc273_fi5_db137334.png
Saved image to Images_CROHME_test_2011/Inkdata_tem

In [9]:
# Render the InkML files in archive/CROHME_training_2011 to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = 'archive/CROHME_training_2011', 'Images_CROHME_training_2011'

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_CROHME_training_2011/TrainData1_5_sub_21.png
Saved image to Images_CROHME_training_2011/TrainData2_19_sub_98.png
Saved image to Images_CROHME_training_2011/TrainData1_7_sub_12.png
Saved image to Images_CROHME_training_2011/TrainData2_19_sub_20.png
Saved image to Images_CROHME_training_2011/TrainData1_2_sub_21.png
Saved image to Images_CROHME_training_2011/TrainData1_0_sub_12.png
Saved image to Images_CROHME_training_2011/TrainData1_0_sub_5.png
Saved image to Images_CROHME_training_2011/TrainData2_1_sub_33.png
Saved image to Images_CROHME_training_2011/TrainData2_0_sub_11.png
Saved image to Images_CROHME_training_2011/formulaire001-equation070.png
Saved image to Images_CROHME_training_2011/TrainData2_21_sub_95.png
Saved image to Images_CROHME_training_2011/formulaire002-equation051.png
Saved image to Images_CROHME_training_2011/TrainData2_16_sub_88.png
Saved image to Images_CROHME_training_2011/TrainData2_14_sub_46.png
Saved image to Images_CROHME_training_2011/Tra

In [11]:
# Render the InkML files in archive/MatricesTest2014/MatricesTest to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = 'archive/MatricesTest2014/MatricesTest', 'Images_Matrices_Test_2014'

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_116.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_18.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_9.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_25.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_60.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_44.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_79.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_84.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_86.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_46.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_27.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_62.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_114.png
Saved image to Images_Matrices_Test_2014/RIT_MatrixTest_2014_23.png
Saved image to Images_Matrices_Test_2014/RIT_Ma

In [13]:
# Render the InkML files in archive/MatricesTrain2014/MatricesTrain to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = 'archive/MatricesTrain2014/MatricesTrain', 'Images_Matrices_Train_2014'

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_Matrices_Train_2014/7_em_43.png
Saved image to Images_Matrices_Train_2014/103_em_72.png
Saved image to Images_Matrices_Train_2014/15_gt_2.png
Saved image to Images_Matrices_Train_2014/6_em_3.png
Saved image to Images_Matrices_Train_2014/40_gt_2.png
Saved image to Images_Matrices_Train_2014/20_gt_1.png
Saved image to Images_Matrices_Train_2014/6_em_10.png
Saved image to Images_Matrices_Train_2014/11_em_92.png
Saved image to Images_Matrices_Train_2014/102_em_40.png
Saved image to Images_Matrices_Train_2014/10_em_60.png
Saved image to Images_Matrices_Train_2014/11_em_76.png
Saved image to Images_Matrices_Train_2014/103_em_56.png
Saved image to Images_Matrices_Train_2014/104_em_83.png
Saved image to Images_Matrices_Train_2014/102_em_38.png
Saved image to Images_Matrices_Train_2014/11_em_89.png
Saved image to Images_Matrices_Train_2014/104_em_81.png
Saved image to Images_Matrices_Train_2014/101_em_19.png
Saved image to Images_Matrices_Train_2014/103_em_54.png
Saved ima

In [15]:
# Render the InkML files in archive/testData_2012 to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = 'archive/testData_2012', 'Images_testData_2012'

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_testData_2012/formulaire051-equation073.png
Saved image to Images_testData_2012/formulaire048-equation034.png
Saved image to Images_testData_2012/formulaire050-equation048.png
Saved image to Images_testData_2012/formulaire047-equation026.png
Saved image to Images_testData_2012/001-equation003.png
Saved image to Images_testData_2012/KME2G3_7_sub_66.png
Saved image to Images_testData_2012/formulaire054-equation055.png
Saved image to Images_testData_2012/formulaire042-equation000.png
Saved image to Images_testData_2012/KME1G3_8_sub_20.png
Saved image to Images_testData_2012/formulaire041-equation059.png
Saved image to Images_testData_2012/formulaire043-equation067.png
Saved image to Images_testData_2012/formulaire042-equation019.png
Saved image to Images_testData_2012/KME1G3_0_sub_29.png
Saved image to Images_testData_2012/formulaire055-equation032.png
Saved image to Images_testData_2012/KME1G3_9_sub_26.png
Saved image to Images_testData_2012/KME2G3_8_sub_52.png
Save

In [17]:
# Render the InkML files in archive/TestINKML_2013/TestINKML to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = 'archive/TestINKML_2013/TestINKML', 'Images_testData_2013'

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_testData_2013/126_em_481.png
Saved image to Images_testData_2013/125_em_428.png
Saved image to Images_testData_2013/120_em_293.png
Saved image to Images_testData_2013/123_em_362.png
Saved image to Images_testData_2013/105_em_71.png
Saved image to Images_testData_2013/122_em_340.png
Saved image to Images_testData_2013/ritm_429_0.png
Saved image to Images_testData_2013/127_em_506.png
Saved image to Images_testData_2013/121_em_314.png
Saved image to Images_testData_2013/121_em_329.png
Saved image to Images_testData_2013/106_em_102.png
Saved image to Images_testData_2013/122_em_338.png
Saved image to Images_testData_2013/118_em_226.png
Saved image to Images_testData_2013/rit_42185_2.png
Saved image to Images_testData_2013/115_em_139.png
Saved image to Images_testData_2013/128_em_516.png
Saved image to Images_testData_2013/rit_420_3.png
Saved image to Images_testData_2013/125_em_450.png
Saved image to Images_testData_2013/122_em_359.png
Saved image to Images_testData_2

In [19]:
# Render the InkML files in archive/trainData_2012_part1/trainData_2012_part1 to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = (
        'archive/trainData_2012_part1/trainData_2012_part1',
        'Images_trainData_2012_part1',
    )

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_trainData_2012_part1/TrainData2_19_sub_98.png
Saved image to Images_trainData_2012_part1/TrainData2_19_sub_20.png
Saved image to Images_trainData_2012_part1/TrainData2_1_sub_33.png
Saved image to Images_trainData_2012_part1/TrainData2_0_sub_11.png
Saved image to Images_trainData_2012_part1/TrainData2_21_sub_95.png
Saved image to Images_trainData_2012_part1/TrainData2_16_sub_88.png
Saved image to Images_trainData_2012_part1/TrainData2_14_sub_46.png
Saved image to Images_trainData_2012_part1/TrainData2_6_sub_33.png
Saved image to Images_trainData_2012_part1/TrainData2_7_sub_11.png
Saved image to Images_trainData_2012_part1/TrainData2_3_sub_6.png
Saved image to Images_trainData_2012_part1/TrainData2_11_sub_88.png
Saved image to Images_trainData_2012_part1/TrainData2_26_sub_95.png
Saved image to Images_trainData_2012_part1/TrainData2_13_sub_46.png
Saved image to Images_trainData_2012_part1/TrainData2_2_sub_43.png
Saved image to Images_trainData_2012_part1/TrainData2_3

In [21]:
# Render the InkML files in archive/trainData_2012_part2/trainData_2012_part2 to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = (
        'archive/trainData_2012_part2/trainData_2012_part2',
        'Images_trainData_2012_part2',
    )

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_trainData_2012_part2/TrainData1_5_sub_21.png
Saved image to Images_trainData_2012_part2/KME1G3_4_sub_21.png
Saved image to Images_trainData_2012_part2/TrainData1_7_sub_12.png
Saved image to Images_trainData_2012_part2/KME2G3_22_sub_81.png
Saved image to Images_trainData_2012_part2/KME2G3_21_sub_28.png
Saved image to Images_trainData_2012_part2/KME2G3_16_sub_35.png
Saved image to Images_trainData_2012_part2/TrainData1_2_sub_21.png
Saved image to Images_trainData_2012_part2/KME1G3_3_sub_21.png
Saved image to Images_trainData_2012_part2/TrainData1_0_sub_12.png
Saved image to Images_trainData_2012_part2/TrainData1_0_sub_5.png
Saved image to Images_trainData_2012_part2/KME2G3_25_sub_81.png
Saved image to Images_trainData_2012_part2/KME2G3_30_sub_65.png
Saved image to Images_trainData_2012_part2/KME2G3_11_sub_35.png
Saved image to Images_trainData_2012_part2/KME2G3_26_sub_28.png
Saved image to Images_trainData_2012_part2/formulaire001-equation070.png
Saved image to Imag

In [23]:
# Render the InkML files in archive/TrainINKML_2013 to PNG images.
if __name__ == '__main__':
    # Input directory (edit to match the local dataset layout) and output directory.
    input_folder, output_folder = 'archive/TrainINKML_2013', 'Images_train_InkML_2013'

    convert_inkml_folder_to_images(input_folder=input_folder, output_folder=output_folder)

Saved image to Images_train_InkML_2013/65_herbert.png
Saved image to Images_train_InkML_2013/2009213-139-221.png
Saved image to Images_train_InkML_2013/2009213-137-152.png
Saved image to Images_train_InkML_2013/formulaire039-equation022.png
Saved image to Images_train_InkML_2013/200924-1312-132.png
Saved image to Images_train_InkML_2013/2009212-952-43.png
Saved image to Images_train_InkML_2013/2009213-137-117.png
Saved image to Images_train_InkML_2013/formulaire039-equation067.png
Saved image to Images_train_InkML_2013/200924-1312-177.png
Saved image to Images_train_InkML_2013/200923-131-78.png
Saved image to Images_train_InkML_2013/200923-1553-139.png
Saved image to Images_train_InkML_2013/2009210-947-44.png
Saved image to Images_train_InkML_2013/TrainData1_5_sub_21.png
Saved image to Images_train_InkML_2013/2009210-947-124.png
Saved image to Images_train_InkML_2013/KME1G3_4_sub_21.png
Saved image to Images_train_InkML_2013/TrainData2_19_sub_98.png
Saved image to Images_train_InkML_20

In [25]:
import os
import xml.etree.ElementTree as ET
import csv

def extract_latex_from_inkml(inkml_file_path):
    """
    Parse an InkML file and extract the LaTeX label from an <annotation> tag.

    Only ``<annotation>`` elements that are direct children of the root are
    considered. The first one whose ``type`` attribute is ``truth`` or
    ``latex`` (case-insensitive) and whose text is non-blank wins.

    Args:
        inkml_file_path: Path of the InkML file to read.

    Returns:
        The annotation text with surrounding whitespace removed, or ``None``
        when no matching, non-blank annotation exists.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    tree = ET.parse(inkml_file_path)
    root = tree.getroot()

    for annotation in root.findall('{http://www.w3.org/2003/InkML}annotation'):
        if annotation.attrib.get('type', '').lower() in ('truth', 'latex'):
            # An empty element has text None; a blank label is as good as no
            # label, so keep looking rather than crash or return ''.
            label = (annotation.text or '').strip()
            if label:
                return label

    return None  # fallback if no label found

def create_image_label_csv(inkml_folder, image_folder, output_csv_path):
    """
    Pair each InkML file with its rendered image and write the pairs to a CSV.

    For every ``*.inkml`` entry in ``inkml_folder`` the image
    ``<image_folder>/<stem>.png`` is looked up. Entries without an image, or
    without a LaTeX label, are reported on stdout and skipped. The CSV starts
    with the header ``image_path,latex`` followed by one row per pair.
    """
    pairs = []
    for entry in os.listdir(inkml_folder):
        if not entry.endswith('.inkml'):
            continue

        stem, _ext = os.path.splitext(entry)
        png_path = os.path.join(image_folder, stem + '.png')
        if not os.path.exists(png_path):
            print(f"⚠️ Image not found for {entry}")
            continue

        label = extract_latex_from_inkml(os.path.join(inkml_folder, entry))
        if not label:
            print(f"⚠️ No LaTeX label found in {entry}")
            continue

        pairs.append([png_path, label])

    # Header and data rows go out in a single writerows call.
    with open(output_csv_path, 'w', newline='', encoding='utf-8') as handle:
        csv.writer(handle).writerows([['image_path', 'latex'], *pairs])

    print(f"✅ Saved {len(pairs)} pairs to {output_csv_path}")

In [27]:
if __name__ == '__main__':
    # InkML sources, their rendered images, and the CSV that will pair them.
    inkml_folder, image_folder, output_csv = (
        'archive/CROHME_test_2011',
        'Images_CROHME_test_2011',
        'CROHME_test_2011_labels.csv',
    )

    create_image_label_csv(
        inkml_folder=inkml_folder,
        image_folder=image_folder,
        output_csv_path=output_csv,
    )

⚠️ No LaTeX label found in Inkdata_temp_InkFR_HPR_EQU_NOC_scc198_fi5_db144234.inkml
⚠️ No LaTeX label found in Inkdata_temp_InkFR_HPR_EQU_NOC_scc132_fi5_db141234.inkml
⚠️ No LaTeX label found in Inkdata_temp_InkFR_HPR_EQU_NOC_scc372_fi6_db141415.inkml
⚠️ No LaTeX label found in formulaire052-equation017.inkml
⚠️ No LaTeX label found in Inkdata_temp_InkFR_HPR_EQU_NOC_scc174_fi4_db143083.inkml
⚠️ No LaTeX label found in TestData2_1_sub_14.inkml
⚠️ No LaTeX label found in formulaire054-equation055.inkml
⚠️ No LaTeX label found in Inkdata_temp_InkFR_HPR_EQU_NOC_scc576_fi6_db139525.inkml
⚠️ No LaTeX label found in TestData2_3_sub_27.inkml
⚠️ No LaTeX label found in TestData2_3_sub_62.inkml
⚠️ No LaTeX label found in TestData2_2_sub_40.inkml
⚠️ No LaTeX label found in Inkdata_temp_InkFR_HPR_EQU_NOC_scc273_fi5_db137334.inkml
⚠️ No LaTeX label found in Inkdata_temp_InkFR_HPR_EQU_NOC_scc458_fi6_db144725.inkml
⚠️ No LaTeX label found in Inkdata_temp_InkFR_HPR_EQU_NOC_scc408_fi5_db142784.inkml
⚠️

In [29]:
if __name__ == '__main__':
    # InkML sources, their rendered images, and the CSV that will pair them.
    inkml_folder, image_folder, output_csv = (
        'archive/MatricesTest2014/MatricesTest',
        'Images_Matrices_Test_2014',
        'Matrices_Test_2014_labels.csv',
    )

    create_image_label_csv(
        inkml_folder=inkml_folder,
        image_folder=image_folder,
        output_csv_path=output_csv,
    )

✅ Saved 122 pairs to Matrices_Test_2014_labels.csv


In [31]:
if __name__ == '__main__':
    # InkML sources, their rendered images, and the CSV that will pair them.
    inkml_folder, image_folder, output_csv = (
        'archive/MatricesTrain2014/MatricesTrain',
        'Images_Matrices_Train_2014',
        'Matrices_Train_2014_labels.csv',
    )

    create_image_label_csv(
        inkml_folder=inkml_folder,
        image_folder=image_folder,
        output_csv_path=output_csv,
    )

✅ Saved 256 pairs to Matrices_Train_2014_labels.csv


In [33]:
if __name__ == '__main__':
    # InkML sources, their rendered images, and the CSV that will pair them.
    inkml_folder, image_folder, output_csv = (
        'archive/testData_2012',
        'Images_testData_2012',
        'testData_2012_labels.csv',
    )

    create_image_label_csv(
        inkml_folder=inkml_folder,
        image_folder=image_folder,
        output_csv_path=output_csv,
    )

⚠️ No LaTeX label found in formulaire051-equation073.inkml
⚠️ No LaTeX label found in formulaire048-equation034.inkml
⚠️ No LaTeX label found in formulaire050-equation048.inkml
⚠️ No LaTeX label found in formulaire047-equation026.inkml
⚠️ No LaTeX label found in 001-equation003.inkml
⚠️ No LaTeX label found in KME2G3_7_sub_66.inkml
⚠️ No LaTeX label found in formulaire054-equation055.inkml
⚠️ No LaTeX label found in formulaire042-equation000.inkml
⚠️ No LaTeX label found in KME1G3_8_sub_20.inkml
⚠️ No LaTeX label found in formulaire041-equation059.inkml
⚠️ No LaTeX label found in formulaire043-equation067.inkml
⚠️ No LaTeX label found in formulaire042-equation019.inkml
⚠️ No LaTeX label found in KME1G3_0_sub_29.inkml
⚠️ No LaTeX label found in formulaire055-equation032.inkml
⚠️ No LaTeX label found in KME1G3_9_sub_26.inkml
⚠️ No LaTeX label found in KME2G3_8_sub_52.inkml
⚠️ No LaTeX label found in KME2G3_9_sub_70.inkml
⚠️ No LaTeX label found in formulaire046-equation041.inkml
⚠️ No La

In [35]:
if __name__ == '__main__':
    # InkML sources, their rendered images, and the CSV that will pair them.
    inkml_folder, image_folder, output_csv = (
        'archive/TestINKML_2013/TestINKML',
        'Images_testData_2013',
        'testData_2013_labels.csv',
    )

    create_image_label_csv(
        inkml_folder=inkml_folder,
        image_folder=image_folder,
        output_csv_path=output_csv,
    )

⚠️ No LaTeX label found in 126_em_481.inkml
⚠️ No LaTeX label found in 125_em_428.inkml
⚠️ No LaTeX label found in 120_em_293.inkml
⚠️ No LaTeX label found in 123_em_362.inkml
⚠️ No LaTeX label found in 105_em_71.inkml
⚠️ No LaTeX label found in 122_em_340.inkml
⚠️ No LaTeX label found in ritm_429_0.inkml
⚠️ No LaTeX label found in 127_em_506.inkml
⚠️ No LaTeX label found in 121_em_314.inkml
⚠️ No LaTeX label found in 121_em_329.inkml
⚠️ No LaTeX label found in 106_em_102.inkml
⚠️ No LaTeX label found in 122_em_338.inkml
⚠️ No LaTeX label found in 118_em_226.inkml
⚠️ No LaTeX label found in rit_42185_2.inkml
⚠️ No LaTeX label found in 115_em_139.inkml
⚠️ No LaTeX label found in 128_em_516.inkml
⚠️ No LaTeX label found in rit_420_3.inkml
⚠️ No LaTeX label found in 125_em_450.inkml
⚠️ No LaTeX label found in 122_em_359.inkml
⚠️ No LaTeX label found in 105_em_68.inkml
⚠️ No LaTeX label found in rit_4270_3.inkml
⚠️ No LaTeX label found in 123_em_386.inkml
⚠️ No LaTeX label found in rit_426

In [None]:
# Dataset folder: archive/trainData_2012_part1/trainData_2012_part1

In [37]:
if __name__ == '__main__':
    # InkML sources, their rendered images, and the CSV that will pair them.
    inkml_folder, image_folder, output_csv = (
        'archive/trainData_2012_part1/trainData_2012_part1',
        'Images_trainData_2012_part1',
        'trainData_2012_part1_labels.csv',
    )

    create_image_label_csv(
        inkml_folder=inkml_folder,
        image_folder=image_folder,
        output_csv_path=output_csv,
    )

✅ Saved 630 pairs to trainData_2012_part1_labels.csv


In [39]:
if __name__ == '__main__':
    # InkML sources, their rendered images, and the CSV that will pair them.
    inkml_folder, image_folder, output_csv = (
        'archive/trainData_2012_part2/trainData_2012_part2',
        'Images_trainData_2012_part2',
        'trainData_2012_part2_labels.csv',
    )

    create_image_label_csv(
        inkml_folder=inkml_folder,
        image_folder=image_folder,
        output_csv_path=output_csv,
    )

✅ Saved 708 pairs to trainData_2012_part2_labels.csv


In [41]:
if __name__ == '__main__':
    # InkML sources, their rendered images, and the CSV that will pair them.
    inkml_folder, image_folder, output_csv = (
        'archive/TrainINKML_2013',
        'Images_train_InkML_2013',
        'train_InkML_2013_labels.csv',
    )

    create_image_label_csv(
        inkml_folder=inkml_folder,
        image_folder=image_folder,
        output_csv_path=output_csv,
    )

⚠️ Image not found for MfrDB0104.inkml
⚠️ Image not found for MfrDB3088.inkml
✅ Saved 8834 pairs to train_InkML_2013_labels.csv
