# Demo

In [1]:
import numpy as np
import os
import random
from sys import platform as sys_pf
import matplotlib

In [2]:
if sys_pf == 'darwin':
    matplotlib.use("TkAgg")
from matplotlib import pyplot as plt
# ---
# Demo for how to load image and stroke data for a character
# ---

# Plot the motor trajectory over an image
#
# Input
#  I [105 x 105 nump] grayscale image
#  drawings: [ns list] of strokes (numpy arrays) in motor space
#  lw : line width
def plot_motor_to_image(I,drawing,lw=2):
    drawing = [d[:,0:2] for d in drawing] # strip off the timing data (third column)
    drawing = [space_motor_to_img(d) for d in drawing] # convert to image space
    plt.imshow(I,cmap='gray')
    ns = len(drawing)
    for sid in range(ns): # for each stroke
        plot_traj(drawing[sid],get_color(sid),lw)
    plt.xticks([])
    plt.yticks([])

# Plot individual stroke
#
# Input
#  stk: [n x 2] individual stroke
#  color: stroke color
#  lw: line width
def plot_traj(stk,color,lw):
    n = stk.shape[0]
    if n > 1:
        plt.plot(stk[:,0],stk[:,1],color=color,linewidth=lw)
    else:
        plt.plot(stk[0,0],stk[0,1],color=color,linewidth=lw,marker='.')

# Color map for the stroke of index k
def get_color(k):
    scol = ['r','g','b','m','c']
    ncol = len(scol)
    if k < ncol:
       out = scol[k]
    else:
       out = scol[-1]
    return out

# convert to str and add leading zero to single digit numbers
def num2str(idx):
       if idx < 10:
        return '0'+str(idx)
       return str(idx)

# Load binary image for a character
#
# fn : filename
def load_img(fn):
       I = plt.imread(fn)
       I = np.array(I,dtype=bool)
       return I

# Load stroke data for a character from text file
#
# Input
#   fn : filename
#
# Output
#   motor : list of strokes (each is a [n x 3] numpy array)
#      first two columns are coordinates
#	   the last column is the timing data (in milliseconds)
def load_motor(fn):
    motor = []
    with open(fn,'r') as fid:
        lines = fid.readlines()
    lines = [l.strip() for l in lines]
    for myline in lines:
        if myline =='START': # beginning of character
            stk = []
        elif myline =='BREAK': # break between strokes
            stk = np.array(stk)
            motor.append(stk) # add to list of strokes
            stk = [] 
        else:
            arr = np.fromstring(myline,dtype=float,sep=',')
            stk.append(arr)
    return motor

#
# Map from motor space to image space (or vice versa)
#
# Input
#   pt: [n x 2] points (rows) in motor coordinates
#
# Output
#  new_pt: [n x 2] points (rows) in image coordinates
def space_motor_to_img(pt):
    pt[:,1] = -pt[:,1]
    return pt
def space_img_to_motor(pt):
    pt[:,1] = -pt[:,1]
    return

if __name__ == "__main__":
    img_dir = 'images_background'
    stroke_dir = 'strokes_background'
    nreps = 20 # number of renditions for each character
    nalpha = 5 # number of alphabets to show

    alphabet_names = [a for a in os.listdir(img_dir) if a[0] != '.'] # get folder names
    alphabet_names = random.sample(alphabet_names,nalpha) # choose random alphabets

    for a in range(nalpha): # for each alphabet
        print('generating figure ' + str(a+1) + ' of ' + str(nalpha))
        alpha_name = alphabet_names[a]
        
        # choose a random character from the alphabet
        character_id = random.randint(1,len(os.listdir(os.path.join(img_dir,alpha_name))))

        # get image and stroke directories for this character
        img_char_dir = os.path.join(img_dir,alpha_name,'character'+num2str(character_id))
        stroke_char_dir = os.path.join(stroke_dir,alpha_name,'character'+num2str(character_id))

        # get base file name for this character
        fn_example = os.listdir(img_char_dir)[0]
        fn_base = fn_example[:fn_example.find('_')] 

        plt.figure(a,figsize=(10,8))
        plt.clf()
        for r in range(1,nreps+1): # for each rendition
            plt.subplot(4,5,r)
            fn_stk = stroke_char_dir + '/' + fn_base + '_' + num2str(r) + '.txt'
            fn_img = img_char_dir + '/' + fn_base + '_' + num2str(r) + '.png'			
            motor = load_motor(fn_stk)
            I = load_img(fn_img)
            plot_motor_to_image(I,motor)
            if r==1:
                plt.title(alpha_name[:15] + '\n character ' + str(character_id))
plt.tight_layout()
plt.show()

generating figure 1 of 5
generating figure 2 of 5
generating figure 3 of 5
generating figure 4 of 5
generating figure 5 of 5


# Stroke Counter

In [3]:
import os
import random

# Set the directories for the dataset
img_dir = 'images_background'
stroke_dir = 'strokes_background'

# Define a function to count the number of strokes in a character's stroke data
def count_strokes(character_strokes):
    return len(character_strokes)

# Get the list of alphabet folders
alphabet_names = [a for a in os.listdir(img_dir) if a[0] != '.']

# Choose a random alphabet
alphabet_name = random.choice(alphabet_names)

# Get the list of character folders for the chosen alphabet
character_names = os.listdir(os.path.join(img_dir, alphabet_name))

# Choose a random character
character_name = random.choice(character_names)

# Get the stroke data for the chosen character
stroke_char_dir = os.path.join(stroke_dir, alphabet_name, character_name)

# Get the list of stroke files for the character
stroke_files = os.listdir(stroke_char_dir)

# Choose a random stroke file
stroke_file = random.choice(stroke_files)

# Load the stroke data from the file
stroke_data = load_motor(os.path.join(stroke_char_dir, stroke_file))

# Count the number of strokes in the character
num_strokes = count_strokes(stroke_data)

# Print the result with the alphabet name and character name
print(f"Alphabet: {alphabet_name}")
print(f"Character: {character_name}")
print(f"Number of Strokes: {num_strokes}")

Alphabet: Cyrillic
Character: character30
Number of Strokes: 1


# Interpolation
The stroke data in the Omniglot dataset is provided as raw pen coordinates with non-uniform spatial and temporal sampling intervals. To ensure consistent analysis and processing, it is necessary to perform interpolation to obtain uniform intervals either in space (coordinates) or time.

## <u>Linear:</u>

Linear interpolation is a simpler method that assumes a linear relationship between the known data points. It works well when the stroke data has a relatively simple and consistent pattern. It is computationally efficient and can provide reasonable results in many cases.

In [4]:
import os
import numpy as np

# Specify the directory for stroke data
stroke_dir = 'strokes_background'

# Load motor space to image space conversion functions
def space_motor_to_img(pt):
    pt[:, 1] = -pt[:, 1]
    return pt

def space_img_to_motor(pt):
    pt[:, 1] = -pt[:, 1]
    return pt

# Perform linear interpolation for stroke data
def linear_interpolation(stroke_data):
    interpolated_data = []
    for stroke in stroke_data:
        interpolated_stroke = []
        prev_point = stroke[0]  # First point of the stroke
        interpolated_stroke.append(prev_point)

        for point in stroke[1:]:
            # Calculate the number of intermediate points to interpolate
            num_intermediate_points = int(np.ceil(point[2])) - int(np.ceil(prev_point[2])) - 1

            # Interpolate the intermediate points using linear interpolation
            for i in range(num_intermediate_points):
                alpha = (i + 1) / (num_intermediate_points + 1)
                interpolated_point = prev_point + alpha * (point - prev_point)
                interpolated_stroke.append(interpolated_point)

            interpolated_stroke.append(point)
            prev_point = point

        interpolated_data.append(np.array(interpolated_stroke))

    return interpolated_data

In [5]:
# EXAMPLE usage for a stroke file:
alphabet_name = 'bengali'
character_name = 'character01'
stroke_char_dir = os.path.join(stroke_dir, alphabet_name, character_name)

# Get the list of stroke files for the character
stroke_files = os.listdir(stroke_char_dir)

# Choose a stroke file
stroke_file = stroke_files[0]  # Select the first stroke file for demonstration

# Load the stroke data from the file
stroke_data = load_motor(os.path.join(stroke_char_dir, stroke_file))

# Perform linear interpolation on the stroke data
interpolated_data = linear_interpolation(stroke_data)

# Plot the interpolated stroke data
plt.figure()
plt.title(f"Interpolated data for {stroke_file}")
for stroke in interpolated_data:
    plt.plot(stroke[:, 0], stroke[:, 1])
plt.show()
plt.close()

# Print the interpolated stroke data
for stroke in interpolated_data:
    print(stroke)

[[ 4.83775980e+01 -4.13024640e+01  0.00000000e+00]
 [ 4.83648861e+01 -4.13024640e+01  1.00000000e+00]
 [ 4.83521743e+01 -4.13024640e+01  2.00000000e+00]
 ...
 [ 1.66894624e+01 -2.21736504e+01  1.62800000e+03]
 [ 1.65928522e+01 -2.20770402e+01  1.62900000e+03]
 [ 1.64962420e+01 -2.19804300e+01  1.63000000e+03]]
[[  60.93692     -39.370261   2939.        ]
 [  60.96303086  -39.39637184 2940.        ]
 [  60.98914173  -39.42248268 2941.        ]
 ...
 [  72.530141    -19.25777991 4247.        ]
 [  72.530141    -19.16995245 4248.        ]
 [  72.530141    -19.082125   4249.        ]]
[[  33.886073    -13.285515   5391.        ]
 [  33.89930727  -13.285515   5392.        ]
 [  33.91254155  -13.285515   5393.        ]
 ...
 [  90.73744192  -16.18382    6075.        ]
 [  90.81175746  -16.18382    6076.        ]
 [  90.886073    -16.18382    6077.        ]]
[[  75.428446    -16.18382    6874.        ]
 [  75.40831887  -16.18382    6875.        ]
 [  75.38819175  -16.18382    6876.        ]
 

In [6]:
# EXAMPLE usage for multiple stroke files:
alphabet_name = 'bengali'
character_name = 'character01'
stroke_char_dir = os.path.join(stroke_dir, alphabet_name, character_name)

# Get the list of stroke files for the character
stroke_files = os.listdir(stroke_char_dir)

# Iterate over all stroke files
for stroke_file in stroke_files:
    # Load the stroke data from the file
    stroke_data = load_motor(os.path.join(stroke_char_dir, stroke_file))

    # Perform linear interpolation on the stroke data
    interpolated_data = linear_interpolation(stroke_data)

#     # Print the interpolated stroke data
#     print(f"Interpolated data for {stroke_file}:")
#     for stroke in interpolated_data:
#         print(stroke)
#         print("---")
        
# Plot the interpolated stroke data
plt.figure()
plt.title(f"Interpolated data for {stroke_file}")
for stroke in interpolated_data:
    plt.plot(stroke[:, 0], stroke[:, 1])
plt.show()
plt.close()

## <u>Spline:</u>
Spline interpolation is a more flexible method that fits a smooth curve through the data points. It is suitable for more complex patterns or cases where we want a smoother representation of the strokes. Spline interpolation can capture more intricate details and provide a more accurate representation of the stroke data. However, spline interpolation might be computationally more intensive than linear interpolation, especially for large datasets.

In [7]:
import os
import numpy as np
from scipy.interpolate import splprep, splev

# Specify the directory for stroke data
stroke_dir = 'strokes_background'

# Load motor space to image space conversion functions
def space_motor_to_img(pt):
    pt[:, 1] = -pt[:, 1]
    return pt

def space_img_to_motor(pt):
    pt[:, 1] = -pt[:, 1]
    return pt

# Load stroke data for a character from text file
def load_motor(fn):
    motor = []
    with open(fn, 'r') as fid:
        lines = fid.readlines()
    lines = [l.strip() for l in lines]
    for myline in lines:
        if myline == 'START':  # beginning of character
            stk = []
        elif myline == 'BREAK':  # break between strokes
            stk = np.array(stk)
            motor.append(stk)  # add to list of strokes
            stk = []
        else:
            arr = np.fromstring(myline, dtype=float, sep=',')
            stk.append(arr)
    return motor

def spline_interpolation(stroke):
    t = stroke[:, 2]  # Time values
    x = stroke[:, 0]  # X coordinates
    y = stroke[:, 1]  # Y coordinates

    if len(t) < 3:
        # If there are fewer than 3 points, return the original stroke data
        return stroke

    # Perform spline interpolation using scipy's splprep and splev functions
    tck, _ = splprep([x, y], u=t, s=0)
    num_points = int(np.ceil(t[-1])) + 1  # Number of points for interpolation
    t_interp = np.linspace(t[0], t[-1], num_points)
    x_interp, y_interp = splev(t_interp, tck)

    # Combine interpolated coordinates with original time values
    interpolated_stroke = np.column_stack((x_interp, y_interp, t_interp))

    return interpolated_stroke

In [8]:
# EXAMPLE usage
alphabet_name = 'bengali'
character_name = 'character01'

# Specify the directory for stroke data
stroke_dir = 'strokes_background'

# Get the list of stroke files for the specified alphabet and character
stroke_files_dir = os.path.join(stroke_dir, alphabet_name, character_name)
stroke_files = os.listdir(stroke_files_dir)

# Iterate over the stroke files and perform spline interpolation
for stroke_file in stroke_files:
    stroke_file_path = os.path.join(stroke_files_dir, stroke_file)

    # Load the stroke data from the file
    stroke_data = load_motor(stroke_file_path)

    # Prepare lists to store all original and interpolated strokes
    original_strokes = []
    interpolated_strokes = []

    # Perform spline interpolation on each stroke in the file
    for stroke in stroke_data:
        # Append the original stroke to the list
        original_strokes.append(stroke)

        # Perform spline interpolation on the stroke
        interpolated_stroke = spline_interpolation(stroke)
        interpolated_strokes.append(interpolated_stroke)

    # Plot all the strokes in a single figure
    plt.figure()
    plt.title(f"Original and Interpolated Strokes - {stroke_file}")

    # Plot the original strokes
    for stroke in original_strokes:
        plt.plot(stroke[:, 0], stroke[:, 1], 'b')

    # Plot the interpolated strokes
    for stroke in interpolated_strokes:
        plt.plot(stroke[:, 0], stroke[:, 1], 'r')

    plt.show()

#     # Print the interpolated stroke data
#     print(f"Interpolated data for {stroke_file}:")
#     for i, stroke in enumerate(interpolated_strokes):
#         print(f"Stroke {i+1}:")
#         print(stroke)
#         print()


In [9]:
### Visualizing the Interpolated Strokes:

# import matplotlib.pyplot as plt

# # Load the original image
# original_image = plt.imread("/Users/fatimaadmin/Documents/Alphabets/images_background/Bengali/character01/0132_01.png")

# # If there's no original image, uncomment this to reate a blank canvas 
# # blank_canvas = np.zeros((image_height, image_width))

# # Plot the original image
# plt.imshow(original_image, cmap='gray')

# # Plot the interpolated strokes
# for interpolated_stroke in interpolated_data:
#     # Plot the interpolated stroke points
#     plt.plot(interpolated_stroke[:, 0], interpolated_stroke[:, 1], 'b-', linewidth=2)

# plt.title("Interpolated Strokes")
# plt.axis('off')
# plt.show()

# NOTE: Note sure about this... Interpolated stroke is wrong?? fix this

# Stroke Shape

## 1. Length: (still needs debugging)

In [10]:
import os
import numpy as np
from scipy.interpolate import splprep, splev

def calculate_stroke_length(stroke_data):
    # Perform spline interpolation on the stroke data
    x = [point[0] for point in stroke_data]  # Extract x coordinates
    y = [point[1] for point in stroke_data]  # Extract y coordinates
    tck, _ = splprep([x, y], s=0)  # Spline interpolation
    u = np.linspace(0, 1, num=1000)  # Parameter values for evaluation
    interpolated_points = np.column_stack(splev(u, tck))  # Interpolated points

    # Calculate the total length of the interpolated stroke
    total_length = 0.0
    for i in range(len(interpolated_points) - 1):
        x1, y1 = interpolated_points[i]
        x2, y2 = interpolated_points[i + 1]
        distance = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
        total_length += distance

    return total_length

In [11]:
# EXAMPLE
alphabet_name = 'bengali'
character_name = 'character01'

# Get the list of stroke files for the specified alphabet and character
stroke_files_dir = os.path.join('strokes_background', alphabet_name, character_name)
stroke_files = os.listdir(stroke_files_dir)

# Iterate over the stroke files and calculate the stroke length
for stroke_file in stroke_files:
    stroke_file_path = os.path.join(stroke_files_dir, stroke_file)

    # Load the stroke data from the file
    with open(stroke_file_path, 'r') as file:
        stroke_data = []
        for line in file:
            if line.strip() == 'START' or line.strip() == 'BREAK':
                continue
            point = [float(coord) for coord in line.strip().split(',')]
            stroke_data.append(point)

    # Calculate the stroke length
    stroke_length = calculate_stroke_length(stroke_data)
    print(f"Stroke length for {stroke_file}: {stroke_length}")

Stroke length for 0132_18.txt: 372.00985848788036
Stroke length for 0132_19.txt: 315.87165111073745
Stroke length for 0132_09.txt: 479.1146619154323
Stroke length for 0132_20.txt: 373.40832531685675
Stroke length for 0132_08.txt: 515.4353367389808
Stroke length for 0132_05.txt: 399.3327134629787
Stroke length for 0132_11.txt: 395.2878025898889
Stroke length for 0132_10.txt: 380.4264410803669
Stroke length for 0132_04.txt: 426.5082335565405
Stroke length for 0132_12.txt: 402.3792206261718
Stroke length for 0132_06.txt: 364.17774995632357
Stroke length for 0132_07.txt: 343.572533813146
Stroke length for 0132_13.txt: 363.28825184568933
Stroke length for 0132_17.txt: 404.8115679772364
Stroke length for 0132_03.txt: 298.58701257855614
Stroke length for 0132_02.txt: 380.092453711694
Stroke length for 0132_16.txt: 414.8344180841333
Stroke length for 0132_14.txt: 281.78919598150395
Stroke length for 0132_15.txt: 326.9117280955295
Stroke length for 0132_01.txt: 526.5724295435848


In [12]:
# I keep getting an invalid input error when I try different alphabet names? 

In [13]:
import os
import random
import numpy as np
from scipy.interpolate import splprep, splev

# Function to calculate stroke length
def calculate_stroke_length(stroke_data):
    x = [point[0] for point in stroke_data]
    y = [point[1] for point in stroke_data]
    tck, _ = splprep([x, y], s=0)
    u = np.linspace(0, 1, num=1000)
    interpolated_points = np.column_stack(splev(u, tck))

    total_length = 0.0
    for i in range(len(interpolated_points) - 1):
        x1, y1 = interpolated_points[i]
        x2, y2 = interpolated_points[i + 1]
        distance = np.sqrt((x2 - x1)**2 + (y2 - y1)**2)
        total_length += distance

    return total_length

if __name__ == "__main__":
    alphabet_name = 'greek'
    character_name = 'character01'

    # Get the list of stroke files for the specified alphabet and character
    stroke_files_dir = os.path.join('strokes_background', alphabet_name, character_name)
    stroke_files = os.listdir(stroke_files_dir)

    # Iterate over the stroke files and calculate the stroke length
    for stroke_file in stroke_files:
        stroke_file_path = os.path.join(stroke_files_dir, stroke_file)

        # Load the stroke data from the file
        with open(stroke_file_path, 'r') as file:
            stroke_data = []
            for line in file:
                if line.strip() == 'START' or line.strip() == 'BREAK':
                    continue
                point = [float(coord) for coord in line.strip().split(',')]
                stroke_data.append(point)

        # Check if the stroke data is empty
        if len(stroke_data) == 0:
            print("Stroke data is empty for", stroke_file_path)
            continue

        try:
            # Calculate the stroke length
            stroke_length = calculate_stroke_length(stroke_data)
            print(f"Stroke length for {stroke_file}: {stroke_length}")
        except Exception as e:
            print("Error during spline interpolation for", stroke_file_path, ":", e)


Stroke length for 0394_02.txt: 169.43557998485926
Stroke length for 0394_16.txt: 217.01684483433922
Stroke length for 0394_17.txt: 230.36512315822645
Stroke length for 0394_03.txt: 145.94474809118026
Stroke length for 0394_15.txt: 171.97392762463485
Stroke length for 0394_01.txt: 169.14941587694258
Stroke length for 0394_14.txt: 169.8106160488763
Stroke length for 0394_10.txt: 189.91355586157306
Stroke length for 0394_04.txt: 216.36480074216013
Stroke length for 0394_05.txt: 191.87699468966912
Stroke length for 0394_11.txt: 295.0751880063626
Error during spline interpolation for strokes_background/greek/character01/0394_07.txt : Invalid inputs.
Stroke length for 0394_13.txt: 197.31394926201557
Stroke length for 0394_12.txt: 194.03631511300506
Stroke length for 0394_06.txt: 210.36754141067644
Stroke length for 0394_08.txt: 198.20622422622284
Stroke length for 0394_20.txt: 171.81754161145648
Stroke length for 0394_09.txt: 193.45418046129595
Stroke length for 0394_19.txt: 198.561727939651