# Parse Quaternion String

*Version 1*

In [1]:
# from cayley_dickson_alg import Zi
# from random import randint
# import regex as re

In [2]:
import re

# A quaternion term is either an optionally signed number that may be followed
# by a unit (i, j or k), or an optionally signed bare unit such as '+i'.
# Requiring one of those two forms keeps the empty string and a lone '+' or
# '-' from being accepted as a term.
qterm_pat = r'^[-+]?((\d+(\.\d*)?|\.\d+)([eE][-+]?\d+)?[ijk]?|[ijk])$'

# Test cases: plain numbers first, then the same shapes with a unit attached.
# The last two entries of each list are deliberately malformed.
numbers = ["123", "-45", "3.14", "-0.5", ".75", "1.", "-2.0e5", "1.23E-4", "abc", "1.2.3"]
qterms = ["+i", "-j", "123i", "-45j", "3.14k", "-0.5i", ".75j", "1.k", "-2.0e5i", "1.23E-4j", "abck", "1.2.3i"]

for num_str in numbers + qterms:
    verdict = "is" if re.match(qterm_pat, num_str) else "is NOT"
    print(f"'{num_str}' {verdict} a valid quaternion term.")

'123' is a valid quaternion term.
'-45' is a valid quaternion term.
'3.14' is a valid quaternion term.
'-0.5' is a valid quaternion term.
'.75' is a valid quaternion term.
'1.' is a valid quaternion term.
'-2.0e5' is a valid quaternion term.
'1.23E-4' is a valid quaternion term.
'abc' is NOT a valid quaternion term.
'1.2.3' is NOT a valid quaternion term.
'+i' is a valid quaternion term.
'-j' is a valid quaternion term.
'123i' is a valid quaternion term.
'-45j' is a valid quaternion term.
'3.14k' is a valid quaternion term.
'-0.5i' is a valid quaternion term.
'.75j' is a valid quaternion term.
'1.k' is a valid quaternion term.
'-2.0e5i' is a valid quaternion term.
'1.23E-4j' is a valid quaternion term.
'abck' is NOT a valid quaternion term.
'1.2.3i' is NOT a valid quaternion term.


In [7]:
# Code Golf Quaternion Parser Tests
#
# Each entry is (test string, expected result); the expected result lists the
# components in the order [real, i, j, k].
q_parser_tests = [
    # All four terms present, in real/i/j/k order
    ("1+2i+3j+4k", [1, 2, 3, 4]),
    ("-1+3i-3j+7k", [-1, 3, -3, 7]),
    ("-1-4i-9j-2k", [-1, -4, -9, -2]),
    ("17-16i-15j-14k", [17, -16, -15, -14]),

    # Some components missing
    ("7+2i", [7, 2, 0, 0]),
    ("2i-6k", [0, 2, 0, -6]),
    ("1-5j+2k", [1, 0, -5, 2]),
    ("3+4i-9k", [3, 4, 0, -9]),

    # Bare units, i.e. an implied coefficient of 1 or -1
    ("42i+j-k", [0, 42, 1, -1]),
    ("6-2i+j-3k", [6, -2, 1, -3]),
    ("1+i+j+k", [1, 1, 1, 1]),
    ("-1-i-j-k", [-1, -1, -1, -1]),

    # Terms given out of order
    ("16k-20j+2i-7", [-7, 2, -20, 16]),
    ("i+4k-3j+2", [2, 1, -3, 4]),
    ("5k-2i+9+3j", [9, -2, 3, 5]),
    ("5k-2j+3", [3, 0, -2, 5]),

    # Decimal coefficients
    ("1.75-1.75i-1.75j-1.75k", [1.75, -1.75, -1.75, -1.75]),
    ("2.0j-3k+0.47i-13", [-13, 0.47, 2.0, -3]),  # or [-13 .47 2 -3]
    ("5.6-3i", [5.6, -3, 0, 0]),
    ("k-7.6i", [0, -7.6, 0, 1]),

    # Zero coefficients
    ("0", [0, 0, 0, 0]),
    ("0j+0k", [0, 0, 0, 0]),
    ("-0j", [0, 0, 0, 0]),  # or [0 0 -0 0]
    ("1-0k", [1, 0, 0, 0]),  # or [1 0 0 -0]

    # Uses K instead of k
    ("1+2i+3j+4K", [1, 2, 3, 4]),
]

In [8]:
def split_into_terms(quat):
    """Split a quaternion string into a list of signed term strings.

    The input is lowercased, spaces and parentheses are removed, and the
    string is cut in front of every '+' or '-' that starts a new term, e.g.
    ' ( -1  -2i +4K )' --> ['-1', '-2i', '+4k'].  An empty input yields an
    empty list.

    Raises:
        ValueError: if a resulting term is not an optionally signed number,
            such a number followed by i, j or k, or an optionally signed
            bare unit i, j or k.
    """
    # Make lowercase and remove all spaces
    compact = quat.lower().strip().replace(' ', '')

    # Put a single space in front of each + or - that begins a term.  A sign
    # directly after 'e' belongs to an exponent (as in '1.23e-4j') and must
    # stay attached to its number, so it is not split there.
    spaced = re.sub(r'(?<!e)([+-])', r' \1', compact)

    # Remove any space after a leading parenthesis, then the parentheses
    # themselves, and split at the remaining spaces
    terms = spaced.replace('( ', '(').replace('(', '').replace(')', '').split()

    # Make sure each term in the quaternion is valid.  The pattern needs a
    # number or a unit, so a stray sign (as in '1++2i') is rejected here.
    qterm_pat = r'[-+]?(?:(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?[ijk]?|[ijk])'
    for term in terms:
        if re.fullmatch(qterm_pat, term) is None:
            raise ValueError(f"In {quat}, the term, {term}, is not a valid quaternion term.")

    return terms

In [9]:
# Show how each Code Golf test string is split into terms; the expected
# coefficient list in the second slot of each entry is not used here.
for test in q_parser_tests:
    quat_text = test[0]
    print(split_into_terms(quat_text))

['1', '+2i', '+3j', '+4k']
['-1', '+3i', '-3j', '+7k']
['-1', '-4i', '-9j', '-2k']
['17', '-16i', '-15j', '-14k']
['7', '+2i']
['2i', '-6k']
['1', '-5j', '+2k']
['3', '+4i', '-9k']
['42i', '+j', '-k']
['6', '-2i', '+j', '-3k']
['1', '+i', '+j', '+k']
['-1', '-i', '-j', '-k']
['16k', '-20j', '+2i', '-7']
['i', '+4k', '-3j', '+2']
['5k', '-2i', '+9', '+3j']
['5k', '-2j', '+3']
['1.75', '-1.75i', '-1.75j', '-1.75k']
['2.0j', '-3k', '+0.47i', '-13']
['5.6', '-3i']
['k', '-7.6i']
['0']
['0j', '+0k']
['-0j']
['1', '-0k']
['1', '+2i', '+3j', '+4k']


In [None]:
def make_int_or_float(st: str):
    """Cast a string representation of a number into an integer or a float.

    An int is returned whenever the value is integral ('-2', '-2.0'),
    otherwise a float ('-2.1', '-2E-3').  Values that float() parses as
    non-finite (for example '1e400', which overflows to infinity) are
    returned as floats.

    Raises:
        ValueError: if st cannot be parsed as a number.
    """
    if isinstance(st, str):
        try:
            # Parse plain integer literals exactly; a round trip through
            # float would round integers larger than 2**53.
            return int(st)
        except ValueError:
            pass  # not an integer literal; fall through to float parsing

    try:
        f_st = float(st)
    except (TypeError, ValueError, OverflowError) as err:
        raise ValueError(f"{st} is not a float nor an int") from err

    # is_integer() is False for inf and nan, so int() is never asked to
    # convert a non-finite value.
    return int(f_st) if f_st.is_integer() else f_st

In [None]:
# A non-integral value stays a float: returns -2.1
make_int_or_float('-2.1')

In [None]:
# An integral value written with a decimal point becomes an int: returns -2
make_int_or_float('-2.0')

In [None]:
# A plain integer string: returns -2
make_int_or_float('-2')

In [None]:
# Scientific notation is accepted: returns -0.002
make_int_or_float('-2E-3')

In [None]:
def ijk(st: str):
    """Return the unit letter ('i', 'j' or 'k') contained in a quaternion term.

    The letters are looked for in the order i, j, k and the first one found
    is returned.  If st contains none of them (i.e. it is the real part),
    False is returned.
    """
    for unit in ('i', 'j', 'k'):
        if unit in st:
            return unit
    return False

In [None]:
# A term carrying a unit: returns 'i'
ijk('-3i')

In [None]:
# A real term has no unit: returns False
ijk('-3')

In [None]:
def coef(st):
    """Convert one quaternion term string into a (component, coefficient) pair.

    The component is 'i', 'j' or 'k' when ijk() finds a unit in the term and
    'real' otherwise.  A bare unit with an optional sign ('k', '+i', '-j')
    has coefficient 1 or -1; any other term has its numeric part converted
    by make_int_or_float().

    Raises:
        ValueError: (from make_int_or_float) if the numeric part of the term
            is not a valid number.
    """
    unit = ijk(st)
    if not unit:
        return ('real', make_int_or_float(st))

    # Bare unit: the pattern guarantees st is the unit alone or a sign
    # followed by the unit, so only a leading '-' makes it negative.
    if re.fullmatch(r'[+-]?[ijk]', st):
        return (unit, -1 if st[0] == '-' else 1)

    # Strip only a trailing unit.  Removing the letter wherever it occurs
    # would turn a malformed term such as '2i3' into the number 23 instead
    # of reporting it as invalid.
    number = st[:-1] if st.endswith(unit) else st
    return (unit, make_int_or_float(number))

In [None]:
def coefreg(rg):
    """Build a {component: coefficient} dict from a list of term strings.

    Each term is converted with coef().  Keys appear in the order the
    components are first seen; components that do not occur in rg are absent
    from the result.  Terms naming the same component are added together
    (e.g. '1', '+2', '+3i' gives real = 3) rather than the last one silently
    replacing the earlier ones.
    """
    coefficients = {}
    for component, value in map(coef, rg):
        coefficients[component] = coefficients.get(component, 0) + value
    return coefficients

In [None]:
# Sample input with surrounding parentheses and irregular spacing
q0 = ' ( -1  -2i -3j +4k  )'

In [None]:
# Split q0 into terms, then map each term to its coefficient.  The name
# `reg` used here before is not defined in this notebook; split_into_terms
# produces the list of terms that coefreg consumes.
print(f"'{q0}' --> '{coefreg(split_into_terms(q0))}'")

In [None]:
# Bare units with implied coefficients; expected result: {'i': 1, 'j': -1}
coefreg(split_into_terms('  ( i - j ) '))

In [None]:
# Run every Code Golf test string through the two-step parser
# (split into terms, then extract coefficients) and show the resulting dict.
for test in q_parser_tests:
    print(f"'{test[0]}' --> '{coefreg(split_into_terms(test[0]))}'")

## Other Implementations

### CODE GOLF

See the Python entry by R. Kap in the Code Golf challenge ["Parse a quaternion"](https://codegolf.stackexchange.com/questions/76545/parse-a-quaternion).

In [None]:
from re import *

def w(r):
    """Parse a quaternion string into a list of four floats.

    This is the Code Golf entry adapted to return its result instead of
    printing it.  The coefficients are returned in the order real, i, j, k,
    with 0.0 for any component missing from the input.
    """
    # Accept uppercase unit letters (e.g. '4K'): every pattern below only
    # looks for lowercase letters.
    r = r.lower()

    # Substitute all minus (-) and plus (+) signs NOT followed by a number
    # (if there are any) with a "-1"/"+1", respectively.
    a = sub(r'[+](?![0-9])', '+1', sub(r'[-](?![0-9])', '-1', r))

    # Sort key used below: a piece that is a lone whole number sorts first,
    # and the remaining pieces are ordered by the letters they contain
    # (i, j, k in this case).
    q = lambda x: (not x.isdigit(), ''.join(filter(str.isalpha, x)))

    # Replace each letter NOT preceded by a digit with "+1" followed by that
    # letter.
    for z in findall(r'(?<![0-9])[a-z]', a):
        a = a.replace(z, ('+1{}'.format(z)))

    # Drop the decimal points, turn every plus and minus into a space, split
    # at those spaces and sort with the key above.  If the first piece is not
    # a lone number, the quaternion has no real part, so "+0, " is appended.
    parts = sub(r'[.]', '', sub(r'[+-]', ' ', a)).split(' ')
    if not str(sorted(parts, key=q)[0]).isdigit():
        a += '+0, '

    # Find the unit letters missing from the quaternion via the symmetric
    # difference between the letters found and the letters needed, and
    # append '+0' followed by the letter, a comma and a space for each.
    for i in list(set(findall(r'[a-z]', a)) ^ {'i', 'j', 'k'}):
        a += '+0{}, '.format(i)

    # Insert ", " between an alphanumeric character and a following +/-, so
    # that the commas keep neighbouring parts apart once the pieces are
    # joined again (otherwise `2i+3j+4k+1` would read as `12i+3j+4k`).
    # Split at the spaces, sort with the key above, join, and extract the
    # numbers in that order.
    separated = sub(r'(?<=[A-Za-z0-9])(?=[+-])', ', ', a)
    ordered = ''.join(sorted(separated.split(' '), key=q))
    result = findall(r'[-]?\d+(?:\.\d+)?', ordered)
    return [float(x) for x in result]

In [None]:
def ok(bool):
    """Return "OK" for a truthy argument and "** WRONG! **" otherwise."""
    # NOTE: the parameter name shadows the builtin `bool`; it is kept so the
    # signature stays the same.
    if bool:
        return "OK"
    return "** WRONG! **"

In [None]:
# Run the Code Golf parser on every test string and flag whether its output
# equals the expected coefficient list.
for test in q_parser_tests:
    quat_text, expected = test
    print(f"'{quat_text}' --> {w(quat_text)}  {ok(w(quat_text) == expected)}")

### GOOGLE AI

The AI-generated code below is interesting, but it doesn't work, and even if it did, it isn't written to handle all of the Code Golf test cases.