In [2]:
class TSAFM_Ordinances:
    """
    Transition-assigned finite state machine (Mealy automaton) used to pull
    code phrases such as "ordinance no. 26, series of 2023" out of free text.

    where:
    Q - Set of states
    S - input alphabet (reserved words, matched against lower-cased input)
    R - output alphabet
    f - state transition function, a dict mapping (state, token) -> state
    g - output function, a dict mapping (state, token) -> output
    qi - initial state

    The transition table is a dict, so every (state, token) pair has at most
    one successor. ``run`` only reads S, f and qi; Q, R and g are stored but
    not consulted by it. A transition that leads back to the initial state
    marks the end of a complete code.
    """

    # Placeholder token that stands for "any number" in the transition table.
    NUMBER_TOKEN = "_%number%_"

    # Punctuation that may directly follow a number in running text.
    _TRAILING_PUNCTUATION = (",", ".")

    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    def _tokenize(self, symbol):
        """
        Maps a lower-cased word to the token used in the transition table.

        Returns:
        - the word itself if it is in the input alphabet S,
        - NUMBER_TOKEN if it is a number, optionally followed by a single
          trailing comma or period,
        - None otherwise.
        """
        # Use the alphabet given to this instance, not a module-level global.
        if symbol in self.S:
            return symbol
        digits = symbol
        if symbol.endswith(self._TRAILING_PUNCTUATION):
            digits = symbol[:-1]
        # "".isdigit() is False, so a lone "," or "." is not a number.
        if digits.isdigit():
            return self.NUMBER_TOKEN
        return None

    def run(self, input_str):
        """
        Scans a space-separated input string and collects every complete code
        recognised by the transition table.

        Returns:
        - ordinance_codes: list of matched phrases, in order of appearance,
          spelled as in the input (a trailing comma or period after the
          final number is removed)
        """
        q = self.q0
        ordinance_codes = []
        candidate_words = []
        for symbol_raw in input_str.split(" "):
            token = self._tokenize(symbol_raw.lower())

            q_next = self.f.get((q, token))
            if q_next is None and q != self.q0:
                # The word breaks a partial match. Drop the partial match and
                # give the same word a chance to start a new one, so that
                # e.g. "ordinance ordinance no. 1, ..." is still recognised.
                q = self.q0
                candidate_words = []
                q_next = self.f.get((q, token))
            if q_next is None:
                # The word cannot start a match either: stay in the initial
                # state with an empty candidate.
                candidate_words = []
                continue

            if q_next == self.q0:
                # Returning to the initial state means the candidate is now a
                # complete and valid code.
                if token == self.NUMBER_TOKEN and symbol_raw.endswith(
                    self._TRAILING_PUNCTUATION
                ):
                    # Remove the trailing comma or period after the number.
                    symbol_raw = symbol_raw[:-1]
                candidate_words.append(symbol_raw)
                ordinance_codes.append(" ".join(candidate_words).strip())
                # Start the next candidate from scratch; the closing word
                # must not leak into it.
                candidate_words = []
            else:
                candidate_words.append(symbol_raw)
            q = q_next

        return ordinance_codes


"""
Sample NDFSM: Accepts Valid Ordinance Codes
"""
# Parameters of the sample machine that recognises ordinance codes.
# State 'F' is declared in Q but no transition below uses it.
Q = set("ABCDEFG")
S = {"ordinance", "no.", "_%number%_", "series", "of", ""}
R = {True, False}
f = {
    ("A", "ordinance"): "B",
    ("B", "no."): "C",
    ("C", "_%number%_"): "D",
    ("D", "series"): "E",
    ("E", "of"): "G",
    ("G", "_%number%_"): "A",
}
# Every defined transition outputs True.
g = dict.fromkeys(f, True)
qi = "A"

# Build the machine from the parameters above.
fsm = TSAFM_Ordinances(Q, S, R, f, g, qi)

# Run it over a paragraph containing two well-formed codes and two broken ones.
extracted_ordinances = fsm.run(
    "Mayor Benjamin Magalong signed city ordinance no. 26, series of 2023, approving the request for authority to charge the amount of P28,000 against the 2023 current appropriations of the City Administrator’s Office (CAO)  for the payment of tokens of personalized eco-bag purchased since 2020. The quick brown Ordinance nO. 26,a Ordinance series of 2023 The quick brown ordinance no. 26,  asfasfas fseries of 2023 The quick brown ORdInance no. 26, Series oF 2023"
)

# Print one extracted code per line.
for ordinance in extracted_ordinances:
    print(ordinance)

"""
Sample NDFSM: Accepts Valid Resolution Codes
"""
# Parameters of the sample machine that recognises resolution codes.
# State 'F' is declared in Q but no transition below uses it.
Q = set("ABCDEFG")
S = {"resolution", "no.", "_%number%_", "series", "of", ""}
R = {True, False}
f = {
    ("A", "resolution"): "B",
    ("B", "no."): "C",
    ("C", "_%number%_"): "D",
    ("D", "series"): "E",
    ("E", "of"): "G",
    ("G", "_%number%_"): "A",
}
# Every defined transition outputs True.
g = dict.fromkeys(f, True)
qi = "A"

# Build the machine from the parameters above.
fsm = TSAFM_Ordinances(Q, S, R, f, g, qi)

# Run it over a sentence that contains one resolution code.
extracted_ordinances = fsm.run(
    "Under Resolution No. 196, series of 2023, The Committee on Laws, Human Rights and Justice, in its 5th endorsement dated March 14, 2023, recommended to note certain management observations and recommendations to be attended to."
)

# Print one extracted code per line.
for ordinance in extracted_ordinances:
    print(ordinance)

ordinance no. 26, series of 2023
ORdInance no. 26, Series oF 2023
Resolution No. 196, series of 2023


In [10]:
from unidecode import unidecode

class FSA_CandidateTitleToken:
    """
    Finite state machine that reads a word one character at a time and
    reports whether the word is accepted.

    where:
    Q - Set of states
    S - input alphabet; ``run`` feeds the machine the character classes
        "lower", "upper", "numerical" and "special"
    R - output alphabet
    f - state transition function, a dict mapping (state, class) -> state
    g - output function, a dict mapping (state, class) -> output
    qi - initial state

    ``run`` only reads f, g and qi; Q, S and R are stored but not consulted.
    """

    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    @staticmethod
    def _classify(char):
        """
        Maps a single ASCII character to its character class.

        Returns:
        - "lower" for a-z, "upper" for A-Z, "numerical" for 0-9 and
          "special" for anything else
        """
        if "a" <= char <= "z":
            return "lower"
        if "A" <= char <= "Z":
            return "upper"
        if "0" <= char <= "9":
            return "numerical"
        return "special"

    def run(self, input_str):
        """
        Feeds every character of input_str through the machine.

        Returns:
        - False as soon as the output function yields False for a character
        - True otherwise (including for an empty string)
        """
        # Standardize font style (e.g. mathematical bold letters) to ASCII.
        # The whole string is transliterated at once because one character
        # may turn into several characters or into none. Pure-ASCII text is
        # already in its transliterated form, so the call is skipped for it.
        text = input_str if input_str.isascii() else unidecode(input_str)

        q = self.q0
        for char in text:
            token = self._classify(char)
            # Only an explicit False rejects; a missing table entry does not.
            if self.g.get((q, token)) is False:
                return False
            # Stay in the current state when no transition is defined rather
            # than falling into an undefined state.
            q = self.f.get((q, token), q)
        return True
    

# Parameters of the sample machine that checks candidate title words.
Q = set("ABC")
S = {"lower", "upper", "numerical", "special"}
R = {True, False}
# Transition table, one group of rows per source state.
f = {
    ("A", "lower"): "A",
    ("A", "upper"): "B",
    ("A", "numerical"): "B",
    ("A", "special"): "C",
    ("B", "upper"): "B",
    ("B", "numerical"): "B",
    ("B", "special"): "B",
    ("B", "lower"): "A",
    ("C", "lower"): "A",
    ("C", "upper"): "B",
    ("C", "numerical"): "B",
    ("C", "special"): "C",
}
# Output table: a False entry makes the machine reject the word.
g = {
    ("A", "lower"): False,
    ("A", "upper"): True,
    ("A", "numerical"): True,
    ("A", "special"): False,
    ("B", "upper"): True,
    ("B", "numerical"): True,
    ("B", "special"): True,
    ("B", "lower"): False,
    ("C", "lower"): False,
    ("C", "upper"): True,
    ("C", "numerical"): True,
    ("C", "special"): False,
}
qi = "A"

sample_text = "𝗣𝗥𝗘𝗦𝗦 𝗙𝗢𝗥𝗨𝗠 𝗙𝗢𝗖𝗨𝗦𝗘𝗦 𝗢𝗡 𝗪𝗢𝗠𝗘𝗡 𝗜𝗡 𝗧𝗛𝗘 𝗠𝗜𝗡𝗜𝗡𝗚 𝗜𝗡𝗗𝗨𝗦𝗧𝗥𝗬 “Mining is a male-dominated industry but slowly and surely it's also being manned by women.” "

# Build the machine from the parameters above.
fsm = FSA_CandidateTitleToken(Q, S, R, f, g, qi)

# The title is the leading run of words the machine does not reject;
# the first rejected word ends it.
title = ""
for candidate in sample_text.split(" "):
    if fsm.run(candidate) is not False:
        title = title + candidate + " "
    else:
        break
print(title.strip())

𝗣𝗥𝗘𝗦𝗦 𝗙𝗢𝗥𝗨𝗠 𝗙𝗢𝗖𝗨𝗦𝗘𝗦 𝗢𝗡 𝗪𝗢𝗠𝗘𝗡 𝗜𝗡 𝗧𝗛𝗘 𝗠𝗜𝗡𝗜𝗡𝗚 𝗜𝗡𝗗𝗨𝗦𝗧𝗥𝗬
