In [2]:
class TSAFM_Ordinances:
    """
    Implementation of a Non-Deterministic Transition-Assigned Finite State Machine (Mealy Automaton)

    where:
    Q - Set of states
    S - input alphabet
    R - output alphabet
    f - state transition function (Q x S -> Q)
    g - output function (Q x S -> R)
    qi - initial state
    """

    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    def run(self, input_str):
        """
        Processes an input string and returns the output generated by the FSM.

        Returns:
        - output: the output generated by the FSM for the input string
        """
        q = self.q0
        ordinance_codes = []
        candidate_string = ""
        for symbol_raw in input_str.split(" "):
            token = None
            symbol = symbol_raw.lower()

            # Check if symbol is reserved word
            if symbol in S:
                token = symbol
            # Check if symbol is valid number candidate
            elif len(symbol) > 1:
                # Checks if the symbol is a number, while taking into consideration a 
                # possible trailing comma or period in the string
                if (symbol.isdigit() or symbol[:-1].isdigit()) and symbol[-2].isdigit():
                    token = "_%number%_"

            # If there is no next state given a current state and input,
            # Then go back to state A and reset candidate_string
            q_next = self.f.get((q, token), None)
            if q_next is None:
                q = "A"
                candidate_string = ""
                continue

            # If the next state is A, this means that the candidate string 
            # is now a complete and valid ordinance code.
            # Append candidate_string to the list of ordinance codes taken
            # from given input.
            # Reset candidate string to empty string
            if q_next == "A":
                # Remove trailing comma or period in year if there is any
                if symbol[-1] == "," or symbol[-1] == ".":
                    symbol_raw = symbol_raw[:-1]
                candidate_string += symbol_raw + " "
                ordinance_codes.append(candidate_string.strip())
                candidate_string = ""
                q = q_next
                pass

            # If there is a new state, then append current word to 
            # candidate_string
            candidate_string += symbol_raw + " "
            q = q_next

        return ordinance_codes


"""
Sample NDFSM: Accepts Valid Ordinance Codes
"""
# Define the parameters for the TSAFM
Q = {'A', 'B', 'C', 'D', 'E', 'F', 'G'}
S = {'ordinance', 'no.', '_%number%_', 'series', 'of', ''}
R = {True, False}
f = {('A', 'ordinance'): 'B', ('B', 'no.'): 'C', ('C', '_%number%_'): 'D', ('D', 'series'): 'E', ('E', 'of'): 'G', ('G', '_%number%_'): 'A'}
g = {('A', 'ordinance'): True, ('B', 'no.'): True, ('C', '_%number%_'): True, ('D', 'series'): True, ('E', 'of'): True, ('G', '_%number%_'): True}
qi = 'A'

# Create an instance of the TSAFM
fsm = TSAFM_Ordinances(Q, S, R, f, g, qi)

extracted_ordinances = fsm.run("Mayor Benjamin Magalong signed city ordinance no. 26, series of 2023, approving the request for authority to charge the amount of P28,000 against the 2023 current appropriations of the City Administrator’s Office (CAO)  for the payment of tokens of personalized eco-bag purchased since 2020. The quick brown Ordinance nO. 26,a Ordinance series of 2023 The quick brown ordinance no. 26,  asfasfas fseries of 2023 The quick brown ORdInance no. 26, Series oF 2023")

for ordinance in extracted_ordinances:
    print(ordinance)

"""
Sample NDFSM: Accepts Valid Resolution Codes
"""
# Define the parameters for the TSAFM
Q = {'A', 'B', 'C', 'D', 'E', 'F', 'G'}
S = {'resolution', 'no.', '_%number%_', 'series', 'of', ''}
R = {True, False}
f = {('A', 'resolution'): 'B', ('B', 'no.'): 'C', ('C', '_%number%_'): 'D', ('D', 'series'): 'E', ('E', 'of'): 'G', ('G', '_%number%_'): 'A'}
g = {('A', 'resolution'): True, ('B', 'no.'): True, ('C', '_%number%_'): True, ('D', 'series'): True, ('E', 'of'): True, ('G', '_%number%_'): True}
qi = 'A'

# Create an instance of the TSAFM
fsm = TSAFM_Ordinances(Q, S, R, f, g, qi)

extracted_ordinances = fsm.run("Under Resolution No. 196, series of 2023, The Committee on Laws, Human Rights and Justice, in its 5th endorsement dated March 14, 2023, recommended to note certain management observations and recommendations to be attended to.")

for ordinance in extracted_ordinances:
    print(ordinance)

ordinance no. 26, series of 2023
ORdInance no. 26, Series oF 2023
Resolution No. 196, series of 2023


In [164]:
from unidecode import unidecode

class FSA_CandidateTitleToken:
    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    def run(self, input_str):
        q = self.q0
        out = 0
        for token in input_str:
            # standardize font style
            token = unidecode(token)
            # check if character is lower alphabet
            if ord(token) > 96 and ord(token) < 123:
                token = "lower"
            # check if character is upper alphabet
            elif ord(token) > 64 and ord(token) < 91:
                token = "upper"
            # check if character is numerical
            elif ord(token) > 47 and ord(token) < 58:
                token = "numnerical"
            # else, character is special character
            else:
                token = "special"
            q_next = self.f.get((q, token), None)
            out = self.g.get((q, token), None)
            q = q_next
            if out == False:
                return False
        return True
    

# Define the parameters for the TSAFM
Q = {'A', 'B', 'C'}
S = {"lower", "upper", "numerical", "special"}
R = {True, False}
f = {('A', 'lower'): 'A', ('A', 'upper'): 'B', ('A', 'numerical'): 'B', ('A', 'special'): 'C', 
     ('B', 'upper'): 'B', ('B', 'numerical'): 'B', ('B', 'special'): 'B', ('B', 'lower'): 'A', 
     ('C', 'lower'): 'A', ('C', 'upper'): 'B', ('C', 'numerical'): 'B', ('C', 'special'): 'C'}
g = {('A', 'lower'): False, ('A', 'upper'): True, ('A', 'numerical'): True, ('A', 'special'): False, 
     ('B', 'upper'): True, ('B', 'numerical'): True, ('B', 'special'): True, ('B', 'lower'): False, 
     ('C', 'lower'): False, ('C', 'upper'): True, ('C', 'numerical'): True, ('C', 'special'): False}
qi = 'A'

sample_text = "𝗣𝗥𝗘𝗦𝗦 𝗙𝗢𝗥𝗨𝗠 𝗙𝗢𝗖𝗨𝗦𝗘𝗦 𝗢𝗡 𝗪𝗢𝗠𝗘𝗡 𝗜𝗡 𝗧𝗛𝗘 𝗠𝗜𝗡𝗜𝗡𝗚 𝗜𝗡𝗗𝗨𝗦𝗧𝗥𝗬 “Mining is a male-dominated industry but slowly and surely it's also being manned by women.” "

# Create an instance of the TSAFM
FSA_CandidateWords = FSA_CandidateTitleToken(Q, S, R, f, g, qi)

title = ""
for candidate in sample_text.split(" "):
    if fsm.run(candidate) is False:
        break
    title += candidate + " "
print(title.strip())

TypeError: FSA_CandidateTitleToken.run() missing 1 required positional argument: 'fsa_validator'

In [169]:
from unidecode import unidecode

class FSA_CandidateTitleToken:
    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    def run(self, input_str, fsa_validator):
        q = self.q0
        candidate_words = ""
        for token in input_str.split(" "):
            is_candidate = "valid" if fsa_validator.run(token) else "invalid"
            if is_candidate == "invalid":
                break
            q_next = self.f.get((q, is_candidate), None)
            out = self.g.get((q, is_candidate), None)
            if out == "0":
                candidate_words = ""
            elif out == "1":
                candidate_words += token + " "
            else:
                return candidate_words.strip()
            q = q_next
        return candidate_words.strip()
    

# Define the parameters for the TSAFM
Q = {'A', 'B', 'C', 'D'}
S = {'valid', 'invalid'}
R = {'0', '1', '2'}
f = {('A', 'valid'): 'B', ('A', 'invalid'): 'A', 
     ('B', 'valid'): 'C', ('B', 'invalid'): 'A',
     ('C', 'valid'): 'C', ('C', 'invalid'): 'D',
     ('D', 'valid'): 'A', ('D', 'invalid'): 'A',}
g = {('A', 'valid'): '1', ('A', 'invalid'): '0', 
     ('B', 'valid'): '1', ('B', 'invalid'): '0',
     ('C', 'valid'): '1', ('C', 'invalid'): '2',
     ('D', 'valid'): '0', ('D', 'invalid'): '0',}
qi = 'A'

sample_text = "𝗣𝗥𝗘𝗦𝗦 𝗙𝗢𝗥𝗨𝗠 𝗙𝗢𝗖𝗨𝗦𝗘𝗦 𝗢𝗡 𝗪𝗢𝗠𝗘𝗡 𝗜𝗡 𝗧𝗛𝗘 𝗠𝗜𝗡𝗜𝗡𝗚 𝗜𝗡𝗗𝗨𝗦𝗧𝗥𝗬"

# Create an instance of the TSAFM
fsm = FSA_CandidateTitleToken(Q, S, R, f, g, qi)

fsm.run(sample_text, FSA_CandidateWords)

''

In [8]:
from unidecode import unidecode

# month dd, year
class FSA_Date1:
    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    def run(self, input_str):
        q = self.q0
        accepted_words = []
        candidate_words = ""
        for token in input_str.split(" "):
            symbol = unidecode(token).lower().strip()
            stimulus = "INVALID"
            # check if symbol is month
            if symbol in ["january", "february", "march", "april", 
                          "may", "june", "july", "august", 
                          "september", "october", "november", "december"]:
                stimulus = "MONTH"

            # check if symbol is candidate year
            elif len(symbol) == 4 or len(symbol) == 5:
                if (symbol.isdigit() or symbol[:-1].isdigit()):
                    stimulus = "YEAR"

            # Check if symbol is valid number candidate
            elif len(symbol) > 1:
                if symbol[:-1].isdigit() and symbol[-1] == ",":
                    stimulus = "DAY"


            q_next = self.f.get((q, stimulus), "A")
            out = self.g.get((q, stimulus), "0")

            if out == "0":
                candidate_words = ""
            elif out == "1":
                candidate_words += token + " "
            # if accepting state
            if q_next == "D":
                accepted_words.append(candidate_words.strip())
            q = q_next
        
        return accepted_words
    

# Define the parameters for the TSAFM
Q = {'A', 'B', 'C', 'D'}
S = {'MONTH', 'DAY', 'YEAR', "INVALID"}
R = {'0', '1', '2'}
f = {('A', 'MONTH'): 'B', ('B', 'DAY'): 'C', ('C', 'YEAR'): 'D'}
g = {('A', 'MONTH'): '1', ('B', 'DAY'): '1', ('C', 'YEAR'): '1', 
     ('D', 'MONTH'): '2', ('D', 'DAY'): '2', ('D', 'YEAR'): '2', ('D', 'INVALID'): '2'}
qi = 'A'

sample_text = "September 29, 2023. CARAA MEET PARADE ON APRIL 29, 2023 -- DEPED december 29, 2023"

# Create an instance of the TSAFM
fsm = FSA_Date1(Q, S, R, f, g, qi)

fsm.run(sample_text)

['September 29, 2023.', 'APRIL 29, 2023', 'december 29, 2023']

In [68]:
# checks for valid date number 01, 102, 03 .. 99
class FSA_Date_Num:
    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    def run(self, input_str):
        q = self.q0
        out = False
        if len(input_str) > 2:
            return False
        for token in input_str:
            q_next = self.f.get((q, token), "A")
            out = self.g.get((q, token), False)
            q = q_next
        return out
    
# Define the parameters for the TSAFM
Q = {'A', 'B', 'C'}
S = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9'}
R = {False, True}
f = {('A', '0'): 'B', ('A', '1'): 'B', ('A', '2'): 'B', ('A', '3'): 'B', ('A', '4'): 'B',
     ('A', '5'): 'B', ('A', '6'): 'B', ('A', '7'): 'B', ('A', '8'): 'B', ('A', '9'): 'B',
     ('B', '1'): 'C', ('B', '2'): 'C', ('B', '3'): 'C', ('B', '4'): 'C', ('B', '5'): 'C',
     ('B', '6'): 'C', ('B', '7'): 'C', ('B', '8'): 'C', ('B', '9'): 'C', ('B', '0'): 'A'}
g = {('B', '1'): True, ('B', '2'): True, ('B', '3'): True, ('B', '4'): True, ('B', '5'): True,
     ('B', '6'): True, ('B', '7'): True, ('B', '8'): True, ('B', '9'): True, ('B', '0'): True}
qi = 'A'

sample_text = "4/20/23 4/20/23"

fsm_yy = FSA_Date_Num(Q, S, R, f, g, qi)

fsm_yy.run("00")

True

In [129]:
from unidecode import unidecode

# mm/dd/yy
class FSA_Date2:
    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    def run(self, input_str):
        q = self.q0
        out = False
        tokens = unidecode(input_str).split("/")

        if len(tokens) != 3:
            return out
        for i,token in enumerate(tokens):
            symbol = None

            # if to be check is either day or month
            if i != 2:
                # if the token is a digit, check validity
                if token.isdigit():
                    if int(token) > 0 and int(token) < 32:
                        symbol = "DD"
                    if int(token) > 0 and int(token) < 13:
                        symbol = "MM"
                    if int(token) < 0 or int(token) > 99 or token[0] == "0":
                        symbol = "INVALID"
                else:
                    symbol = "INVALID"
            # if to be checked is year
            else:
                # if token's last element is a digit
                if token[-1].isdigit():
                    if len(token) == 2:
                        symbol = "YY"
                # if token's last element is not a digit
                else:
                    # check if characters preceding it is a digit
                    if token[:-1].isdigit():
                        if len(token[:-1]) == 2:
                            symbol = "YY"
                        else:
                            symbol ="INVALID"
                    else:
                        symbol = "INVALID"

            q_next = self.f.get((q, symbol), "A")
            print(token, q, symbol, q_next)
            out = self.g.get((q, symbol), False)
            
            q = q_next
        return out
    

# Define the parameters for the TSAFM
Q = {'A', 'B', 'C', 'D'}
S = {'MM', 'DD', 'YY'}
R = {False, True}
f = {('A', 'MM'): 'B', ('B', 'DD'): 'C', ('C', 'YY'): 'D',
     ('A', 'DD'): 'A', ('C', 'MM'): 'D', ('C', 'DD'): 'D', 
     ('B', 'MM'): 'C'}
g = {('C', 'YY'): True, ('C', 'MM'): True, ('C', 'DD'): True }
qi = 'A'

sample_text = "12/12/00 4/2/23 12/20/23,"

# Create an instance of the TSAFM
fsm = FSA_Date2(Q, S, R, f, g, qi)

for candidate in sample_text.split(" "):
    if fsm.run(candidate) is False:
        break
    print("ACCEPTED", candidate)

12 A MM B
12 B MM C
00 C YY D
ACCEPTED 12/12/00
4 A MM B
2 B MM C
23 C YY D
ACCEPTED 4/2/23
12 A MM B
20 B DD C
23, C YY D
ACCEPTED 12/20/23,


In [154]:
from unidecode import unidecode

class TSM_Proclamations:
    """
    Implementation of a Non-Deterministic Transition-Assigned Finite State Machine (Mealy Automaton)

    where:
    Q - Set of states
    S - input alphabet
    R - output alphabet
    f - state transition function (Q x S -> Q)
    g - output function (Q x S -> R)
    qi - initial state
    """

    def __init__(self, Q, S, R, f, g, qi):
        self.Q = Q
        self.S = S
        self.R = R
        self.f = f
        self.g = g
        self.q0 = qi

    def run(self, input_str):
        """
        Processes an input string and returns the output generated by the FSM.

        Returns:
        - output: the output generated by the FSM for the input string
        """
        q = self.q0
        proclamation_codes = []
        candidate_string = ""
        for symbol_raw in input_str.split(" "):
            token = None
            symbol = unidecode(symbol_raw).lower()

            # Check if symbol is reserved word
            if symbol in self.S:
                token = symbol
            # Check if symbol is valid number candidate
            elif len(symbol) > 1:
                # Checks if the symbol is a number, while taking into consideration a 
                # possible trailing comma or period in the string
                if (symbol.isdigit() or symbol[:-1].isdigit()) and symbol[-2].isdigit():
                    token = "_%number%_"
                    if symbol[-1].isdigit():
                        if len(symbol) == 4 and int(symbol) > 999 and int(symbol) < 3000:
                            token = "_%year%_"
                    else:
                        if len(symbol) == 5 and int(symbol[:-1]) > 999 and int(symbol[:-1]) < 3000:
                            token = "_%year%_"
            # If there is no next state given a current state and input,
            # Then go back to state A and reset candidate_string
            q_next = self.f.get((q, token), None)
            if q_next is None:
                q = "A"
                candidate_string = ""
                continue
            
            if len(candidate_string) != 0:
                print(symbol_raw, q, token)
            # If the next state is A, this means that the candidate string 
            # is now a complete and valid ordinance code.
            # Append candidate_string to the list of ordinance codes taken
            # from given input.
            # Reset candidate string to empty string
            if q_next == "A":
                # Remove trailing comma or period in year if there is any
                if symbol[-1] == "," or symbol[-1] == ".":
                    symbol_raw = symbol_raw[:-1]
                candidate_string += symbol_raw + " "
                proclamation_codes.append(candidate_string.strip())
                candidate_string = ""
                q = q_next
                pass

            # If there is a new state, then append current word to 
            # candidate_string
            candidate_string += symbol_raw + " "
            q = q_next
        print(proclamation_codes)
        return proclamation_codes


"""
Sample NDFSM: Accepts Valid Ordinance Codes
"""
# Define the parameters for the TSAFM
Q = {'A', 'B', 'C', 'D', 'E', 'F'}
S = {'proclamation', 'no.', '_%number%_', 's.', '_%year%_'}
R = {True, False}
f = {('A', 'proclamation'): 'B', ('B', 'no.'): 'C', ('C', '_%number%_'): 'D', ('C', '_%year%_'): 'D', ('D', 's.'): 'E', ('E', '_%year%_'): 'A'}
g = {('A', 'proclamation'): True, ('B', 'no.'): True, ('C', '_%number%_'): True, ('D', 's.'): True, ('E', '_%year%_'): True}
qi = 'A'

# Create an instance of the TSAFM
fsm = TSM_Proclamations(Q, S, R, f, g, qi)

extracted_ordinances = fsm.run("Mayor Benjamin Magalong signed city ordinance no. 2022, 𝐏𝐑𝐎𝐂𝐋𝐀𝐌𝐀𝐓𝐈𝐎𝐍 𝐍𝐎. 𝟐𝟎22, 𝐬. 𝟐𝟎𝟐𝟑 series of 2023, approving the request for authority to charge the amount of P28,000 against the 2023 current appropriations of the City Administrator’s Office (CAO)  for the payment of tokens of personalized eco-bag purchased since 2020. The quick brown Ordinance nO. 26,a Ordinance series of 2023 The quick brown ordinance no. 26,  asfasfas fseries of 2023 The quick brown ORdInance no. 26, Series oF 2023")

for ordinance in extracted_ordinances:
    print(ordinance)

𝐍𝐎. B no.
𝟐𝟎22, C _%year%_
𝐬. D s.
𝟐𝟎𝟐𝟑 E _%year%_
['𝐏𝐑𝐎𝐂𝐋𝐀𝐌𝐀𝐓𝐈𝐎𝐍 𝐍𝐎. 𝟐𝟎22, 𝐬. 𝟐𝟎𝟐𝟑']
𝐏𝐑𝐎𝐂𝐋𝐀𝐌𝐀𝐓𝐈𝐎𝐍 𝐍𝐎. 𝟐𝟎22, 𝐬. 𝟐𝟎𝟐𝟑
