## Welcome to CS4TB3 Group 14 Final Project Demo

### Via this application, we aim to convert Regular Expressions into Simplified Natural Language

Code available at https://github.com/anant-j/CS4TB3

**Usage:**  
- Run all cells
- Select an example from the last cell or use your own regex input
- Call the `demo()` function, or re-run the last cell, to repeat

In [None]:
%pip install -q pydot

In [None]:
# Setup for Prefix Tree (Trie) generation and graph visualization 
import json
import pydot
from IPython.display import Image, display

def view_pydot(pdot):
    """Show the PNG produced by ``pdot.create_png()`` inline.

    pdot: any object exposing ``create_png()``.
    """
    png_data = pdot.create_png()
    display(Image(png_data))

# Factory for an empty trie node
def struct():
    """Return a brand-new node dict.

    The node starts with a single entry, ``'iw'``, set to the *string*
    ``'False'`` (make_trie overwrites it with ``'True'`` on nodes where a
    word ends). A fresh dict is built on every call, so nodes never share
    state.
    """
    return {'iw': 'False'}

# Make Trie out of words
# Sorted by length, then the `limit` (default 10) shortest valid regex outputs are chosen
def make_trie(words, limit=10):
    """Build a nested-dict prefix tree from the shortest strings in *words*.

    Every node is a dict created by ``struct()``: each single character maps
    to a child node, and the ``'iw'`` entry holds the string ``'True'`` on
    nodes where a word ends (``'False'`` otherwise).

    Args:
        words: iterable of strings. It is left untouched.
        limit: maximum number of (shortest) words inserted into the trie.

    Returns:
        The root node of the trie.
    """
    # sorted() works on a copy, so the caller's list keeps its order; like
    # list.sort() it is stable, so ties keep their original relative order.
    shortest = sorted(words, key=len)[:limit]
    root = struct()
    for word in shortest:
        node = root
        for c in word:
            if c not in node:
                node[c] = struct()
            node = node[c]
        # Mark the node reached by the last character as the end of a word
        node['iw'] = 'True'
    return root

# Code borrowed from: https://github.com/ahmednooor/trie_graph with minor modifications
counter = 0
def genTree(words):
    """Draw the prefix tree built from *words* and display it inline.

    The trie comes from ``make_trie(words)`` and is wrapped under a synthetic
    ``'root'`` key. Every graph node is named ``'<text>_<counter>'`` so that
    repeated characters (and repeated 'True'/'False' flags) get distinct
    nodes; only ``<text>`` is shown as the label.

    Args:
        words: strings handed to ``make_trie``.

    Side effects:
        Resets and advances the module-level ``counter``; renders the graph
        through ``view_pydot``.
    """
    global counter
    rt = {'root': make_trie(words)}
    counter = 0
    graph = pydot.Dot(graph_type='digraph')

    def label_of(node_name):
        # Strip only the trailing '_<counter>' suffix. Splitting on the first
        # underscore would yield an empty label when the character is '_'.
        return node_name.rsplit('_', 1)[0]

    def draw(parent_name, child_name):
        global counter
        # Each drawn edge advances the counter, keeping later names unique
        counter += 1
        graph.add_node(pydot.Node(parent_name, label=label_of(parent_name)))
        graph.add_node(pydot.Node(child_name, label=label_of(child_name)))
        graph.add_edge(pydot.Edge(parent_name, child_name))

    def visit(node, parent=None):
        global counter
        for k, v in node.items():
            if isinstance(v, dict):
                # We start with the root node whose parent is None
                # we don't want to graph the None node
                k = k + '_' + str(counter)
                if parent:
                    draw(parent, k)
                visit(v, k)
            else:
                # Non-dict values are the 'iw' flags; draw them as leaf
                # nodes using a distinct name
                v = v + '_' + str(counter)
                draw(parent, v)

    visit(rt)
    view_pydot(graph)

In [None]:
# Setting up API
import requests
# Endpoint that convertRegexToNL() POSTs the regex to
baseURL = "https://vxk4urue7yyomdrnme22wjqqku0fgmxc.lambda-url.us-east-1.on.aws/"

# Auxiliary Methods:
def extractPatternData(patterns):
    """Print the contents of *patterns* as plain-English sentences.

    Reads four keys from *patterns*:
      - "minimumLength": compared against 0 to choose the first sentence.
      - "proceeding": maps a character to an entry with 'prev' and 'start'.
      - "preceding": maps a character to an entry with 'next' and 'end'.
      - "oddEven": holds the iterables "odd" and "even" of characters.

    Nothing is returned; all output goes to stdout, one sentence per line.
    """
    min_len = patterns["minimumLength"]
    if min_len == 0:
        print("Empty string is allowed")
    else:
        print(f"Minimum length of generated string is {min_len}")

    # Characters restricted by what may come before them
    after_rules = patterns["proceeding"]
    for char in after_rules:
        rule = after_rules[char]
        suffix = " or start of the string" if rule['start'] else ""
        print(f"Character '{char}' can only appear after Character '{rule['prev']}'" + suffix)

    # Characters restricted by what may come after them
    before_rules = patterns["preceding"]
    for char in before_rules:
        rule = before_rules[char]
        suffix = " or end of the string" if rule['end'] else ""
        print(f"Character '{char}' can only appear before Character '{rule['next']}'" + suffix)

    # Parity of occurrences: all "odd" entries first, then all "even" ones
    for parity in ("odd", "even"):
        for char in patterns["oddEven"][parity]:
            print(f"Occurrences of '{char}' is always {parity}")
        
def convertRegexToNL(regexInput=None, timeout=60):
    """Send a regex to the conversion API and print its description.

    Prompts on stdin until a non-empty regex is available, POSTs it to
    ``baseURL`` and, on success, prints the simplified input (when it differs
    from what was sent), the sample-set size, one sample, the pattern
    sentences, the processing time, and finally draws the prefix tree.

    Args:
        regexInput: regex to convert; when empty or None the user is prompted.
        timeout: seconds to wait for the API before giving up, so an
            unreachable service cannot block the notebook forever.

    Returns:
        None. All results and errors are reported via ``print``.
    """
    while not regexInput:
        print("Please enter a regex to convert:")
        regexInput = input()
    print("Please Wait")

    try:
        response = requests.post(baseURL, data={"input": regexInput}, timeout=timeout)
    except requests.RequestException:
        # Connection failure / timeout: report it like any other API failure
        print("An unexpected error occurred, please try again later")
        return
    if response.status_code != 200:
        print("An unexpected error occurred, please try again later")
        return
    try:
        data = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON
        print("An unexpected error occurred, please try again later")
        return

    if not data["success"]:
        print("An error occurred: " + data["reason"])
        return
    if data["processed"] != regexInput:
        print(f"Processed/Simplified input: {data['processed']}")

    permutations = sorted(data['permutations'], key=len)
    print(f"Size of generated sample set: {len(permutations)}")
    # Guard against an empty sample set before indexing; an empty-string
    # sample is still skipped here (the pattern output already reports it).
    if permutations and permutations[0]:
        print(f"One example of generated sample: {permutations[0]}")
    print("Patterns:")
    extractPatternData(data["patterns"])
    print(f"Time Taken to process: {data['time']} seconds")
    print("Prefix Tree (Depicting a few shortest paths): ")
    genTree(permutations)

In [None]:
# Example regexes offered by demo(); the menu numbers them starting at 1.
demoRegexes = [
    "ab*(a|bc)+",
    "ba(abb|abab|abba|baba)(a)+",
    # Raw string: "\." is an invalid escape sequence in a normal literal
    r"[a-zA-Z0-9]+@[a-zA-Z]+\.com",
    "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]",
    "a(aa)+(bcd)?(aab)*",
    "[a-b]+(ab*)(cd)?(ef|gh)",
]

def demo():
    """Show the example menu, read one choice from stdin and run it.

    Input handling:
      - ``0`` prints "Program Terminated" and returns.
      - ``1`` .. ``len(demoRegexes)`` converts the matching example.
      - any other integer makes convertRegexToNL prompt for a regex.
      - anything that is not an integer is converted as the user's own regex.
    """
    print("------------------------\n Select a number below to run the following examples:", flush=True)
    print("\t0. Terminate Program", flush=True)
    for number, regex in enumerate(demoRegexes, start=1):
        print(f"\t{number}. {regex}", flush=True)
    print("or enter your own regex in the input field")

    userInput = input()
    while not userInput:
        print("Please select a number to continue:")
        userInput = input()

    # Only the int() conversion is guarded. Wrapping the convertRegexToNL
    # calls as well would turn any failure inside them (or Ctrl-C) into a
    # second request that sends the menu number itself as the regex.
    try:
        choice = int(userInput)
    except ValueError:
        convertRegexToNL(userInput)
        return

    if choice == 0:
        print("Program Terminated")
    elif 1 <= choice <= len(demoRegexes):
        convertRegexToNL(demoRegexes[choice - 1])
    else:
        # Out-of-range number: let convertRegexToNL prompt for a regex
        convertRegexToNL()

In [None]:
# Start the interactive demo; re-run this cell to try another regex
demo()