In [2]:
!pip install trieregex

Collecting trieregex
  Downloading trieregex-1.0.0.tar.gz (11 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: trieregex
  Building wheel for trieregex (setup.py) ... [?25ldone
[?25h  Created wheel for trieregex: filename=trieregex-1.0.0-py3-none-any.whl size=12455 sha256=5066915c3eebca879b97aefd94b3aba26a1250d460e1f521c90fdd5946e5d3e5
  Stored in directory: /root/.cache/pip/wheels/af/d8/00/c5122505f958c451048fc359c9937f39dc49ba69143b0cc0c9
Successfully built trieregex
Installing collected packages: trieregex
Successfully installed trieregex-1.0.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [3]:
import re
from trieregex import TrieRegEx as TRE

words = ['lemon', 'lime', 'pomelo', 'orange', 'citron']
more_words = ['grapefruit', 'grape', 'tangerine', 'tangelo']

# Initialize an empty class instance
tre = TRE()

# Add word(s)
tre = TRE(*words)  # word(s) can be added upon instance creation, or after
tre.add('kumquat')  # add one word
tre.add(*more_words)  # add a list of words

# Remove word(s)
tre.remove('citron')  # remove one word
tre.remove(*words)  # remove a list of words

# NOTE: a notebook cell only displays its *last* expression, so every
# intermediate result below is printed explicitly instead of being evaluated
# and silently discarded.

# Check if a word exists in the trie
print(tre.has('citron'))  # Prints: False
print(tre.has('tangerine'))  # Prints: True

# Create regex pattern from the trie
print(tre.regex())  # Prints: (?:tange(?:rine|lo)|grape(?:fruit)?|kumquat)

# Add boundary context and compile for matching
pattern = re.compile(f'\\b{tre.regex()}\\b')  # OR rf'\b{tre.regex()}\b'
print(pattern)  # Prints: re.compile('\\b(?:tange(?:rine|lo)|grape(?:fruit)?|kumquat)\\b')
print(pattern.findall("A kumquat is tastier than a loquat"))  # Prints: ['kumquat']

# Inspect unique initial characters in the trie
print(tre.initials())  # Prints: ['g', 'k', 't']

# Inspect unique final characters in the trie (last expression -> cell output)
tre.finals()  # Returns: ['e', 'o', 't']

['e', 'o', 't']

In [4]:
# Display the `pattern` object compiled earlier in the notebook as this cell's output
pattern

re.compile(r'\b(?:tange(?:rine|lo)|grape(?:fruit)?|kumquat)\b', re.UNICODE)

In [5]:
import random
import string

def generate_random_word(length: int) -> str:
    """Build a random word made of lowercase ASCII letters.

    Characters are drawn one at a time from ``string.ascii_lowercase`` with
    the module-level ``random`` generator and then concatenated.

    Args:
        length: Number of characters to draw.

    Returns:
        The drawn characters joined into one string; the empty string when
        ``range(length)`` yields nothing.
    """
    alphabet = string.ascii_lowercase
    picked = []
    for _ in range(length):
        picked.append(random.choice(alphabet))
    return ''.join(picked)

# Tunable parameters for the synthetic word list
SEED = 42                 # fixed seed so every Restart & Run All yields the same words
N_WORDS = 20_000          # how many random words to generate
MIN_LEN, MAX_LEN = 3, 10  # word length bounds (random.randint is inclusive on both ends)

# generate_random_word draws from the global `random` generator, so seeding it
# here makes the word list (and the regex built from it) reproducible.
random.seed(SEED)

# Generate N_WORDS random words
random_words = [
    generate_random_word(random.randint(MIN_LEN, MAX_LEN))
    for _ in range(N_WORDS)
]

# You can now use the 'random_words' list as needed.
# For example, to print the first 10 words:
print(random_words[:10])
print(len(random_words))

# Build the trie directly from the words (the previous empty `TRE()` instance
# was overwritten immediately and served no purpose)
tre = TRE(*random_words)

# Size, in characters, of the regex generated from the trie
print(len(tre.regex()))


['iotcyekpco', 'urwej', 'cha', 'pbnvogh', 'mzqif', 'vhgs', 'lzqxcmlex', 'voulrvwyw', 'rqol', 'fgnyxp']
