In [7]:
import json

# === CONFIGURATION ===
MODEL_FILE = "htpc_model.json"
TEST_SENTENCE = "the dog ran in the park and the cat slept on the mat"

# === TEXT PROCESSING ===

def normalize_token(token):
    return token.lower().strip(".,!?;:()[]{}\"'")

def tokenize(sentence):
    return [normalize_token(tok) for tok in sentence.strip().split() if tok]

# === LOAD MODEL ===

def load_model(json_path):
    with open(json_path, "r") as f:
        model = json.load(f)

    token_transitions = model['token_transitions']

    bigram_memory = {
        tuple(key.split("|||")): value
        for key, value in model['bigram_memory'].items()
    }

    phrase_memory = {
        tuple(tuple(pair.split("__")) for pair in key.split("|||")): value
        for key, value in model['phrase_memory'].items()
    }

    return token_transitions, bigram_memory, phrase_memory

# === PATTERN RECOGNITION WITH PHRASE MATCHING ===

def match_phrases(tokens, phrase_memory):
    matched_phrases = []
    for i in range(len(tokens) - 2):
        phrase = tuple((tokens[j], tokens[j+1]) for j in range(i, i + 2))
        if phrase in phrase_memory:
            matched_phrases.append(" ".join([phrase[0][0], phrase[0][1], phrase[1][1]]))
    return matched_phrases



def recognize_patterns(tokens, token_transitions, bigram_memory, phrase_memory):
    patterns = []
    current_pattern = []

    for i in range(len(tokens)):
        if i == 0:
            current_pattern.append(tokens[i])
            continue

        prev_token = tokens[i - 1]
        curr_token = tokens[i]
        bigram = (prev_token, curr_token)

        is_valid_transition = (
            token_transitions.get(prev_token) == curr_token or
            bigram in bigram_memory
        )

        if is_valid_transition:
            current_pattern.append(curr_token)
        else:
            if len(current_pattern) > 1:
                matched_phrases = match_phrases(current_pattern, phrase_memory)
                patterns.append((current_pattern.copy(), matched_phrases))
            current_pattern = [curr_token]

    if len(current_pattern) > 1:
        matched_phrases = match_phrases(current_pattern, phrase_memory)
        patterns.append((current_pattern, matched_phrases))

    return patterns

# === MAIN EXECUTION ===

if __name__ == "__main__":
    token_transitions, bigram_memory, phrase_memory = load_model(MODEL_FILE)
    tokens = tokenize(TEST_SENTENCE)
    print("Trained Phrases:")
    for p in phrase_memory:
        print("  ", " ".join([p[0][0], p[0][1], p[1][1]]))
    patterns = recognize_patterns(tokens, token_transitions, bigram_memory, phrase_memory)

    print("🔍 Recognized Patterns:")
    for idx, (tokens, phrases) in enumerate(patterns, 1):
        print(f"\n  Pattern {idx}:")
        print(f"    Tokens: {' '.join(tokens)}")
        if phrases:
            print("    Phrases Matched:")
            for p in phrases:
                print(f"      • {p}")
        else:
            print("    Phrases Matched: (none)")


Trained Phrases:


IndexError: tuple index out of range