In [6]:
import json
import random
import re

# === CONFIGURAÇÃO ===
# Path of the JSON model file handed to load_model() by the script entry point.
MODEL_FILE = "htpc_model.json"
# Path of the text file handed to load_multiwords(); read one expression per line.
MULTIWORDS_FILE = "multiwords.txt"
# Sentence that the script entry point lower-cases, tokenizes and analyses.
TEST_SENTENCE = "a menina estava tomando caf√© da manh√£ com o cachorro na sala"
# Forwarded to recognize_patterns() as noise_probability, which in turn passes
# it to dynamic_feedback_for_context() where it gates each top-down expectation.
NOISE_PROBABILITY = 0.0

# === MULTIWORDS ===
def load_multiwords(path):
    """Read multi-word expressions from a UTF-8 text file.

    Each line is stripped and lower-cased; lines that end up empty are
    ignored.

    Args:
        path: Location of the text file, one expression per line.

    Returns:
        A list of ``(expression, joined)`` tuples in file order, where
        ``joined`` is the expression with every space replaced by ``_``.
    """
    with open(path, "r", encoding="utf-8") as handle:
        expressions = [raw.strip().lower() for raw in handle]
    return [(expr, expr.replace(" ", "_")) for expr in expressions if expr]

def replace_multiwords(text, multiword_list):
    """Replace every known multi-word expression in ``text`` by its joined form.

    Matching is case-sensitive and restricted to whole words, so an
    expression such as ``"a menina"`` is not found inside ``"da menina"``.
    When several expressions could match at the same position the longest
    one wins, regardless of their order in ``multiword_list``.

    Args:
        text: The sentence to rewrite.
        multiword_list: Iterable of ``(original, replacement)`` pairs, as
            returned by ``load_multiwords``. If an original appears more
            than once, its first replacement is used.

    Returns:
        ``text`` with each matched expression substituted by its replacement.
    """
    replacements = {}
    for original, replacement in multiword_list:
        # An empty expression would match at every position; ignore it.
        if original:
            replacements.setdefault(original, replacement)
    if not replacements:
        return text

    # Longest alternatives first so "a b c" is preferred over its prefix "a b".
    alternatives = sorted(replacements, key=len, reverse=True)
    # The lookarounds forbid a word character right before/after the match,
    # i.e. the expression must not start or end in the middle of a word.
    pattern = re.compile(
        r"(?<!\w)(?:" + "|".join(re.escape(alt) for alt in alternatives) + r")(?!\w)"
    )
    # A function replacement keeps the replacement text literal (no backrefs).
    return pattern.sub(lambda match: replacements[match.group(0)], text)

# === TOKENIZAÇÃO ===
def normalize_token(token):
    """Lower-case ``token`` and strip common punctuation characters from it.

    The characters removed are ``. , ! ? ; : ( ) [ ] { } " '``. Underscores
    are kept, so joined multi-word tokens survive intact.

    Args:
        token: A single whitespace-delimited piece of text.

    Returns:
        The normalized token; may be an empty string if the input consisted
        only of punctuation.
    """
    # Brackets are escaped with a single backslash: in a raw string a doubled
    # backslash would be a literal backslash and close the class too early.
    return re.sub(r"[.,!?;:()\[\]{}\"']", "", token.lower())

def tokenize(sentence):
    """Split ``sentence`` on whitespace and normalize each piece.

    Pieces that become empty after normalization (for example a stand-alone
    punctuation mark) are dropped, so the result never contains ``""``.

    Args:
        sentence: The text to tokenize.

    Returns:
        A list of non-empty normalized tokens, in sentence order.
    """
    # str.split() with no argument already ignores leading/trailing whitespace
    # and never yields empty pieces, so the emptiness check belongs after
    # normalization rather than before it.
    normalized = (normalize_token(piece) for piece in sentence.split())
    return [token for token in normalized if token]

# === MODELO HTPC ===
def load_model(json_path):
    """Load the model JSON file and rebuild its tuple-keyed memories.

    The file must contain the keys ``token_transitions``, ``bigram_memory``
    and ``phrase_memory``. Bigram keys are strings whose parts are separated
    by ``|||``; phrase keys are ``|||``-separated entries, each of which is
    itself split on ``__``.

    Args:
        json_path: Path of the UTF-8 encoded JSON file.

    Returns:
        A ``(token_transitions, bigram_memory, phrase_memory)`` tuple where
        ``token_transitions`` is returned as stored, ``bigram_memory`` is
        keyed by tuples of strings and ``phrase_memory`` is keyed by tuples
        of such tuples.

    Raises:
        KeyError: If one of the three top-level keys is missing.
    """
    with open(json_path, "r", encoding="utf-8") as handle:
        raw = json.load(handle)

    transitions = raw['token_transitions']

    bigrams = {}
    for joined, payload in raw['bigram_memory'].items():
        bigrams[tuple(joined.split("|||"))] = payload

    phrases = {}
    for joined, payload in raw['phrase_memory'].items():
        pairs = tuple(tuple(part.split("__")) for part in joined.split("|||"))
        phrases[pairs] = payload

    return transitions, bigrams, phrases

# === REMAPEAMENTO TOP-DOWN ===
def dynamic_feedback_for_context(context_tokens, phrase_memory, noise_probability=0.0):
    """Collect the tokens that stored phrases predict after the given context.

    Every phrase is flattened from its bigram pairs into a token sequence.
    Whenever a prefix of that sequence equals the tail of ``context_tokens``
    of the same length, the token following the prefix becomes an
    expectation. Each such match is dropped with probability
    ``noise_probability`` (one ``random.random()`` draw per match).

    Args:
        context_tokens: Sequence of the most recent tokens (any iterable).
        phrase_memory: Iterable of phrases, each a sequence of
            ``(token, next_token)`` pairs; empty phrases are ignored.
        noise_probability: Chance in ``[0.0, 1.0]`` of discarding a matched
            expectation. ``0.0`` never discards, ``1.0`` always discards.

    Returns:
        A set with the expected next tokens.
    """
    # Copy into a list so tuple contexts compare equal to the list prefixes.
    context = list(context_tokens)
    expectations = set()
    for phrase in phrase_memory:
        if not phrase:
            continue  # nothing to flatten, hence nothing to predict
        flat = [phrase[0][0]] + [pair[1] for pair in phrase]
        # A prefix longer than the context can never equal its tail, and the
        # last token of the phrase has no successor to predict.
        longest_prefix = min(len(flat) - 1, len(context))
        for match_len in range(1, longest_prefix + 1):
            if flat[:match_len] != context[-match_len:]:
                continue
            # random() lies in [0.0, 1.0); ">=" makes the drop probability
            # exactly noise_probability and guarantees no drop at 0.0.
            if random.random() >= noise_probability:
                expectations.add(flat[match_len])
    return expectations

# === COMPARA FRASES ===
def match_all_phrases(tokens, phrase_memory):
    """Find every stored phrase that occurs contiguously inside ``tokens``.

    A phrase is a tuple of ``(token, next_token)`` pairs; it occurs at a
    position when the consecutive bigrams of ``tokens`` starting there are
    equal to it.

    Args:
        tokens: Sequence of tokens to search in.
        phrase_memory: Iterable of phrases (tuples of bigram tuples).

    Returns:
        A list with one space-joined string per occurrence, ordered by
        phrase (iteration order of ``phrase_memory``) and then by position.
    """
    # All adjacent pairs, built once and sliced per candidate window.
    bigrams = tuple(zip(tokens, tokens[1:]))
    found = []
    for phrase in phrase_memory:
        width = len(phrase)
        # A phrase of ``width`` bigrams spans ``width + 1`` tokens.
        for start in range(len(tokens) - width):
            if bigrams[start:start + width] == phrase:
                found.append(" ".join(tokens[start:start + width + 1]))
    return found

# === RECONHECIMENTO ===
def recognize_patterns(tokens, token_transitions, bigram_memory, phrase_memory, noise_probability=0.0):
    """Cut ``tokens`` into runs accepted by both the bottom-up and top-down checks.

    A token extends the current run only when (a) it is the successor stored
    for the previous token in ``token_transitions`` or the pair is a key of
    ``bigram_memory``, and (b) it is among the expectations that
    ``dynamic_feedback_for_context`` derives from the last three tokens of
    the run. Otherwise a diagnostic is printed, the run is closed and a new
    one starts at the offending token.

    Args:
        tokens: Sequence of tokens to analyse.
        token_transitions: Mapping from a token to its stored successor.
        bigram_memory: Container of known ``(previous, current)`` pairs.
        phrase_memory: Phrases used for the top-down expectations and for
            labelling each closed run via ``match_all_phrases``.
        noise_probability: Forwarded to ``dynamic_feedback_for_context``.

    Returns:
        A list of ``(run_tokens, matched_phrases)`` tuples; runs consisting
        of a single token are not reported.
    """
    recognized = []
    # The first token (if any) always opens the first run.
    segment = list(tokens[:1])

    for position in range(1, len(tokens)):
        previous = tokens[position - 1]
        current = tokens[position]

        feedforward_ok = (
            token_transitions.get(previous) == current
            or (previous, current) in bigram_memory
        )

        # Expectations are computed on every step, even when the bottom-up
        # check already failed, so both reasons can be reported.
        recent = segment[-3:]
        expected = dynamic_feedback_for_context(recent, phrase_memory, noise_probability)
        feedback_ok = current in expected

        if feedforward_ok and feedback_ok:
            segment.append(current)
            continue

        print(f"\n‚ö†Ô∏è  Inibi√ß√£o em '{current}' (posi√ß√£o {position})")
        if not feedforward_ok:
            print(f"   ‚îî‚îÄ Feedforward: sem transi√ß√£o v√°lida ap√≥s '{previous}'")
        if not feedback_ok:
            print(f"   ‚îî‚îÄ Top-down: '{current}' n√£o esperado no contexto {recent}")

        if len(segment) > 1:
            recognized.append((segment, match_all_phrases(segment, phrase_memory)))
        # Rebinding (not clearing) keeps the list just stored untouched.
        segment = [current]

    if len(segment) > 1:
        recognized.append((segment, match_all_phrases(segment, phrase_memory)))

    return recognized

# === EXECUÇÃO ===
if __name__ == "__main__":
    # Fixed seed so the random draws made while recognizing are repeatable.
    random.seed(42)

    # Join multi-word expressions first so each one becomes a single token.
    multiwords = load_multiwords(MULTIWORDS_FILE)
    sentence = replace_multiwords(TEST_SENTENCE.lower(), multiwords)
    tokens = tokenize(sentence)

    token_transitions, bigram_memory, phrase_memory = load_model(MODEL_FILE)

    print(f"\nüß† Reconhecimento HTPC com remapeamento din√¢mico (ru√≠do={NOISE_PROBABILITY})")
    patterns = recognize_patterns(tokens, token_transitions, bigram_memory, phrase_memory, NOISE_PROBABILITY)

    print("\nüîç Padr√µes Reconhecidos:")
    # The loop variable is deliberately not called ``tokens``: that name holds
    # the tokenized sentence and must not be overwritten by the report loop.
    for idx, (pattern_tokens, phrases) in enumerate(patterns, 1):
        print(f"\n  Padr√£o {idx}:")
        print(f"    Tokens: {' '.join(pattern_tokens)}")
        if phrases:
            print("    Frases correspondentes:")
            for p in phrases:
                print(f"      ‚Ä¢ {p}")
        else:
            print("    Frases correspondentes: (nenhuma)")



üß† Reconhecimento HTPC com remapeamento din√¢mico (ru√≠do=0.0)

‚ö†Ô∏è  Inibi√ß√£o em 'fez' (posi√ß√£o 2)
   ‚îî‚îÄ Feedforward: sem transi√ß√£o v√°lida ap√≥s 'menina'
   ‚îî‚îÄ Top-down: 'fez' n√£o esperado no contexto ['a', 'menina']

‚ö†Ô∏è  Inibi√ß√£o em 'o' (posi√ß√£o 3)
   ‚îî‚îÄ Feedforward: sem transi√ß√£o v√°lida ap√≥s 'fez'
   ‚îî‚îÄ Top-down: 'o' n√£o esperado no contexto ['fez']

‚ö†Ô∏è  Inibi√ß√£o em 'com' (posi√ß√£o 5)
   ‚îî‚îÄ Feedforward: sem transi√ß√£o v√°lida ap√≥s 'caf√©_da_manh√£'
   ‚îî‚îÄ Top-down: 'com' n√£o esperado no contexto ['o', 'caf√©_da_manh√£']

üîç Padr√µes Reconhecidos:

  Padr√£o 1:
    Tokens: a menina
    Frases correspondentes: (nenhuma)

  Padr√£o 2:
    Tokens: o caf√©_da_manh√£
    Frases correspondentes: (nenhuma)

  Padr√£o 3:
    Tokens: com o cachorro na sala
    Frases correspondentes:
      ‚Ä¢ com o cachorro
      ‚Ä¢ o cachorro na
      ‚Ä¢ cachorro na sala
