In [2]:
# Try to run IntelligentFormAgent.py.ipy; otherwise load the fallback shim
import os, sys, types, re

def _load_fallback_shim():
    print("Loading fallback shim...")
    def find_key_value(text, key_patterns):
        results = {}
        for kp in key_patterns:
            try:
                m = re.search(kp, text, flags=re.IGNORECASE)
            except re.error:
                idx = text.lower().find(kp.lower())
                if idx != -1:
                    results[kp] = text[idx:idx+100]
                continue
            if m:
                results[kp] = m.group(1) if m.groups() else m.group(0)
        return results

    def chunk_text(text, chunk_size=500, overlap=50):
        if not text:
            return []
        words = text.split()
        step = max(1, chunk_size - overlap)
        return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), step)]

    src_mod = types.ModuleType("src")
    extractor_mod = types.ModuleType("src.extractor")
    extractor_mod.find_key_value = find_key_value
    extractor_mod.chunk_text = chunk_text
    src_mod.extractor = extractor_mod

    sys.modules["src"] = src_mod
    sys.modules["src.extractor"] = extractor_mod
    print("Fallback src.extractor loaded.")

# Prefer the real agent script; fall back to the in-memory shim when the
# script is missing, fails, or leaves src.extractor unimportable.
AGENT_SCRIPT = "IntelligentFormAgent.py.ipy"

if os.path.exists(AGENT_SCRIPT):
    print("Running IntelligentFormAgent.py...")
    try:
        get_ipython().run_line_magic("run", AGENT_SCRIPT)
        # Later cells import from src.extractor; confirm it resolves now so a
        # missing module triggers the fallback here instead of a crash later.
        import src.extractor  # noqa: F401
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt/SystemExit still propagate, and report the cause
        # instead of silently discarding it.
        print(f"Error ({type(exc).__name__}: {exc}) — using fallback shim.")
        _load_fallback_shim()
else:
    print("IntelligentFormAgent.py not found — using fallback shim.")
    _load_fallback_shim()


Running IntelligentFormAgent.py...


In [3]:
# Demo: extract labelled fields from a small block of form-like text.
from src.extractor import find_key_value

# Sample input: one "Label: value" pair per line.
text = """
Name: Neha Kulkarni
DOB: 04/11/2001
Email: neha.k@example.com
"""

# One regex per field. Each has a single capture group that isolates the
# value following the label and its optional colon/whitespace.
patterns = [
    r"Name[:\s]*([A-Za-z ]+)",
    r"DOB[:\s]*([0-9/\-]+)",
    r"Email[:\s]*([A-Za-z0-9@.\-]+)",
]

result = find_key_value(text, patterns)
# Bare last expression so the notebook displays the returned value.
result


{'Name[:\\s]*([A-Za-z ]+)': 'Neha Kulkarni',
 'DOB[:\\s]*([0-9/\\-]+)': '04/11/2001',
 'Email[:\\s]*([A-Za-z0-9@.\\-]+)': 'neha.k@example.com'}

In [4]:
# Demo: split a long string into overlapping chunks.
from src.extractor import chunk_text

# NOTE: this rebinds `text` from the previous cell.
text = "This is a demo sentence " * 50   # 5-word sentence repeated 50 times

# In the fallback shim, chunk_size and overlap are counted in
# whitespace-separated words.
chunks = chunk_text(text, chunk_size=20, overlap=5)

print("Number of chunks:", len(chunks))
print("Sample chunk:\n", chunks[0])


Number of chunks: 17
Sample chunk:
 This is a demo sentence This is a demo sentence This is a demo sentence This is a demo sentence
