In [1]:
# Minimal test notebook for the AI agent
# - INI reading (small subset)
# - File import (tab-delimited, similar to pipeline)
# - AI placeholder cells (to implement llm_transform and prompt templates)
# - Export example

import configparser
import ast
from pathlib import Path


def _parse_list_option(val):
    if not val:
        return []
    try:
        parsed = ast.literal_eval(val)
        if isinstance(parsed, (list, tuple)):
            return [str(x).strip() for x in parsed]
    except Exception:
        return [c.strip() for c in str(val).split(',') if c.strip()]
    return []


def find_config(name="lang_pipeline.ini"):
    """Locate the config file `name` relative to the current working directory.

    The working directory is checked first, then its ancestors, covering six
    directory levels in total. If none of them contains the file, the tree below
    the working directory is searched recursively.

    Returns:
        Path of the first match, or None when the file cannot be found.
    """
    cwd = Path.cwd()
    # Working directory plus up to five ancestors, nearest first.
    search_dirs = [cwd, *cwd.parents][:6]
    for directory in search_dirs:
        candidate = directory / name
        if candidate.exists():
            return candidate
    # Fallback: first hit anywhere below the working directory.
    for match in cwd.rglob(name):
        return match
    return None


# Locate and load the INI file. CONFIG stays empty when no file is found or the
# file has no [pipeline] section; later cells then fall back to the defaults they
# pass to CONFIG.get(...).
cfg_path = find_config()
CONFIG = {}
if cfg_path:
    cp = configparser.ConfigParser()
    cp.read(cfg_path)
    if 'pipeline' in cp:
        sec = cp['pipeline']
        # Input selection and preview/export options
        CONFIG['source'] = sec.get('source', None)
        CONFIG['source_browser'] = sec.getboolean('source_browser', fallback=True)
        CONFIG['preview'] = sec.getboolean('preview', fallback=True)
        CONFIG['preview_rows'] = sec.getint('preview_rows', fallback=20)
        CONFIG['columns_export_filter'] = sec.getboolean('columns_export_filter', fallback=False)
        CONFIG['columns_export'] = _parse_list_option(sec.get('columns_export', ''))
        # agent placeholders + Ollama settings
        CONFIG['ai_provider'] = sec.get('ai_provider', 'ollama')
        CONFIG['ai_model'] = sec.get('ai_model', 'mistral-small3.2')
        CONFIG['ai_ollama_url'] = sec.get('ai_ollama_url', 'http://localhost:11434')
        CONFIG['ai_timeout'] = sec.getint('ai_timeout', fallback=30)
        CONFIG['ai_retries'] = sec.getint('ai_retries', fallback=1)
        CONFIG['ai_structured_response'] = sec.getboolean('ai_structured_response', fallback=True)
        CONFIG['ai_batch_size'] = sec.getint('ai_batch_size', fallback=20)
        # temperature (float): an unparsable value falls back to 0.2 instead of aborting
        try:
            CONFIG['ai_temperature'] = float(sec.get('ai_temperature', '0.2'))
        except (TypeError, ValueError):
            CONFIG['ai_temperature'] = 0.2
        CONFIG['custom_prompt'] = sec.get('custom_prompt', '')
        print(f"Loaded config from: \n{cfg_path}")
    else:
        # Do not claim success when the file was found but nothing was read from it.
        print(f"Config file has no [pipeline] section; using defaults: \n{cfg_path}")
else:
    print("No lang_pipeline.ini found; using defaults")

Loaded config from: 
/Users/dunevv/WorkLocal/_AI_/HoudiniElf/tools_Didka/Notebooks/lang_pipeline.ini


In [2]:
# File selection (interactive or from INI)
import platform, subprocess
from pathlib import Path

# Choose the input file: open a native file dialog unless the INI both disables
# the browser and names a source file.
if CONFIG.get('source_browser', True) or not CONFIG.get('source'):
    try:
        from tools.ui_native import pick_file
    except Exception:
        def pick_file(filter_str=None):
            """Fallback file picker using the OS-native dialog (macOS / Windows).

            `filter_str` is accepted for signature compatibility but is not used here.
            Returns the chosen path as a string (empty when nothing was printed by
            the dialog command). Raises NotImplementedError on other platforms.
            """
            os_name = platform.system()
            if os_name not in ("Darwin", "Windows"):
                raise NotImplementedError("No native file dialog for this OS")
            if os_name == "Darwin":
                script = '''
                set theFile to choose file
                POSIX path of theFile
                '''
                dialog_cmd = ["osascript", "-e", script]
            else:
                ps_script = r'''
                Add-Type -AssemblyName System.Windows.Forms
                $ofd = New-Object System.Windows.Forms.OpenFileDialog
                $ofd.Filter = "All files (*.*)|*.*"
                if ($ofd.ShowDialog() -eq "OK") { Write-Output $ofd.FileName }
                '''
                dialog_cmd = ["powershell", "-NoProfile", "-Command", ps_script]
            completed = subprocess.run(dialog_cmd, capture_output=True, text=True)
            return completed.stdout.strip()

    pick_path = pick_file("CSV files (*.csv)|*.csv")
    if not pick_path:
        raise FileNotFoundError("No file selected")

    input_path = Path(pick_path)
    print(f"Selected input file: \n{input_path}")
else:
    # The INI pins the source file; it must already exist.
    input_path = Path(CONFIG['source'])
    if not input_path.exists():
        raise FileNotFoundError(f"Configured (INI) source not found: {input_path}")
    print("Configured (INI) source used:", input_path)

Configured (INI) source used: /Users/dunevv/WorkLocal/_AI_/HoudiniElf/tools_Didka/test_files/sources/Process_names_LLMoutput_mini_numbered_cut.csv


In [3]:
# Line-by-line import (tab-delimited)
import csv

# Read the tab-delimited input into a list of dicts keyed by the header row.
#   rows        - one dict per well-formed record
#   bad_rows    - (record_number, fields) for records that do not fit the header
#   total_lines - number of data records read (header excluded)
rows = []
bad_rows = []
total_lines = 0
header = []
# newline="" hands line-ending handling to the csv module, which it needs in order
# to read quoted fields containing embedded newlines correctly.
with input_path.open("r", encoding="utf-8", errors="replace", newline="") as f:
    reader = csv.reader(f, delimiter="\t", quotechar='"', escapechar='\\')
    header = next(reader, [])  # empty file -> empty header
    n_cols = len(header)
    for line_number, line in enumerate(reader, start=2):
        total_lines += 1
        if len(line) == n_cols:
            rows.append(dict(zip(header, line)))
        elif n_cols and len(line) > n_cols and len(line) % n_cols == 0:
            # Several records ended up on one line: split into header-sized chunks.
            # The `n_cols` guard avoids a ZeroDivisionError when the header is empty.
            for i in range(0, len(line), n_cols):
                rows.append(dict(zip(header, line[i:i + n_cols])))
        else:
            bad_rows.append((line_number, line))

expected_columns = header
print(f"Header: \n{expected_columns}\n")
print(f"Imported {len(rows)} rows ({total_lines} lines read).")
if bad_rows:
    print(f"{len(bad_rows)} malformed lines found.")
else:
    print("No malformed lines found.")

Header: 
['ID', 'name', 'language', 'translation', 'confidence', 'breakdown', 'Remaining', 'Unallowed_characters', 'transliterations', 'Unmapped_in_transliterions']

Imported 41 rows (41 lines read).
No malformed lines found.


In [None]:
# AI adapter (batched no-op)
# Calls the model in batches for testing/debugging but DOES NOT modify rows or add columns.
import subprocess, json, math, time


def call_ollama_run(model, prompt, timeout=CONFIG.get('ai_timeout', 30)):
    """Run `ollama run <model>` with `prompt` on stdin and capture the reply.

    Args:
        model: Model name passed to the `ollama run` command.
        prompt: Prompt text; sent UTF-8 encoded on the process's stdin.
        timeout: Seconds before the subprocess call is aborted. Note that the
            default is read from CONFIG once, when this function is defined.

    Returns:
        tuple: (output, None) on success, or (None, error_message) on failure.
        The error message is never empty, so callers can test it with `if err:`.
    """
    cmd = ["ollama", "run", model]
    try:
        proc = subprocess.run(cmd, input=prompt.encode('utf-8'), stdout=subprocess.PIPE, stderr=subprocess.PIPE, timeout=timeout)
    except Exception as e:
        # Covers a missing `ollama` binary, timeouts and other launch failures.
        return None, f"ollama-run-failed: {e}"
    if proc.returncode != 0:
        stderr_text = proc.stderr.decode('utf-8', errors='replace').strip()
        # A failing process may write nothing to stderr; still report a failure.
        return None, stderr_text or f"ollama-run-failed: exit code {proc.returncode}"
    return proc.stdout.decode('utf-8', errors='replace').strip(), None


def llm_transform(records):
    """Batched model invocation that does not modify rows.

    Sends the `name` field of each record to the model in batches of
    CONFIG['ai_batch_size'] and prints a one-line status per batch. The replies
    are only inspected, never merged back into the records.

    Args:
        records: List of dict rows; only the 'name' key is read.

    Returns:
        The original `records` object unchanged ([] when `records` is empty).

    Side effects:
        Stores one status dict per batch in `llm_transform.last_outputs` so the
        raw results can be inspected after the call.
    """
    if not records:
        return []

    batch_size = max(1, CONFIG.get('ai_batch_size', 20))
    model = CONFIG.get('ai_model', 'mistral-small3.2')
    timeout = CONFIG.get('ai_timeout', 30)
    outputs = []
    for start in range(0, len(records), batch_size):
        batch = records[start:start + batch_size]
        end = start + len(batch)
        label = f"Batch {start+1}-{end}"
        # Keep payload small by including only batch-local indices and names
        items = [{"idx": idx, "name": rec.get('name', '')} for idx, rec in enumerate(batch)]
        prompt = (
            f"For the following {len(batch)} items return a JSON array of objects with fields 'idx' and 'ai_notes' (short string). "
            "Only return the JSON array and nothing else.\n\nINPUT:\n" + json.dumps(items, ensure_ascii=False)
        )
        out, err = call_ollama_run(model, prompt, timeout=timeout)
        if err or out is None:
            # Treat "no output" as an error even if the adapter gave no message,
            # so the parsing code below never sees None.
            err = err or "no output returned"
            print(f"{label}: Ollama error: {err}")
            outputs.append({'start': start, 'end': end, 'ok': False, 'error': err})
            continue
        # try to parse JSON but DO NOT modify the rows
        try:
            parsed = json.loads(out)
        except ValueError:
            parsed = None
        if isinstance(parsed, list):
            outputs.append({'start': start, 'end': end, 'ok': True, 'parsed': parsed})
            print(f"{label}: parsed {len(parsed)} items")
        else:
            # Invalid JSON, or valid JSON that is not the requested array:
            # record exactly one failure entry for this batch.
            outputs.append({'start': start, 'end': end, 'ok': False, 'raw_len': len(out)})
            print(f"{label}: could not parse JSON response (raw length {len(out)}).")
        time.sleep(0.1)
    print(f"Completed {math.ceil(len(records)/batch_size)} batches (batch_size={batch_size}).")
    llm_transform.last_outputs = outputs
    return records

# Interactive smoke test: push the imported rows through the batched no-op transform.
processed_rows = llm_transform(rows)
processed_count = len(processed_rows)
print(f"Processed {processed_count} rows via llm_transform (batched noop).")

LLM transform failed: name 'llm_transform' is not defined
Proceeding with 41 rows (marked ai_notes='llm_error').


In [5]:
# Quick preview of processed rows
import pandas as pd

# Show the first ten processed rows as plain text, or say that there is nothing to show.
if not processed_rows:
    print("No processed rows to preview")
else:
    df = pd.DataFrame(processed_rows)
    print("Preview of transformed rows:")
    preview_text = df.head(10).to_string(index=False)
    print(preview_text)

Preview of transformed rows:
ID                                name language        translation confidence                                                                                                                                                                                      breakdown Remaining Unallowed_characters transliterations Unmapped_in_transliterions  ai_notes
                       Achraf Rent Car      ARA  تأجير سيارات أشرف         85                                                                             Rent Car' translates to تأجير سيارات which clarifies the service offered, while preserving the brand name 'Achraf.                                                                            llm_error
                        Achraf rohdine      ARA        أشرف روهدين         90                                                                                                                       Transliterated as a proper noun, maintaining the original pronuncia

In [None]:
# Export result (tab-delimited) — safe overwrite, original columns only
import csv

# Output goes next to the input file: <stem>_agent_test_out<ext>; an existing file is overwritten.
output_suffix = "_agent_test_out"
stem, ext = input_path.stem, input_path.suffix
output_path = input_path.with_name(f"{stem}{output_suffix}{ext}")
# Only the columns of the imported header are exported; extra keys on a row are ignored.
export_fieldnames = list(expected_columns)
print(f"Exporting to: {output_path}")

with output_path.open("w", encoding="utf-8", newline="") as f:
    writer = csv.DictWriter(
        f,
        fieldnames=export_fieldnames,
        delimiter="\t",
        extrasaction='ignore',
        restval='',
    )
    writer.writeheader()
    for row in processed_rows:
        writer.writerow(row)

print("Export complete (original columns only)")

In [None]:
# Export result (tab-delimited)
import csv

# This test adds no AI columns, so the export header is the imported header as-is.
new_cols = ()
export_fieldnames = [*expected_columns]

# Output file name: input stem + suffix, same extension, same directory.
output_suffix = "_agent_test_out"
stem = input_path.stem
ext = input_path.suffix
output_name = f"{stem}{output_suffix}{ext}"
output_path = input_path.with_name(output_name)
print(f"Exporting to: {output_path}")

with output_path.open("w", encoding="utf-8", newline="") as f:
    # Keys missing from a row become '', keys outside the header are dropped.
    writer = csv.DictWriter(f, export_fieldnames, restval='', extrasaction='ignore', delimiter="\t")
    writer.writeheader()
    writer.writerows(processed_rows)

print("Export complete")

Exporting to: /Users/dunevv/WorkLocal/_AI_/HoudiniElf/tools_Didka/test_files/sources/Process_names_LLMoutput_mini_numbered_cut_agent_test_out.csv
Export complete
