In [1]:
from collections import Counter, defaultdict, namedtuple
from copy import deepcopy
import functools
import inspect
import json
import os
from pathlib import Path
import pickle
from pprint import pp, pprint, pformat
import re
import shutil
import sys
import time
from typing import Dict, List

import numpy as np
import pandas as pd
import plotly.express as px

from colorutils import Color

from dotenv import load_dotenv
from jinja2 import Environment, FileSystemLoader, Template
import textwrap
from tqdm.auto import tqdm
# from tqdm import tqdm

import openai

from aic_nlp_utils.json import read_jsonl, read_json, write_json, write_jsonl, process_to_jsonl
from aic_nlp_utils.pycfg import parse_pycfg_args, read_pycfg
%load_ext autoreload
%autoreload 2

from prompt_opt.agents.agent_chat import AgentChat
from prompt_opt.models.predictor_loader import PredictorLoader
from prompt_opt.models.llm_predictor import LLMPredictor
from prompt_opt.optimizers.predict_evaluate import get_candidate_score, rank_candidates
from prompt_opt.utils import *

# Add the automated-fact-checking checkout to the module search path.
# NOTE(review): this runs after every import above, so it can only affect imports
# made later; the absolute, user-specific path will not exist on other machines.
sys.path.append("/home/drchajan/devel/python/FC/automated-fact-checking")

# Set VLLM_WORKER_MULTIPROC_METHOD for this process (presumably read by vLLM workers — TODO confirm).
os.environ['VLLM_WORKER_MULTIPROC_METHOD']='spawn'
# Load variables from a .env file; the `True` shown as the cell output is this call's return value.
load_dotenv()

True

In [3]:
def get_predictors(
    model_ids,
    openai_base_url="http://g01:8333/v1",
    model_name="meta-llama/Llama-3.1-8B-Instruct",
    guided_decoding_backend="outlines",
    template_dir="data/templates/agents",
):
    """Create one ``LLMPredictor``, smoke-test it, and map every id in ``model_ids`` to it.

    Args:
        model_ids: Iterable of ids; each one becomes a key of the returned dict.
        openai_base_url: Forwarded to ``LLMPredictor`` as its second positional
            argument (presumably the base URL of an OpenAI-compatible server —
            TODO confirm).
        model_name: Forwarded to ``LLMPredictor`` as its first positional argument.
        guided_decoding_backend: Forwarded to ``LLMPredictor``. Notes kept from
            earlier experiments:
            - "lm-format-enforcer": fails with Regex, JSON works consistently but slower
            - "outlines": Regex extremely slow, some inputs with JSON are extremely slow
        template_dir: Forwarded to ``LLMPredictor`` as ``template_dir``.

    Returns:
        dict mapping each id in ``model_ids`` to the *same* predictor instance
        (the predictor is shared, not copied). Empty ``model_ids`` gives ``{}``.

    Side effects:
        Sends one test query ("Capital of GB?") at temperature 0.0 through an
        ``AgentChat`` and prints the response, so a broken endpoint shows up
        here rather than later in the notebook.
    """
    # Alternative values for ``model_name``:
    #   "hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4"
    #   "alpindale/c4ai-command-r-plus-GPTQ"
    #   "aixsatoshi/c4ai-command-r-plus-08-2024-awq"
    #   "CohereForAI/aya-expanse-8b"
    #   "CohereForAI/aya-expanse-32b"
    #   "Qwen/Qwen2.5-72B-Instruct-AWQ"  # not really good for CS
    #   "mistralai/Ministral-8B-Instruct-2410"
    predictor = LLMPredictor(
        model_name,
        openai_base_url,
        guided_decoding_backend=guided_decoding_backend,
        template_dir=template_dir,
    )

    # Smoke test: render the system prompt template and ask one trivial question.
    system_content = predictor.get_template('chat/system_v1.txt.jinja').render()
    agent = AgentChat(predictor, system_content)
    response = agent.query("Capital of GB?", temperature=0.0)
    print(f"test response: {response}")

    return {mid: predictor for mid in model_ids}

# Seeded legacy NumPy generator; it is later passed to the mutation operator as `rng`.
rng = np.random.RandomState(seed=1234)
# Build (and smoke-test) the shared predictor and register it under the "optimizer" id.
predictors = get_predictors(["optimizer"])

test response: The capital of Great Britain (GB) is London.


In [4]:
def load_config(cfg_path, predictors=None, exp_path="EXP/tmp"):
    """Read a Python config file and instantiate the optimizer it describes.

    Args:
        cfg_path: Path of the config file, handed to ``read_pycfg``.
        predictors: Ready-made predictors to give to the optimizer. When this
            is falsy (``None`` or empty), predictors are loaded through
            ``PredictorLoader`` from the config's ``"models"`` entry instead.
        exp_path: Experiment directory; converted to ``pathlib.Path`` and passed
            on to the predictor loader and the optimizer.

    Returns:
        Tuple ``(cfg, optimizer)``: the parsed config and the optimizer instance
        built from its ``"optimizer"`` entry.
    """
    exp_dir = Path(exp_path)
    config = read_pycfg(cfg_path)

    # The dataset loader is created first because the optimizer receives it.
    loader = get_class_instance_by_config(config["dataset_loader"])

    if not predictors:
        predictors = PredictorLoader(config["models"], exp_path=exp_dir).load()

    optimizer = get_class_instance_by_config(
        config["optimizer"],
        exp_path=exp_dir,
        dataset_loader=loader,
        predictors=predictors,
    )
    return config, optimizer


# Build the optimizer from the config file, reusing the predictors created above
# so that load_config does not load its own through PredictorLoader.
cfg, optimizer = load_config("cfg/cfg_sir_phrase_hclimb_v2.py", predictors=predictors)

[32m2024-12-03 15:57:47.248[0m | [1mINFO    [0m | [36mprompt_opt.dataset_loader.loader_sir_phrase-v1[0m:[36m__init__[0m:[36m9[0m - [1mloading RandomSearch...[0m
[32m2024-12-03 15:57:47.249[0m | [1mINFO    [0m | [36mprompt_opt.dataset_loader.loader_sir_phrase-v1[0m:[36m__init__[0m:[36m13[0m - [1mdata_dir: /home/drchajan/devel/python/FC/long_sum/data/labeled_datasets/sir1.0_triple_manual_phrases.jsonl[0m
[32m2024-12-03 15:57:47.252[0m | [1mINFO    [0m | [36mprompt_opt.dataset_loader.loader_sir_phrase-v1[0m:[36m__init__[0m:[36m16[0m - [1mdataset loaded: trn(5), tst(15)[0m
[32m2024-12-03 15:57:47.253[0m | [1mINFO    [0m | [36mprompt_opt.dataset_loader.loader_sir_phrase-v1[0m:[36m__init__[0m:[36m30[0m - [1mdataset output schema:
{
  "type": "array",
  "items": {
    "type": "string"
  }
}[0m
[32m2024-12-03 15:57:47.255[0m | [1mINFO    [0m | [36mprompt_opt.optimizers.hill_climber[0m:[36m__init__[0m:[36m16[0m - [1mloading RandomSearch

Reading Python config: cfg/cfg_sir_phrase_hclimb_v2.py


[32m2024-12-03 15:57:48.261[0m | [1mINFO    [0m | [36mprompt_opt.ops.score_json[0m:[36m__init__[0m:[36m13[0m - [1mloading ScoreObjectAligner...[0m
[32m2024-12-03 15:57:48.262[0m | [1mINFO    [0m | [36mprompt_opt.ops.score_json[0m:[36m__init__[0m:[36m16[0m - [1mdataset metric schema:
{
  "type": "array",
  "items": {
    "type": "string",
    "score": "jaro",
    "threshold": 0.5
  },
  "order": "align"
}[0m
[32m2024-12-03 15:57:48.263[0m | [1mINFO    [0m | [36mprompt_opt.ops.score_json[0m:[36m__init__[0m:[36m19[0m - [1mdataset metric score_key: oa-05[0m
[32m2024-12-03 15:57:48.263[0m | [1mINFO    [0m | [36mprompt_opt.ops.score_json[0m:[36m__init__[0m:[36m13[0m - [1mloading ScoreObjectAligner...[0m
[32m2024-12-03 15:57:48.264[0m | [1mINFO    [0m | [36mprompt_opt.ops.score_json[0m:[36m__init__[0m:[36m16[0m - [1mdataset metric schema:
{
  "type": "array",
  "items": {
    "type": "string",
    "score": "jaro",
    "threshold": 0.7

In [6]:
# Load the archive.jsonl of an earlier experiment run to have candidates to test with.
# NOTE(review): absolute, user-specific path — not portable to other machines.
archive = read_jsonl("/home/drchajan/devel/python/FC/prompt_opt/EXP/sir_phrase-V2/seed_114529/archive.jsonl")

In [7]:
# Select a candidate from the archive by its rank on the "tst" split under the "oa-07" score key.
rank_idxs = rank_candidates(archive, "tst", "oa-07") # indices of candidates, sorted by decreasing score
# NOTE(review): index 10 picks the 11th-ranked candidate, not the best one
# (that would be rank_idxs[0]) — confirm this is intended.
candidate = archive[rank_idxs[10]]

In [8]:
# Display the selected candidate's prompt.
# NOTE(review): pf and candidate2prompt_md are not defined in this notebook;
# presumably they come from `from prompt_opt.utils import *` — TODO confirm.
pf(candidate2prompt_md(candidate))

Perform the following steps to convert a query to an answer:

1. Inspect the query to identify sentences that contain attribution information, which may be indicated by specific
words or phrases, such as "According to," "Stated," or "Claimed."
2. Within these sentences, look for verbs or phrases that introduce quotes, statements, or references to sources.
Examples of such verbs or phrases include "Stated," "Reported," or "Claimed."
3. Identify the specific verbs or phrases that indicate attribution and extract them from the query. Take into account
different languages and variations of these verbs or phrases.
4. Format the extracted verbs or phrases into a list, where each element is a string representing a verb or phrase
indicating attribution. If a sentence contains multiple verbs or phrases indicating attribution, extract all of them and
add them to the list as separate elements.
5. Review the list to ensure that it accurately represents the attribution information in the query. Ver

In [11]:
def store_fn():
    """Do nothing; placeholder passed to ``predict_evaluate_log`` as its second argument."""
    return None

# Ask the optimizer's mutation operator for a single neighbour of `candidate`.
# The `pel` callback routes each new candidate through predict_evaluate_log
# with the no-op store_fn; `rng` is the seeded generator created earlier.
ret = optimizer.mutate_op.mutate(
    candidate,
    n_neighbors=1,
    pel=lambda c: optimizer.pel.predict_evaluate_log(c, store_fn),
    rng=rng,
)
ret

[32m2024-12-03 16:03:31.270[0m | [1mINFO    [0m | [36mprompt_opt.ops.mutate[0m:[36mmutate[0m:[36m81[0m - [1mgenerating neighbor 1/1[0m
[32m2024-12-03 16:03:58.158[0m | [1mINFO    [0m | [36mprompt_opt.optimizers.predict_evaluate[0m:[36mpredict_evaluate_log[0m:[36m112[0m - [1m    making predictions for "trn"...[0m
[32m2024-12-03 16:04:49.545[0m | [1mINFO    [0m | [36mprompt_opt.optimizers.predict_evaluate[0m:[36mpredict_candidate_json[0m:[36m70[0m - [1m      done 1/5[0m
[32m2024-12-03 16:04:57.578[0m | [1mINFO    [0m | [36mprompt_opt.optimizers.predict_evaluate[0m:[36mpredict_candidate_json[0m:[36m70[0m - [1m      done 2/5[0m
[32m2024-12-03 16:05:04.412[0m | [1mINFO    [0m | [36mprompt_opt.optimizers.predict_evaluate[0m:[36mpredict_candidate_json[0m:[36m70[0m - [1m      done 3/5[0m
[32m2024-12-03 16:05:11.514[0m | [1mINFO    [0m | [36mprompt_opt.optimizers.predict_evaluate[0m:[36mpredict_candidate_json[0m:[36m70[0m - 

[{'messages': [{'role': 'system',
    'content': 'You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to respond to user input. Follow these steps:\n\n1. Think through the problem step by step; use the "Thinking" section to mark this stage.\n2. Reflect on your thinking to check for any errors or improvements using the "Reflection" section.\n3. Make any necessary adjustments based on your reflection.\n4. Provide your final, concise response in the "Response" section.\n\nImportant: The "Thinking" and "Reflection" sections are only for your internal reasoning process. \nDo not include any part of the final response in these sections. \nThe actual response to the user input must be entirely contained within the "Response" section.\n\nIt is absolutely CRITICAL that all your (the assistant\'s) outputs use Markdown containing exactly three consequent sections "Thinking", "Reflection", and "Response" as follows:\n\n# Thinking\n<Your step-by-step reasoning goes h

In [12]:
# Number of candidates returned by the mutation (n_neighbors=1 was requested).
len(ret)

1

In [17]:
# Score of the original (unmutated) candidate on the "trn" split under the "oa-07" score key.
get_candidate_score(candidate, "trn", "oa-07")

0.7053968253968254

In [16]:
# Score of the mutated neighbour on the same split and score key, for comparison with the original.
get_candidate_score(ret[0], "trn", "oa-07")

0.18

In [19]:
# Display the mutated neighbour's prompt, using the same helpers as for the original candidate.
pf(candidate2prompt_md(ret[0]))

1.  Inspect the query to identify sentences that contain attribution information, which may be indicated by specific
words or phrases, such as "According to," "Stated," or "Claimed."
2.  Within these sentences, look for verbs or phrases that introduce quotes, statements, or references to sources.
Examples of such verbs or phrases include "Stated," "Reported," or "Claimed."
3.  Identify the specific verbs or phrases that indicate attribution and extract them from the query. Consider the
following attribution verbs or phrases: 'said', 'reported', 'claimed', 'stated', 'claimed', etc. Also, consider
variations of these verbs or phrases in different languages.
4.  Include verbs or phrases that indicate direct or indirect attribution, such as "popsala" or "vzpomíná".
5.  Exclude phrases that are not directly related to attribution, such as "pro Radiožurnál".
6.  Format the extracted verbs or phrases into a list, where each element is a string representing a verb or phrase
indicating attribut

In [58]:
# Print the chat transcript stored with the mutated candidate, one message at a time.
# `ret` is a list of candidates and each candidate keeps its chat under "messages",
# so iterate over those messages rather than over `ret` itself (whose items have
# no "role"/"content" keys of their own).
for message in ret[0]["messages"]:
    print(message["role"].upper())
    pf(message["content"])
    print("=" * 120)

SYSTEM
You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to respond to user input. Follow
these steps:

1. Think through the problem step by step; use the "Thinking" section to mark this stage.
2. Reflect on your thinking to check for any errors or improvements using the "Reflection" section.
3. Make any necessary adjustments based on your reflection.
4. Provide your final, concise response in the "Response" section.

Important: The "Thinking" and "Reflection" sections are only for your internal reasoning process.
Do not include any part of the final response in these sections.
The actual response to the user input must be entirely contained within the "Response" section.

It is absolutely CRITICAL that all your (the assistant's) outputs use Markdown containing exactly three consequent
sections "Thinking", "Reflection", and "Response" as follows:

# Thinking
<Your step-by-step reasoning goes here. This is your internal thought process, not the final re