In [None]:
import json
import os
import random

from mvodolagin_personal_imports import *

load_dotenv()

In [None]:
from mvodolagin_personal_imports.langchain_stuff import *

In [None]:
# Root directory whose sub-directories hold the item_*.json files used below.
# The original Windows path stays the default; set TEXTTAILOR_DATA_DIR (for example
# in the .env file read by load_dotenv()) to run the notebook on another machine.
data_dir = Path(
    os.environ.get(
        "TEXTTAILOR_DATA_DIR",
        r"E:\Work\TextTailor\repos\text_tailor_scrapers\texttailor\texttailor\dev\data",
    )
)

In [None]:
# Pick the first sub-directory of data_dir that holds at least 20 item_*.json files.
# After the loop, `site_dir` and `item_files` refer to that directory.
for site_dir in data_dir.iterdir():
    if not site_dir.is_dir():
        continue
    item_files = list(site_dir.glob("item_*.json"))
    if len(item_files) >= 20:
        break
else:
    # Without this guard the notebook would silently continue with whatever
    # `item_files` was assigned last, or hit a NameError in a later cell.
    raise RuntimeError(f"No directory under {data_dir} contains at least 20 item_*.json files")

In [None]:
# Build a small sample of description texts from the item files, in random order.
examples = []

# Shuffles item_files in place; no seed is set in the visible cells, so the sample
# differs between runs.
random.shuffle(item_files)

for i, fp in enumerate(item_files):
    data = json.loads(fp.read_text(encoding="utf-8"))
    # The strings under "matching_texts" are joined into one description, separated by blank lines.
    original_description_text = "\n\n".join(data["matching_texts"])
    examples.append(original_description_text)
    # The check runs after the append, so the loop stops once 7 texts (i == 6) are collected.
    # NOTE(review): if 6 or 5 samples were intended, the bound is off — confirm.
    if i > 5:
        break
# `data` and `original_description_text` keep the values of the last file read; later cells use them.

In [None]:
data.keys()

In [None]:
data["url"]

In [None]:
original_description_text

In [None]:
import langsmith

# `Client` is exported from the package root; going through the `langsmith.client`
# attribute only works when that submodule has already been imported somewhere else.
langsmith_client = langsmith.Client()
dataset_name = "Short Test"

# One-off dataset seeding (disabled):
# dataset = langsmith_client.create_dataset(dataset_name=dataset_name)
# for e in examples:
#     langsmith_client.create_example(dataset_name=dataset_name, inputs={"original_description": e})

In [None]:
# Chat model configured for "gpt-4o". `common_settings` is not defined in the visible cells —
# presumably it comes from the star imports at the top (TODO confirm).
omni_llm = ChatOpenAI(model_name="gpt-4o", **common_settings)

In [None]:
import openai

In [None]:
def get_baseline_chain(inputs, llm=None):
    """Assemble the baseline "rewrite this product description" chain.

    Args:
        inputs: Not read by this function. NOTE(review): presumably present so the
            function can be handed to the dataset runner as a per-example callable —
            confirm against the callers.
        llm: Chat model to put in the chain; any falsy value selects ``basic_llm``.

    Returns:
        A runnable composed of prompt -> model -> extraction of ``.content`` from the
        model's reply. The prompt expects an ``original_description`` variable.
    """
    model = llm or basic_llm

    system_text = """You are an expert copywriter specialized in crafting engaging and SEO-optimized product descriptions for an online furniture store. Your task is to transform basic product descriptions into compelling, informative content that appeals to potential customers and search engines alike. Each description should be around 400 words, creatively written to highlight the features and benefits of the product while incorporating relevant keywords to boost search engine visibility. Your writing should be clear, lively, and persuasive, designed to attract and retain customer interest and drive sales."""
    user_text = "{original_description}"

    # Both templates go through the same whitespace clean-up before the prompt is built.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", trim_extra_whitespace(system_text)),
            ("human", trim_extra_whitespace(user_text)),
        ]
    )

    return prompt | model | RunnableLambda(lambda reply: reply.content)


In [None]:
from functools import partial

# Metadata shared by the baseline runs in this cell and the next one.
run_metadata = {"version": "dev_baseline_1"}
# NOTE(review): `evaluation` receives a list holding the *string* "eval_general", while the
# evaluator function of that name is only defined in a later cell. Presumably run_on_dataset
# expects an evaluation-config object here — TODO confirm against the LangSmith/LangChain docs.
langsmith_client.run_on_dataset(
    llm_or_chain_factory=partial(get_baseline_chain, llm=basic_llm), dataset_name=dataset_name,
    project_metadata={**run_metadata, "model": "3.5"},
    evaluation=["eval_general"]
)

In [None]:
# Same dataset run with omni_llm, tagged as model "4o"; no evaluation is configured here.
# Relies on `run_metadata` defined in the previous cell.
langsmith_client.run_on_dataset(llm_or_chain_factory=partial(get_baseline_chain, llm=omni_llm),
                                dataset_name=dataset_name, project_metadata={**run_metadata, "model": "4o"})


In [None]:
original_description_text

In [None]:
from langsmith.evaluation import run_evaluator
import langsmith.schemas


@run_evaluator
def eval_general(run: langsmith.schemas.Run, example: langsmith.schemas.Example) -> dict:
    """Grade a generated product description with an LLM judge.

    The judge (``basic_llm``) is asked for a JSON object with the keys
    "good", "bad" and "score".

    Args:
        run: Run whose ``outputs["output"]`` holds the generated description.
        example: Dataset example belonging to the run; not used.

    Returns:
        Dict with ``key`` set to "general" and ``comment`` set to the raw judge reply.
        When the reply parses as a JSON object, ``correction`` holds that object
        (without "score") and ``score`` holds the score as a float.
    """
    # Prompt kept byte-for-byte (including the "abad" typo) so scores stay comparable with earlier runs.
    role_message = """
    You are an expert evaluator tasked with analyzing and critiquing product descriptions. Your primary role is to assess the effectiveness, creativity, and SEO alignment of the content. Provide a short and concise feedback list that outlines what is good and what is abad. Additionally, assign a score out of 100 based on the overall quality, taking into account factors like clarity, engagement, keyword integration, accuracy, and stylistic appeal. 
    Write the result as a JSON with keys of "good", "bad", "score".
    """

    human_template = "{new_description}"

    messages = [("system", role_message), ("human", human_template)]
    messages = [(role, trim_extra_whitespace(message)) for role, message in messages]

    full_prompt = ChatPromptTemplate.from_messages(messages)

    # `run.outputs` may be None (the Run schema declares it optional), so fall back to an empty dict.
    prediction = (run.outputs or {}).get("output", "")

    eval_res = (full_prompt | basic_llm).invoke({"new_description": prediction})
    result = {"comment": eval_res.content}
    try:
        eval_score = safe_json_loads(eval_res.content)
        if not isinstance(eval_score, dict):
            raise ValueError(f"expected a JSON object, got {type(eval_score).__name__}")
        if "score" in eval_score:
            result["score"] = float(eval_score.pop("score"))
        result["correction"] = eval_score
    except Exception as exc:
        # Best effort: an unparseable reply still yields feedback carrying the raw comment,
        # but report the problem instead of hiding it.
        print(f"eval_general: could not parse judge reply ({exc!r}); returning comment only")

    print(result)

    return {"key": "general", **result}


# Ad-hoc check: apply eval_general to one existing run, referenced by a hard-coded run id.
qq = langsmith_client.evaluate_run(run="7af3dd73-a8a9-492b-8e33-e0f5775a018b", evaluator=eval_general)


In [None]:
qq

In [None]:
qq.correction

In [None]:
# Creates the "Tech Test" dataset.
# NOTE(review): presumably fails when a dataset with this name already exists — verify before re-running.
langsmith_client.create_dataset(dataset_name="Tech Test")

In [None]:
# Adds one example to "Tech Test", using the description left over from the last file read
# in the sampling loop.
langsmith_client.create_example(dataset_name="Tech Test", inputs={"original_description": original_description_text})

In [None]:
from langsmith.evaluation import evaluate

# Experiment over the "Tech Test" dataset, scored with eval_general.
# NOTE(review): the get_baseline_chain defined earlier in this notebook returns a chain rather
# than invoking it, while eval_general reads run.outputs["output"] — verify what `evaluate`
# records as the output for this target.
qqq = evaluate(
    partial(get_baseline_chain, llm=basic_llm),
    data="Tech Test",
    metadata={**run_metadata, "model": "3.5"},
    evaluators=[eval_general],
    experiment_prefix="test",
    client=langsmith_client
)


In [None]:
qqq

In [None]:
from langfuse.callback import CallbackHandler
# Handler built without explicit arguments.
# NOTE(review): presumably it picks up the Langfuse credentials from the environment — confirm.
langfuse_handler = CallbackHandler()

# Smoke test: a single model call with the handler attached as a callback.
basic_llm.invoke("What's cookin?", config=dict(callbacks=[langfuse_handler]))

In [None]:
# Shared keyword arguments for `.with_config(**invoke_config)`; stays empty unless Langfuse is configured.
invoke_config = {}
# Tracing is enabled only when all three Langfuse settings are present and non-empty.
if all(os.environ.get(var) for var in ("LANGFUSE_PUBLIC_KEY", "LANGFUSE_SECRET_KEY", "LANGFUSE_HOST")):
    langfuse_handler = CallbackHandler(
        public_key=os.environ["LANGFUSE_PUBLIC_KEY"],
        secret_key=os.environ["LANGFUSE_SECRET_KEY"],
        host=os.environ["LANGFUSE_HOST"],
    )
    invoke_config.setdefault("callbacks", []).append(langfuse_handler)

In [None]:
import langchain_core.runnables.config

In [None]:
load_dotenv()

In [None]:
from langfuse.decorators import observe
 
def get_baseline_chain(inputs, llm=None):
    """Run the baseline rewrite chain on ``inputs`` and return the generated text.

    Unlike a pure chain factory, this variant invokes the chain itself, with the
    module-level ``langfuse_handler`` attached as a callback.

    Args:
        inputs: Mapping passed to the chain; the prompt expects ``original_description``.
        llm: Chat model to use; any falsy value selects ``basic_llm``.

    Returns:
        ``{"output": <content of the model's reply>}``
    """
    if llm:
        model = llm
    else:
        model = basic_llm

    system_text = """You are an expert copywriter specialized in crafting engaging and SEO-optimized product descriptions for an online furniture store. Your task is to transform basic product descriptions into compelling, informative content that appeals to potential customers and search engines alike. Each description should be around 400 words, creatively written to highlight the features and benefits of the product while incorporating relevant keywords to boost search engine visibility. Your writing should be clear, lively, and persuasive, designed to attract and retain customer interest and drive sales."""
    user_text = "{original_description}"

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", trim_extra_whitespace(system_text)),
            ("human", trim_extra_whitespace(user_text)),
        ]
    )
    pipeline = prompt | model | RunnableLambda(lambda reply: reply.content)

    traced_pipeline = pipeline.with_config({"callbacks": [langfuse_handler]})
    generated = traced_pipeline.invoke(inputs)
    return {"output": generated}

In [None]:
get_baseline_chain({"original_description": "This is a test"})

In [None]:
# basic_llm with langfuse_handler bound as a callback.
# NOTE(review): the get_baseline_chain defined above already attaches the same handler to the
# whole chain, so passing this model in presumably registers it twice — verify the resulting traces.
llm_with_callbacks = basic_llm.with_config({"callbacks": [langfuse_handler]})

In [None]:
get_baseline_chain({"original_description": "This is a test"}, llm=llm_with_callbacks)

In [None]:
from langfuse import Langfuse
 
# Client built without explicit arguments.
# NOTE(review): presumably configured through the LANGFUSE_* environment variables — confirm.
langfuse_client = Langfuse()

# Create the "tech_test" dataset and add one item whose input mirrors the chain's prompt variable.
# NOTE(review): re-running this cell presumably adds another item each time — verify.
langfuse_client.create_dataset(name="tech_test")
langfuse_client.create_dataset_item(dataset_name="tech_test", input={"original_description": "This is a test"})

In [None]:
# def run_langchain_experiment(experiment_name, system_message):
#   dataset = langfuse.get_dataset("capital_cities")
#  
#   for item in dataset.items:
#     handler = item.get_langchain_handler(run_name=experiment_name)
#  
#     completion = run_my_langchain_llm_app(item.input["country"], system_message, handler)
#  
#     handler.trace.score(
#       name="exact_match",
#       value=simple_evaluation(completion, item.expected_output)
#     )



In [None]:

def get_baseline_chain(llm=None):
    """Build the baseline product-description rewriting chain without running it.

    Args:
        llm: Chat model to use; any falsy value selects ``basic_llm``.

    Returns:
        A runnable (prompt -> model -> ``.content`` of the reply) whose prompt
        expects an ``original_description`` variable.
    """
    chosen_llm = basic_llm if not llm else llm

    copywriter_brief = """You are an expert copywriter specialized in crafting engaging and SEO-optimized product descriptions for an online furniture store. Your task is to transform basic product descriptions into compelling, informative content that appeals to potential customers and search engines alike. Each description should be around 400 words, creatively written to highlight the features and benefits of the product while incorporating relevant keywords to boost search engine visibility. Your writing should be clear, lively, and persuasive, designed to attract and retain customer interest and drive sales."""

    # (role, template) pairs; every template gets the same whitespace clean-up.
    raw_parts = (("system", copywriter_brief), ("human", "{original_description}"))
    cleaned_parts = []
    for role, template in raw_parts:
        cleaned_parts.append((role, trim_extra_whitespace(template)))

    prompt = ChatPromptTemplate.from_messages(cleaned_parts)
    extract_text = RunnableLambda(lambda reply: reply.content)
    return prompt | chosen_llm | extract_text

def simple_evaluation(output, expected_output):
    """Grade a generated description with an LLM judge, in the shape used for trace scores.

    Args:
        output: Generated description handed to the judge.
        expected_output: Accepted for the caller's signature; not used.

    Returns:
        Dict with ``name`` set to "general" and a ``comment``. When the judge reply parses
        as a JSON object, ``comment`` is that object (without "score") dumped as JSON and
        ``value`` is the score as a float. Otherwise ``comment`` is the raw reply and
        ``value`` is absent.
    """
    # Prompt kept byte-for-byte (including the "abad" typo) so scores stay comparable with earlier runs.
    role_message = """
    You are an expert evaluator tasked with analyzing and critiquing product descriptions. Your primary role is to assess the effectiveness, creativity, and SEO alignment of the content. Provide a short and concise feedback list that outlines what is good and what is abad. Additionally, assign a score out of 100 based on the overall quality, taking into account factors like clarity, engagement, keyword integration, accuracy, and stylistic appeal. 
    Write the result as a JSON with keys of "good", "bad", "score".
    """

    human_template = "{new_description}"

    messages = [("system", role_message), ("human", human_template)]
    messages = [(role, trim_extra_whitespace(message)) for role, message in messages]

    full_prompt = ChatPromptTemplate.from_messages(messages)

    eval_res = (full_prompt | basic_llm).with_config(**invoke_config).invoke({"new_description": output})
    result = {"comment": eval_res.content}
    try:
        eval_score = safe_json_loads(eval_res.content)
        if not isinstance(eval_score, dict):
            raise ValueError(f"expected a JSON object, got {type(eval_score).__name__}")
        if "score" in eval_score:
            result["value"] = float(eval_score.pop("score"))
        result["comment"] = json.dumps(eval_score, indent=2)
    except Exception as exc:
        # Best effort: keep whatever was gathered so far, but report the problem
        # instead of hiding it.
        print(f"simple_evaluation: could not parse judge reply ({exc!r}); no numeric value extracted")

    print(result)

    return {"name": "general", **result}


def make_experiment_name():
    """Return the fallback experiment name; currently a fixed placeholder string."""
    placeholder_name = "not_implemented_yet"
    return placeholder_name


def process_dataset_item(item, experiment_name=None):
    """Run the baseline chain on one dataset item and attach the judge's score to its trace.

    Args:
        item: Dataset item; must provide ``get_langchain_handler``, ``input`` and
            ``expected_output``.
        experiment_name: Run name passed to the item's handler; a falsy value falls
            back to ``make_experiment_name()``.
    """
    if not experiment_name:
        experiment_name = make_experiment_name()

    # The handler obtained from the item is attached to the chain run as a callback.
    local_handler = item.get_langchain_handler(run_name=experiment_name)
    local_config = {"callbacks": [local_handler]}
    chain = get_baseline_chain()

    res = chain.with_config(**local_config).invoke(item.input)

    score = simple_evaluation(res, item.expected_output)

    # simple_evaluation leaves out "value" when the judge reply cannot be parsed.
    # Scoring needs a value, so skip it with a message rather than failing with a
    # missing-argument TypeError.
    if "value" not in score:
        print(f"process_dataset_item: no numeric score for run {experiment_name!r}; trace left unscored")
        return

    local_handler.trace.score(**score)
    
# Run name handed to process_dataset_item for every processed item.
experiment_name="usage_example_4"
for item in langfuse_client.get_dataset("tech_test").items:
    process_dataset_item(item, experiment_name=experiment_name)
    # Only the first dataset item is processed.
    break
