In [1]:
from mvodolagin_personal_imports import *

# `load_dotenv` arrives through the star import above — presumably python-dotenv's helper that
# copies entries from a local .env file into the process environment (TODO confirm).
# Being the last expression of the cell, its return value is what the cell displays.
load_dotenv()

True

In [2]:
from mvodolagin_personal_imports.langchain_stuff import *

In [3]:
import os

# Root folder of the scraped data; the next cell iterates its sub-directories looking for
# item_*.json files.
# The location can be overridden with the TEXTTAILOR_DATA_DIR environment variable so the
# notebook is not tied to one machine. When the variable is unset, the original local path is used.
data_dir = Path(
    os.environ.get(
        "TEXTTAILOR_DATA_DIR",
        r"E:\Work\TextTailor\repos\text_tailor_scrapers\texttailor\texttailor\dev\data",
    )
)

In [4]:
# Select the first sub-directory of `data_dir` that holds enough item files to sample from.
# After the loop, `site_dir` is the chosen directory and `item_files` its item_*.json paths.
# NOTE(review): `iterdir()` yields entries in arbitrary order, so "first" may differ between
# machines; wrap it in `sorted(...)` if a stable choice matters.
MIN_ITEMS_PER_SITE = 20

for site_dir in data_dir.iterdir():
    if not site_dir.is_dir():
        continue
    item_files = list(site_dir.glob("item_*.json"))
    if len(item_files) >= MIN_ITEMS_PER_SITE:
        break
else:
    # Without this guard the loop would end silently and later cells would sample from
    # whatever directory happened to be visited last (or hit a NameError on `item_files`).
    raise RuntimeError(
        f"No sub-directory of {data_dir} contains at least {MIN_ITEMS_PER_SITE} item_*.json files"
    )

In [5]:
# Build a small random sample of example inputs from the selected site's item files.
# The shuffle is in place and unseeded, so `item_files` is reordered and the sample
# differs from run to run.
random.shuffle(item_files)

# Each example is the item's "matching_texts" entries joined by blank lines.
# NOTE(review): this keeps up to 7 items (indices 0-6), matching the original `i > 5` cut-off;
# confirm whether 7 is the intended sample size.
examples = [
    "\n\n".join(json.loads(item_path.read_text(encoding="utf-8"))["matching_texts"])
    for item_path in item_files[:7]
]

In [14]:
import langsmith

# Use the package-level `langsmith.Client` export: attribute access to the `langsmith.client`
# submodule only works if something else has already imported that submodule.
langsmith_client = langsmith.Client()
dataset_name = "Short Test"

# Dataset seeding, kept disabled: when enabled it creates the dataset and uploads one example
# per entry of `examples`, each under the "original_description" input key.
# Presumably it was run once and then commented out to avoid re-creating the dataset — confirm.
# dataset = langsmith_client.create_dataset(dataset_name=dataset_name)
# for e in examples:
#     langsmith_client.create_example(dataset_name=dataset_name, inputs={"original_description": e})

In [6]:
def get_baseline_chain(llm=None):
    """Assemble the baseline product-description rewriting chain.

    The chain formats a fixed system prompt plus the caller-supplied
    ``original_description`` into a chat prompt, pipes it into the model and
    finally extracts the ``content`` attribute of the model's reply.

    Args:
        llm: Model to run the prompt against. Any falsy value (including the
            default ``None``) selects the notebook-level ``basic_llm``.

    Returns:
        The composed runnable; its prompt expects an ``original_description``
        input variable.
    """
    model = llm or basic_llm

    system_prompt = """You are an expert copywriter specialized in crafting engaging and SEO-optimized product descriptions for an online furniture store. Your task is to transform basic product descriptions into compelling, informative content that appeals to potential customers and search engines alike. Each description should be around 400 words, creatively written to highlight the features and benefits of the product while incorporating relevant keywords to boost search engine visibility. Your writing should be clear, lively, and persuasive, designed to attract and retain customer interest and drive sales."""
    user_prompt = "{original_description}"

    # Both templates go through the same whitespace clean-up helper before the prompt is built.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", trim_extra_whitespace(system_prompt)),
            ("human", trim_extra_whitespace(user_prompt)),
        ]
    )

    return prompt | model | RunnableLambda(lambda reply: reply.content)


def wrapped_chain(inputs, llm=None):
    """Run the baseline chain once on ``inputs`` and return its result.

    A fresh chain is obtained from ``get_baseline_chain(llm)`` on every call,
    which makes this a plain callable that takes the inputs as its first
    argument.

    Args:
        inputs: Value passed unchanged to the chain's ``invoke``.
        llm: Forwarded to ``get_baseline_chain``.

    Returns:
        Whatever the chain's ``invoke`` returns.
    """
    return get_baseline_chain(llm).invoke(inputs)

In [15]:
from langsmith.evaluation import run_evaluator
import langsmith.schemas


@run_evaluator
def eval_general(run: langsmith.schemas.Run, example: langsmith.schemas.Example) -> dict:
    """Score a generated product description with an LLM acting as judge.

    The text stored under the ``"output"`` key of ``run.outputs`` is sent to
    ``basic_llm`` together with a critique prompt that asks for a JSON object
    with the keys ``"good"``, ``"bad"`` and ``"score"``.

    Args:
        run: The run whose output is evaluated.
        example: The dataset example the run belongs to. Not used here, but
            part of the evaluator signature.

    Returns:
        A dict with ``"key": "general"`` and ``"comment"`` (the judge's raw
        reply). When the reply parses, it also carries ``"score"`` (float, if
        the reply had one) and ``"correction"`` (the parsed reply without the
        score). A reply that cannot be parsed yields only key and comment.
    """
    import logging  # local import so the cell does not depend on what the star imports expose

    role_message = """
    You are an expert evaluator tasked with analyzing and critiquing product descriptions. Your primary role is to assess the effectiveness, creativity, and SEO alignment of the content. Provide a short and concise feedback list that outlines what is good and what is bad. Additionally, assign a score out of 100 based on the overall quality, taking into account factors like clarity, engagement, keyword integration, accuracy, and stylistic appeal. 
    Write the result as a JSON with keys of "good", "bad", "score".
    """

    human_template = "{new_description}"

    messages = [("system", role_message), ("human", human_template)]
    messages = [(role, trim_extra_whitespace(message)) for role, message in messages]

    full_prompt = ChatPromptTemplate.from_messages(messages)

    # Guard against `run.outputs` being None so the evaluator grades an empty
    # string instead of raising AttributeError.
    prediction = (run.outputs or {}).get("output", "")

    eval_res = (full_prompt | basic_llm).invoke({"new_description": prediction})
    result = {"comment": eval_res.content}
    try:
        eval_score = safe_json_loads(eval_res.content)
        if "score" in eval_score:
            result["score"] = float(eval_score.pop("score"))
        result["correction"] = eval_score
    except Exception as exc:
        # Parsing stays best-effort (the raw reply is still returned as the comment),
        # but the failure is reported instead of being dropped silently.
        logging.getLogger(__name__).warning(
            "eval_general: could not parse evaluator reply as JSON: %r", exc
        )

    # Kept on purpose: echoes each evaluation result into the cell output.
    print(result)

    return {"key": "general", **result}


View the evaluation results for experiment: 'test-a58f97' at:
https://smith.langchain.com/o/8e224494-8563-4197-8d31-d9119b399732/datasets/e9d4192a-027a-4e11-9087-87c8623ae821/compare?selectedSessions=bdcdcebc-f3cc-4e24-9954-24722be5314c


0it [00:00, ?it/s]

In [20]:
import langsmith

from langsmith.evaluation import evaluate
from functools import partial

# Use the package-level `langsmith.Client` export: attribute access to the `langsmith.client`
# submodule only works if something else has already imported that submodule.
langsmith_client = langsmith.Client()

# Metadata attached to the experiment; "version" labels this prompt/chain variant.
run_metadata = {"version": "dev_baseline_1"}

# Run the baseline chain over every example of the "Short Test" dataset and grade each
# output with `eval_general`.
# NOTE(review): the "model": "3.5" label is hard-coded while the model actually used is
# `basic_llm`; keep the two in sync when switching models.
qqq = evaluate(
    partial(wrapped_chain, llm=basic_llm),
    data="Short Test",
    metadata={**run_metadata, "model": "3.5"},
    evaluators=[eval_general],
    experiment_prefix="test",
    client=langsmith_client,
)

View the evaluation results for experiment: 'test-1774eb' at:
https://smith.langchain.com/o/8e224494-8563-4197-8d31-d9119b399732/datasets/e9d4192a-027a-4e11-9087-87c8623ae821/compare?selectedSessions=c0d3eae6-7cfb-430b-ab19-5938249eae47


0it [00:00, ?it/s]

{'comment': '{\n  "good": [\n    "Engaging and descriptive language creates a vivid image for the reader",\n    "Highlights the unique features and benefits of the dresser effectively",\n    "Incorporates keywords related to coastal decor and furniture seamlessly"\n  ],\n  "bad": [\n    "Could improve by including specific dimensions or measurements for the dresser",\n    "Lacks information about the materials used in crafting the dresser"\n  ],\n  "score": 85\n}', 'score': 85.0, 'correction': {'good': ['Engaging and descriptive language creates a vivid image for the reader', 'Highlights the unique features and benefits of the dresser effectively', 'Incorporates keywords related to coastal decor and furniture seamlessly'], 'bad': ['Could improve by including specific dimensions or measurements for the dresser', 'Lacks information about the materials used in crafting the dresser']}}
{'comment': '{\n  "good": [\n    "Engaging and descriptive language that paints a vivid picture for the r

View the evaluation results for experiment: 'test-fce24f' at:
https://smith.langchain.com/o/8e224494-8563-4197-8d31-d9119b399732/datasets/e9d4192a-027a-4e11-9087-87c8623ae821/compare?selectedSessions=b563140d-7641-44e6-8d85-5b8e35dff078


0it [00:00, ?it/s]