In [21]:
import json
import os
import random

from mvodolagin_personal_imports import *

load_dotenv()

True

In [4]:
from mvodolagin_personal_imports.langchain_stuff import *

In [5]:
data_dir = Path(r"E:\Work\TextTailor\repos\text_tailor_scrapers\texttailor\texttailor\dev\data")

In [9]:
for site_dir in data_dir.iterdir():
    if not site_dir.is_dir():
        continue
    item_files = list(site_dir.glob("item_*.json"))
    if len(item_files) < 20:
        continue
    break

In [20]:
examples = []

random.shuffle(item_files)

for i, fp in enumerate(item_files):
    data = json.loads(fp.read_text(encoding="utf-8"))
    original_description_text = "\n\n".join(data["matching_texts"])
    examples.append(original_description_text)
    if i > 5:
        break

In [15]:
data.keys()

dict_keys(['url', 'unique_content', 'matching_texts', 'type'])

In [16]:
data["url"]

'https://ahfshop.com/collections/living-room/products/traemore-sofa'

In [19]:
original_description_text

'Don’t be shy—take a seat on this farmhouse-style sofa. Dressed in a modern and relaxed linen-weave upholstery, it’s a luxurious complement to rustic accents with its light and airy palette and elegantly turned feet. What a picture-perfect finish to any shabby chic or countryside living room.'

In [26]:
import langsmith

langsmith_client = langsmith.client.Client()
dataset_name = "Short Test"

# dataset = langsmith_client.create_dataset(dataset_name=dataset_name)
# for e in examples:
#     langsmith_client.create_example(dataset_name=dataset_name, inputs={"original_description": e})

In [32]:
omni_llm = ChatOpenAI(model_name="gpt-4o", **common_settings)

In [28]:
import openai

In [67]:
def get_baseline_chain(inputs, llm=None):
    if not llm:
        llm = basic_llm
    role_message = """You are an expert copywriter specialized in crafting engaging and SEO-optimized product descriptions for an online furniture store. Your task is to transform basic product descriptions into compelling, informative content that appeals to potential customers and search engines alike. Each description should be around 400 words, creatively written to highlight the features and benefits of the product while incorporating relevant keywords to boost search engine visibility. Your writing should be clear, lively, and persuasive, designed to attract and retain customer interest and drive sales."""

    human_template = "{original_description}"

    messages = [("system", role_message), ("human", human_template)]
    messages = [(role, trim_extra_whitespace(message)) for role, message in messages]

    full_prompt = ChatPromptTemplate.from_messages(messages)
    chain = full_prompt | llm | RunnableLambda(lambda x: x.content)
    return chain


In [60]:
from functools import partial

run_metadata = {"version": "dev_baseline_1"}
langsmith_client.run_on_dataset(
    llm_or_chain_factory=partial(get_baseline_chain, llm=basic_llm), dataset_name=dataset_name,
    project_metadata={**run_metadata, "model": "3.5"},
    evaluation=["eval_general"]
)

View the evaluation results for project 'terrific-time-20' at:
https://smith.langchain.com/o/d7a1c614-828d-5fe5-a5f1-6b6c2644f384/datasets/e2d85fc8-47fb-4143-90c8-a36c8b26ce68/compare?selectedSessions=a4ed3ae5-db0e-452a-9db2-4db71e47f431

View all tests for Dataset Short Test at:
https://smith.langchain.com/o/d7a1c614-828d-5fe5-a5f1-6b6c2644f384/datasets/e2d85fc8-47fb-4143-90c8-a36c8b26ce68


AttributeError: 'list' object has no attribute 'evaluators'

In [40]:
langsmith_client.run_on_dataset(llm_or_chain_factory=partial(get_baseline_chain, llm=omni_llm),
                                dataset_name=dataset_name, project_metadata={**run_metadata, "model": "4o"})


LangSmithError: Failed to GET /datasets in LangSmith API. HTTPSConnectionPool(host='api.smith.langchain.com', port=443): Read timed out. (read timeout=10.0)


In [41]:
original_description_text

'This chest of drawers is the ultimate statement piece for a coastal cottage or shabby chic inspired retreat. Whitewashed finish on the drawers and sides is wonderfully easy on the eyes. Paired with the unique plank-effect top, it’s a driftwoody look that has our minds drifting away to beachy-keen escapes.'

In [56]:
from langsmith.evaluation import run_evaluator
import langsmith.schemas


@run_evaluator
def eval_general(run: langsmith.schemas.Run, example: langsmith.schemas.Example) -> dict:
    role_message = """
    You are an expert evaluator tasked with analyzing and critiquing product descriptions. Your primary role is to assess the effectiveness, creativity, and SEO alignment of the content. Provide a short and concise feedback list that outlines what is good and what is abad. Additionally, assign a score out of 100 based on the overall quality, taking into account factors like clarity, engagement, keyword integration, accuracy, and stylistic appeal. 
    Write the result as a JSON with keys of "good", "bad", "score".
    """

    human_template = "{new_description}"

    messages = [("system", role_message), ("human", human_template)]
    messages = [(role, trim_extra_whitespace(message)) for role, message in messages]

    full_prompt = ChatPromptTemplate.from_messages(messages)

    prediction = run.outputs.get("output", "")

    eval_res = (full_prompt | basic_llm).invoke({"new_description": prediction})
    result = {"comment": eval_res.content}
    try:
        eval_score = safe_json_loads(eval_res.content)
        if "score" in eval_score:
            result["score"] = float(eval_score.pop("score"))
        result["correction"] = eval_score
    except Exception as e:
        pass

    print(result)

    return {"key": "general", **result}


qq = langsmith_client.evaluate_run(run="7af3dd73-a8a9-492b-8e33-e0f5775a018b", evaluator=eval_general)


{'comment': '{\n    "good": [\n        "Engaging and descriptive language that paints a vivid picture of the product",\n        "Highlights key features and benefits effectively",\n        "Incorporates lifestyle imagery to appeal to potential customers"\n    ],\n    "bad": [\n        "Lacks specific dimensions or measurements that could help customers gauge the size of the table",\n        "Could benefit from mentioning any special design elements or unique selling points that set it apart from similar products",\n        "Limited information on maintenance or care instructions for the table"\n    ],\n    "score": 85\n}', 'score': 85.0, 'correction': {'good': ['Engaging and descriptive language that paints a vivid picture of the product', 'Highlights key features and benefits effectively', 'Incorporates lifestyle imagery to appeal to potential customers'], 'bad': ['Lacks specific dimensions or measurements that could help customers gauge the size of the table', 'Could benefit from men

In [57]:
qq

EvaluationResult(key='general', score=85.0, value=None, comment='{\n    "good": [\n        "Engaging and descriptive language that paints a vivid picture of the product",\n        "Highlights key features and benefits effectively",\n        "Incorporates lifestyle imagery to appeal to potential customers"\n    ],\n    "bad": [\n        "Lacks specific dimensions or measurements that could help customers gauge the size of the table",\n        "Could benefit from mentioning any special design elements or unique selling points that set it apart from similar products",\n        "Limited information on maintenance or care instructions for the table"\n    ],\n    "score": 85\n}', correction={'good': ['Engaging and descriptive language that paints a vivid picture of the product', 'Highlights key features and benefits effectively', 'Incorporates lifestyle imagery to appeal to potential customers'], 'bad': ['Lacks specific dimensions or measurements that could help customers gauge the size of t

In [58]:
qq.correction

{'good': ['Engaging and descriptive language that paints a vivid picture of the product',
  'Highlights key features and benefits effectively',
  'Incorporates lifestyle imagery to appeal to potential customers'],
 'bad': ['Lacks specific dimensions or measurements that could help customers gauge the size of the table',
  'Could benefit from mentioning any special design elements or unique selling points that set it apart from similar products',
  'Limited information on maintenance or care instructions for the table']}

In [63]:
langsmith_client.create_dataset(dataset_name="Tech Test")

Dataset(name='Tech Test', description=None, data_type=<DataType.kv: 'kv'>, id=UUID('6dd35b5f-e202-4eab-90d2-06c198c68f47'), created_at=datetime.datetime(2024, 5, 26, 18, 32, 15, 313192, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2024, 5, 26, 18, 32, 15, 313192, tzinfo=datetime.timezone.utc), example_count=0, session_count=0, last_session_start_time=None)

In [64]:
langsmith_client.create_example(dataset_name="Tech Test", inputs={"original_description": original_description_text})

Example(dataset_id=UUID('6dd35b5f-e202-4eab-90d2-06c198c68f47'), inputs={'original_description': 'This chest of drawers is the ultimate statement piece for a coastal cottage or shabby chic inspired retreat. Whitewashed finish on the drawers and sides is wonderfully easy on the eyes. Paired with the unique plank-effect top, it’s a driftwoody look that has our minds drifting away to beachy-keen escapes.'}, outputs=None, metadata={'dataset_split': ['base']}, id=UUID('e6f418c5-9118-4512-9789-99bd3ee33773'), created_at=datetime.datetime(2024, 5, 26, 18, 32, 18, 399936, tzinfo=datetime.timezone.utc), modified_at=datetime.datetime(2024, 5, 26, 18, 32, 18, 399936, tzinfo=datetime.timezone.utc), runs=[], source_run_id=None)

In [68]:
from langsmith.evaluation import evaluate

qqq = evaluate(
    partial(get_baseline_chain, llm=basic_llm),
    data="Tech Test",
    metadata={**run_metadata, "model": "3.5"},
    evaluators=[eval_general],
    experiment_prefix="test",
    client=langsmith_client
)


View the evaluation results for experiment: 'test-4acc5e' at:
https://smith.langchain.com/o/d7a1c614-828d-5fe5-a5f1-6b6c2644f384/datasets/6dd35b5f-e202-4eab-90d2-06c198c68f47/compare?selectedSessions=14e87ba4-4f0a-4b1a-8ad0-debe99f3f3a7


0it [00:00, ?it/s]

{'comment': '{\n  "good": [\n    "Engaging and descriptive language creates a vivid image for the reader",\n    "Use of sensory details enhances the appeal of the product",\n    "Incorporates lifestyle imagery to evoke emotions and aspirations",\n    "Effectively highlights both aesthetic and functional features of the product",\n    "Promotes a sense of tranquility and coastal charm effectively"\n  ],\n  "bad": [\n    "Could benefit from more specific details about dimensions or materials used",\n    "Lacks specific keywords that potential customers might search for (e.g., \'coastal furniture\', \'shabby chic chest of drawers\')",\n    "Repetitive use of certain phrases could be improved for variety"\n  ],\n  "score": 85\n}', 'score': 85.0, 'correction': {'good': ['Engaging and descriptive language creates a vivid image for the reader', 'Use of sensory details enhances the appeal of the product', 'Incorporates lifestyle imagery to evoke emotions and aspirations', 'Effectively highligh

In [69]:
qqq

<ExperimentResults test-4acc5e>

In [72]:
from langfuse.callback import CallbackHandler
langfuse_handler = CallbackHandler(
    public_key="pk-lf-d229a547-fb20-4de6-94d2-af4565b61b73",
    secret_key="sk-lf-d0228d2a-9dcd-4968-b170-88367f8e03ae",
    host="https://langfuse.ef-coolify.calmmage.com"
)

basic_llm.invoke("What's cookin?", config={"callbacks": [langfuse_handler]})

AIMessage(content='I am an AI and I do not cook, but I am here to assist you with any questions or tasks you may have! How can I help you today?', response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 12, 'total_tokens': 45}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None})

In [82]:
invoke_config = {}
# chain.with_config(**invoke_config).invoke  # Yeah, I know
if os.environ.get("LANGFUSE_PUBLIC_KEY") and os.environ.get("LANGFUSE_SECRET_KEY") and os.environ.get("LANGFUSE_HOST"):
    langfuse_handler = CallbackHandler(
        public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
        secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
        host=os.environ.get("LANGFUSE_HOST"),
    )
    invoke_config["callbacks"] = invoke_config.get("callbacks", []) + [langfuse_handler]

In [70]:
import langchain_core.runnables.config

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001CBC5701880>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001CBC5707F10>, model_name='gpt-3.5-turbo-0125', openai_api_key=SecretStr('**********'), openai_proxy='', request_timeout=60.0, max_retries=3, http_client=<httpx.Client object at 0x000001CBBC9232E0>)

In [73]:
load_dotenv()

True

In [80]:
from langfuse.decorators import observe
 
def get_baseline_chain(inputs, llm=None):
    if not llm:
        llm = basic_llm
    role_message = """You are an expert copywriter specialized in crafting engaging and SEO-optimized product descriptions for an online furniture store. Your task is to transform basic product descriptions into compelling, informative content that appeals to potential customers and search engines alike. Each description should be around 400 words, creatively written to highlight the features and benefits of the product while incorporating relevant keywords to boost search engine visibility. Your writing should be clear, lively, and persuasive, designed to attract and retain customer interest and drive sales."""

    human_template = "{original_description}"

    messages = [("system", role_message), ("human", human_template)]
    messages = [(role, trim_extra_whitespace(message)) for role, message in messages]

    full_prompt = ChatPromptTemplate.from_messages(messages)
    chain = full_prompt | llm | RunnableLambda(lambda x: x.content)
    return {"output": chain.with_config({"callbacks": [langfuse_handler]}).invoke(inputs)}

In [75]:
get_baseline_chain({"original_description": "This is a test"})

{'output': "Upgrade your living room with our elegant and versatile Velvet Loveseat. This luxurious piece of furniture is more than just a place to sit – it's a statement of style and comfort. \n\nCrafted with a sturdy wooden frame and plush velvet upholstery, this loveseat is designed to provide both durability and sophistication. The rich velvet fabric not only adds a touch of luxury to your space but also offers a soft and cozy spot to relax after a long day.\n\nThe compact size of the loveseat makes it perfect for smaller spaces without compromising on seating capacity. Whether you're curling up with a book or entertaining guests, this loveseat offers ample seating while saving space in your living room.\n\nThe classic design of the Velvet Loveseat is timeless and can effortlessly complement any existing decor. The deep navy color adds a touch of elegance, while the button-tufted backrest and tapered legs bring a touch of mid-century charm to your home.\n\nDon't settle for just any

In [76]:
llm_with_callbacks = basic_llm.with_config({"callbacks": [langfuse_handler]})

In [81]:
get_baseline_chain({"original_description": "This is a test"}, llm=llm_with_callbacks)

{'output': 'Transforming basic product descriptions into compelling, informative content that appeals to potential customers and search engines is crucial for online furniture stores. As an expert copywriter in this field, I understand the importance of crafting engaging and SEO-optimized descriptions to drive sales and increase visibility. Each description should be around 400 words, creatively highlighting the features and benefits of the product while incorporating relevant keywords to boost search engine rankings. My writing is clear, lively, and persuasive, designed to attract and retain customer interest, ultimately leading to increased sales for the online store.'}

In [85]:
from langfuse import Langfuse
 
langfuse_client = Langfuse()

langfuse_client.create_dataset(name="tech_test")
langfuse_client.create_dataset_item(dataset_name="tech_test", input={"original_description": "This is a test"})

DatasetItem(id='ce0c0be0-1044-4a1e-8040-e5af7c1cefc3', status=<DatasetStatus.ACTIVE: 'ACTIVE'>, input={'original_description': 'This is a test'}, expected_output=None, metadata=None, source_trace_id=None, source_observation_id=None, dataset_id='clwo7z19s000nmm28son9hsla', dataset_name='tech_test', created_at=datetime.datetime(2024, 5, 27, 0, 18, 7, 608000, tzinfo=datetime.timezone.utc), updated_at=datetime.datetime(2024, 5, 27, 0, 18, 7, 608000, tzinfo=datetime.timezone.utc))

In [None]:
# def run_langchain_experiment(experiment_name, system_message):
#   dataset = langfuse.get_dataset("capital_cities")
#  
#   for item in dataset.items:
#     handler = item.get_langchain_handler(run_name=experiment_name)
#  
#     completion = run_my_langchain_llm_app(item.input["country"], system_message, handler)
#  
#     handler.trace.score(
#       name="exact_match",
#       value=simple_evaluation(completion, item.expected_output)
#     )



In [92]:

def get_baseline_chain(llm=None):
    if not llm:
        llm = basic_llm
    role_message = """You are an expert copywriter specialized in crafting engaging and SEO-optimized product descriptions for an online furniture store. Your task is to transform basic product descriptions into compelling, informative content that appeals to potential customers and search engines alike. Each description should be around 400 words, creatively written to highlight the features and benefits of the product while incorporating relevant keywords to boost search engine visibility. Your writing should be clear, lively, and persuasive, designed to attract and retain customer interest and drive sales."""

    human_template = "{original_description}"

    messages = [("system", role_message), ("human", human_template)]
    messages = [(role, trim_extra_whitespace(message)) for role, message in messages]

    full_prompt = ChatPromptTemplate.from_messages(messages)
    chain = full_prompt | llm | RunnableLambda(lambda x: x.content)
    return chain

def simple_evaluation(output, expected_output):
    role_message = """
    You are an expert evaluator tasked with analyzing and critiquing product descriptions. Your primary role is to assess the effectiveness, creativity, and SEO alignment of the content. Provide a short and concise feedback list that outlines what is good and what is abad. Additionally, assign a score out of 100 based on the overall quality, taking into account factors like clarity, engagement, keyword integration, accuracy, and stylistic appeal. 
    Write the result as a JSON with keys of "good", "bad", "score".
    """

    human_template = "{new_description}"

    messages = [("system", role_message), ("human", human_template)]
    messages = [(role, trim_extra_whitespace(message)) for role, message in messages]

    full_prompt = ChatPromptTemplate.from_messages(messages)

    eval_res = (full_prompt | basic_llm).with_config(**invoke_config).invoke({"new_description": output})
    result = {"comment": eval_res.content}
    try:
        eval_score = safe_json_loads(eval_res.content)
        if "score" in eval_score:
            result["value"] = float(eval_score.pop("score"))
        result["comment"] = json.dumps(eval_score, indent=2)
    except Exception as e:
        pass

    print(result)

    return {"name": "general", **result}


def make_experiment_name():
    return "not_implemented_yet"


def process_dataset_item(item, experiment_name=None):
    if not experiment_name:
        experiment_name = make_experiment_name()
    
    
    local_handler = item.get_langchain_handler(run_name=experiment_name)
    local_config = {"callbacks": [local_handler]}
    chain = get_baseline_chain()
    
    res = chain.with_config(**local_config).invoke(item.input)
    
    score = simple_evaluation(res, item.expected_output)
    
    local_handler.trace.score(
        **score
    )
    
experiment_name="usage_example_4"
for item in langfuse_client.get_dataset("tech_test").items:
    process_dataset_item(item, experiment_name=experiment_name)
    break


{'comment': '{\n  "good": [\n    "Emphasizes the importance of engaging and SEO-optimized product descriptions",\n    "Highlights the benefits of communicating unique features effectively",\n    "Encourages creating compelling narratives to inspire customer exploration"\n  ],\n  "bad": [\n    "Lacks specific examples of how to improve product descriptions",\n    "Could provide more actionable tips for enhancing SEO alignment",\n    "Does not mention the importance of using relevant keywords"\n  ]\n}', 'value': 75.0}
