In [1]:
# !pip install python-dotenv

In [1]:
from dotenv import load_dotenv
from langchain import HuggingFaceHub

load_dotenv()

True

In [None]:
hub = HuggingFaceHub(repo_id="NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO")

In [3]:
from langchain import PromptTemplate

In [36]:
tasks_template = """<|im_start|>system
You are a helpful designer for a childrens digital game company<|im_end|>
<|im_start|>user
{name} is {age} years old and has a reading level age of {reading_level}. They need help with {teaching_task}.

Make a list of 5 suitable tasks to learn {teaching_task}.

Just list out each item 1 by 1 as a JSON list. Only provide the list of tasks, do not include the question or any other information, just the list of tasks.<|im_end|>
<|im_start|>assistant
"""

tasks_prompt = PromptTemplate(
    template = tasks_template,
    input_variables=["name", "age", "reading_level", "teaching_task"]
)

dialogue_template = """<|im_start|>system
You are a helpful designer for a childrens digital game company<|im_end|>
<|im_start|>user
{name} is {age} years old and has a reading level age of {reading_level}. They need help with {teaching_task}.

Question: What dialogue would be helpful in a game where the tasks are:

{tasks}

Answer: Let's think step by step.<|im_end|>
<|im_start|>assistant
"""

dialogue_prompt = PromptTemplate(
    template = dialogue_template, 
    input_variables=["name", "age", "reading_level", "teaching_task", "tasks"]
)

In [None]:
# Define your desired data structure.
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser


class Tasks(BaseModel):
    list: str = Field(description="list of tasks to learn a teaching task")

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=Tasks)

tasks_prompt_json = PromptTemplate(
    template=tasks_template,
    input_variables=["name", "age", "reading_level", "teaching_task"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

In [37]:
tasks_chain = tasks_prompt | hub 
# dialogue_chain = dialogue_prompt | hub.bind(stop="<|im_end|>")

# chain = (
#     tasks_chain | dialogue_chain
# )

# Langsmith

In [8]:
from langsmith import Client
client = Client()

## Create dataset

In [12]:
# text = """How to get ready for school
# How to identify shapes
# How to read body language
# How to understand facial expressions
# How to share with others
# How to empathise with others
# How to handle identify emotions
# How to deal with emotions
# How to spell their name
# How to solve a maze
# How to tidy up
# How to ask for help
# How to brush teeth
# How to use the toilet
# How to deal with loud noises
# How to deal with bright lights
# How to deal with strong smells
# How to make friends
# How to help others"""

# dataset = client.create_dataset(
#     dataset_name="DynoLearn Dataset",
#     description="Initial Prompts",
# )

# client.create_examples(
#     inputs=[{"name": "William", "age": '7', "reading_level": '5', "teaching_task": q} for q in text.split("\n")],
#     outputs=[],
#     dataset_id=dataset.id,
# )

## Evaluate

In [34]:
from langchain.smith import RunEvalConfig
from langsmith.evaluation import EvaluationResult, run_evaluator
from readability import Readability

@run_evaluator
def Readability_eval(run, example) -> EvaluationResult:
    prediction = run.outputs.get("output") or ""
    try:
        score = Readability(prediction).spache().score
    except:
        score = -1
    return EvaluationResult(key="Readability", score=score)
    
eval_config = RunEvalConfig(
    # eval_llm=eval_llm,
    custom_evaluators=[Readability_eval],
    # # You can also use a prebuilt evaluator
    # # by providing a name or RunEvalConfig.<configured evaluator>
    # evaluators=[
    #     # You can specify an evaluator by name/enum.
    #     # In this case, the default criterion is "helpfulness"
    #     "criteria",
    #     # Or you can configure the evaluator
    #     RunEvalConfig.Criteria("harmfulness"),
    #     RunEvalConfig.Criteria(
    #         {
    #             "cliche": "Are the lyrics cliche?"
    #             "Respond Y if they are, N if they're entirely unique."
    #         }
    #     ),
    # ],
)

In [38]:
client.run_on_dataset(
    dataset_name="DynoLearn Dataset",
    llm_or_chain_factory=tasks_chain,
    evaluation=eval_config,
    verbose=True,
    project_name="runnable-test-19",
    # Any experiment metadata can be specified here
    project_metadata={"version": "1.0.0"},
)

View the evaluation results for project 'runnable-test-19' at:
https://smith.langchain.com/o/cb290b0a-415d-5ed7-8ecf-fc36743479bf/datasets/116b0e6d-2428-4ec4-9988-e60242606aae/compare?selectedSessions=c3d5eae0-4ebf-4302-a3b9-68f23285138b

View all tests for Dataset DynoLearn Dataset at:
https://smith.langchain.com/o/cb290b0a-415d-5ed7-8ecf-fc36743479bf/datasets/116b0e6d-2428-4ec4-9988-e60242606aae
[------------------------------------------------->] 19/19

Unnamed: 0,output,feedback.Readability,error,execution_time,run_id
count,19,19.0,0.0,19.0,19
unique,19,,0.0,,19
top,"[\n ""1. Share toys or belongings with friends...",,,,1cb67268-cb68-438d-a240-143594597e19
freq,1,,,,1
mean,,-1.0,,4.768465,
std,,0.0,,1.277036,
min,,-1.0,,2.966708,
25%,,-1.0,,3.860333,
50%,,-1.0,,4.650077,
75%,,-1.0,,5.618631,


{'project_name': 'runnable-test-19',
 'results': {'9fe928cd-26c2-4eb3-ac0a-6f7993a783e9': {'input': {'age': '7',
    'name': 'William',
    'reading_level': '5',
    'teaching_task': 'How to help others'},
   'feedback': [EvaluationResult(key='Readability', score=-1, value=None, comment=None, correction=None, evaluator_info={}, source_run_id=None, target_run_id=None)],
   'execution_time': 5.766531,
   'run_id': '1cb67268-cb68-438d-a240-143594597e19',
   'output': '[\n  "1. Share toys or belongings with friends",\n  "2. Compliment others and spread kindness",\n  "3. Be a good listener and offer support when someone is upset",\n  "4. Participate in community service activities or events",\n  "5. Offer to help with chores or tasks around the house"\n]'},
  'dbb3ea8f-968d-400d-95fd-c9399da7ce99': {'input': {'age': '7',
    'name': 'William',
    'reading_level': '5',
    'teaching_task': 'How to deal with strong smells'},
   'feedback': [EvaluationResult(key='Readability', score=-1, value