### Using Language models and chains to evaluate other LLM Chains
But the real question is who evaluates the evaluator

In [12]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai.chat_models import ChatOpenAI
from langchain.chains import LLMChain, RetrievalQA
from langchain_community.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores.docarray import DocArrayInMemorySearch
from dotenv import load_dotenv

In [3]:
load_dotenv() # load api keys and base url

True

In [4]:
loader = CSVLoader(file_path="outdoor.csv")

In [5]:
docs = loader.load()

In [6]:
print(docs[:3])

[Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'outdoor.csv', 'row': 0}), Document(page_content=': 1\nname: Recycled Waterhog Dog Mat, Chevron Weave\ndescription: Protect your floors from spills and splashing with our 

In [7]:
len(docs)

1000

In [13]:
embedding = OpenAIEmbeddings(model="togethercomputer/m2-bert-80M-2k-retrieval")

In [14]:
embedding.embed_query("testing embeddings")



[-0.05245162118211203,
 0.05645931955131899,
 0.10504133041451466,
 0.01068242586986969,
 0.03496676056167221,
 0.004363553586782342,
 0.002927857704159514,
 0.04175213131697163,
 -0.014736010615094362,
 0.019382347209905715,
 0.008989101335313124,
 -0.02073283573096696,
 -0.0421201222055244,
 0.010041881236716054,
 -0.06129839013498767,
 -0.06455001251167955,
 0.019523920091200325,
 0.05438287664029435,
 -0.003329031812614407,
 -0.02092046806216458,
 0.04232470747408731,
 0.010589209624544989,
 -0.051346875262813216,
 -0.019031713011290256,
 -0.02191913292330565,
 -0.031675541371040526,
 0.05129228822645335,
 0.06672560574101703,
 -0.00676640709898774,
 -0.002716434719721542,
 -0.019156075809461947,
 -0.02044322525656584,
 0.06496073474570811,
 0.01049319772998446,
 -0.0039069301863140895,
 0.027708333987600604,
 0.061301493528793174,
 -0.015113193102206697,
 0.029812557981768444,
 0.061515674407968755,
 0.04138263284330097,
 0.03973370059266076,
 0.03435884827007211,
 -0.003813776735

In [20]:
db = DocArrayInMemorySearch.from_documents(docs, embedding)



In [21]:
db.similarity_search("sunflower", k=2)



[Document(page_content=": 580\nname: Juju Baetis Craven's Dual Duo\ndescription: Imported. Questions? Reach out to our customer service team for further assistance.", metadata={'source': 'outdoor.csv', 'row': 580}),
 Document(page_content=': 937\nname: Garmin eTrex Journeyer\ndescription: The eTrex 20x is a handheld GPS that is a longtime favorite of outdoor adventurers. It has been updated with a higher resolution display and larger internal memory. Its specs include dimensions of 2.1"L x 4"W x 1.3"D, 5 oz. with batteries, a 2.2" diagonal display with a resolution of 240 x 320 pixels, and two AA batteries (not included; NiMH or Lithium recommended) with up to 25 hours of battery life. It can store 2,000 waypoints and 200 routes, and its track log can save up to 200 tracks with 10,000 points. Its construction is IPX7 waterproof. Its additional features include a large display with 65K color and 3.7 GB of internal memory and a microSD card slot for loading additional maps. It comes prel

In [22]:
retriever = db.as_retriever()

In [23]:
mistral7b = ChatOpenAI(model="mistralai/Mistral-7B-Instruct-v0.2")

In [25]:
test_prompt = "Hello, my name is {name}"
p_template = ChatPromptTemplate.from_template(test_prompt)
prompt = p_template.format_messages(name="Kosi")


In [69]:
p_template.input_types

{}

In [27]:
t_chain = LLMChain(llm=mistral7b, prompt=p_template)

In [28]:
t_chain.predict(name="Kosi")

"Hello Kosi, nice to meet you! How can I assist you today? Is there a specific question or topic you'd like to discuss? I'm here to help answer any questions you might have. Let me know if there's anything on your mind.\n\nAdditionally, if you have any requests for fun facts, jokes, or just want to chat about a particular topic, feel free to share that as well! I'm here to make this interaction as enjoyable and informative as possible for you. So, what's on your mind, Kosi? Let me know!"

In [31]:
index = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch, embedding=embedding).from_loaders([loader])



In [32]:
rqa_chain = RetrievalQA.from_chain_type(
    llm=mistral7b, 
    chain_type="stuff", 
    retriever=index.vectorstore.as_retriever(), 
    verbose=True,
    chain_type_kwargs = {
        "document_separator": "<<<<>>>>>"
    }
)

In [None]:
# rqa_chain.run("test")

### Evaluate application using hard code examples

In [33]:
examples = [
    {
        "query": "Do the Cozy Comfort Pullover Set\
        have side pockets?",
        "answer": "Yes"
    },
    {
        "query": "What collection is the Ultra-Lofty \
        850 Stretch Down Hooded Jacket from?",
        "answer": "The DownTek collection"
    }
]

In [34]:
from langchain.evaluation.qa import QAGenerateChain

In [36]:
example_gen_chain = QAGenerateChain.from_llm(mistral7b)

In [37]:
new_examples = example_gen_chain.apply_and_parse(
    [{"doc": t} for t in docs[:5]]
)



In [40]:
from pprint import pprint

In [43]:
new_examples = [x['qa_pairs'] for x in new_examples]

In [44]:
new_examples

[{'query': "What is the name and description of the Women's Campside Oxfords mentioned in the document?", 'answer': "The Women's Campside Oxfords are a comfortable lace-to-toe Oxford shoe with a super-soft canvas, thick cushioning, and quality construction. The size and fit recommendation is to order a regular shoe size, and for half sizes not offered, to order up next whole size. The shoe weighs approximately 1 lb. 1 oz. per pair, and features a soft canvas material, a comfortable EVA innersole with Cleansport NXT® antimicrobial odor control, a vintage hunt, fish and camping motif on the innersole, a moderate arch contour, an EVA foam midsole, and a chain-tread-inspired molded rubber outsole. The shoe is imported."}, {'query': 'What are the dimensions of the Small Recycled Waterhog Dog Mat?', 'answer': 'The Small Recycled Waterhog Dog Mat has dimensions of 18 inches by 28 inches.'}, {'query': 'What are some features of the "Infant and Toddler Girls\' Coastal Chill Swimsuit, Two-Piece"

In [45]:
examples += new_examples

In [46]:
len(examples)

7

In [47]:
import langchain
langchain.debug = True

In [48]:
rqa_chain.run(examples[0]["query"])



[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "Do the Cozy Comfort Pullover Set        have side pockets?"
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[1:chain:RetrievalQA > 3:chain:StuffDocumentsChain > 4:chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "Do the Cozy Comfort Pullover Set        have side pockets?",
  "context": ": 169\nname: Cozy Leaf Flannel Comforter Cover Collection\ndescription: Our bestselling Ultrasoft Comfort Flannel comforter cover is woven exclusively in Portugal and features a beautiful, multicolored leaf motif inspired by the great outdoors. \n\nSpecs\nTwin - Dimensions: 68\" x 88\"\nFull - Dimensions: 81\" x 88\"\nQueen - Dimensions: 88\" x 96\"\nKing - Dimensions: 107\" x 96\"\nStandard Sham (1) - Dimensions: 20\" x 26\" (excluding 2½\" border)\n\nFabric & Ca

'Yes, the Cozy Comfort Pullover Set has side pockets.'

In [49]:
langchain.debug = False

In [50]:
predictions = rqa_chain.apply(examples)





[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m





[1m> Finished chain.[0m


[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [51]:
from langchain.evaluation.qa import QAEvalChain

In [53]:
evalchain = QAEvalChain.from_llm(llm=mistral7b)

In [54]:
graded_outputs = evalchain.evaluate(examples, predictions)

In [67]:
for i, eg in enumerate(examples):
    print(f"Example {i}:")
    print("Question: " + predictions[i]['query'])
    print("Real Answer: " + predictions[i]['answer'])
    print("Predicted Answer: " + predictions[i]['result'])

    print("Predicted Grade: " + str(graded_outputs[i]['results'].startswith("CORRECT")))
    print()

Example 0:
Question: Do the Cozy Comfort Pullover Set        have side pockets?
Real Answer: Yes
Predicted Answer: Yes, the Cozy Comfort Pullover Set does have side pockets.
Predicted Grade: True

Example 1:
Question: What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?
Real Answer: The DownTek collection
Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.
Predicted Grade: False

Example 2:
Question: What is the name and description of the Women's Campside Oxfords mentioned in the document?
Real Answer: The Women's Campside Oxfords are a comfortable lace-to-toe Oxford shoe with a super-soft canvas, thick cushioning, and quality construction. The size and fit recommendation is to order a regular shoe size, and for half sizes not offered, to order up next whole size. The shoe weighs approximately 1 lb. 1 oz. per pair, and features a soft canvas material, a comfortable EVA innersole with Cleansport NXT® antimicrobial 