In [1]:
from test_memory import get_llm
from AzureConnection import embeddings 
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.chains import RetrievalQA

Azure connection: connected
Embedding connection: connected


In [2]:
# Read the outdoor clothing catalog from CSV into LangChain documents.
# NOTE(review): debug traces later in this notebook show page_content such as
# "price: Moisture-wicking fabric", "category: 45.00", "None: Shirts" -- it
# looks like the data rows carry one more field than the header row. Verify
# the CSV header before trusting field-specific answers.
catalog_csv = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=catalog_csv)
docs = loader.load()

In [3]:
# --- Vector store creation ---
# Embed the documents produced by `loader` and index them in a
# DocArrayInMemorySearch vector store.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

llm = get_llm()

# --- QA chain setup ---
# chain_type="stuff": the retrieved documents are placed together into one
# prompt for the LLM.
# No prompt override is passed via `chain_type_kwargs`: an empty string is not
# a prompt template, so the chain is left to use its default QA prompt. This
# also matches the second QA chain built later in this notebook.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=index.vectorstore.as_retriever(),
    return_source_documents=True,  # expose response["source_documents"]
)



In [5]:
# Smoke test: ask one catalog question and report the answer together with
# the number of source documents the retriever supplied.
test_query = "What sun protection clothing do you have?"
response = qa.invoke({"query": test_query})

answer_text = response["result"]
print("Response:", answer_text)

source_count = len(response["source_documents"])
print("Source documents:", source_count)

Response: We have several sun protection clothing options available:

1. **Sun Shield Shirt**: A lightweight shirt with UPF 50+ sun protection, perfect for hiking and beach trips. Available in blue and white colors. Price: $45.00.

2. **Mountain Shirt**: A long sleeve shirt with built-in UPF 30+ sun protection, featuring roll-up sleeves and a chest pocket. Made from lightweight nylon. Price: $55.00.

3. **UV Blocker Hoodie**: A full coverage hoodie for extreme sun conditions, with UPF 50+ protection. It includes thumb holes and an adjustable hood. Made from lightweight polyester. Price: $60.00.

4. **Beach Shirt**: A quick-dry shirt with UPF 50+ sun protection, featuring a button-down collar and chest pocket. Made from polyester. Price: $40.00.
Source documents: 4


In [6]:
from langchain.evaluation.qa import QAGenerateChain

In [7]:
# Build a question/answer generation chain on top of the shared LLM.
# NOTE(review): `example_gen_chain` is not referenced by any later cell visible
# here (later cells create their own QAGenerateChain instances) -- confirm
# whether this cell is still needed.
example_gen_chain = QAGenerateChain.from_llm(llm)

In [9]:
# Example 1: specific, targeted questions
# Goal: exercise the QA chain with questions that should have clear answers
# in the catalog, printing a short preview of each answer.
print("\n--- Example 1: Specific Questions ---")
specific_questions = [
    "What is the price of the Sun Shield Shirt?",
    "Which products have UPF 50+ protection?",
    "What features does the UV Blocker Hoodie have?",
    "Which shirts are good for outdoor activities?",
]

preview_len = 200  # characters of each answer to show
for query_text in specific_questions:
    print(f"\nQ: {query_text}")
    answer_preview = qa.invoke({"query": query_text})["result"][:preview_len]
    # The "..." suffix is always appended, even when the answer is shorter
    # than the preview length.
    print(f"A: {answer_preview}...")


--- Example 1: Specific Questions ---

Q: What is the price of the Sun Shield Shirt?
A: The price of the Sun Shield Shirt is $45.00....

Q: Which products have UPF 50+ protection?
A: The products with UPF 50+ protection are:

1. Sun Shield Shirt
2. UV Blocker Hoodie
3. Beach Shirt...

Q: What features does the UV Blocker Hoodie have?
A: The UV Blocker Hoodie features full coverage for extreme sun conditions, thumb holes, an adjustable hood, and is made from lightweight polyester with UPF 50+ protection....

Q: Which shirts are good for outdoor activities?
A: The following shirts are good for outdoor activities:

1. **Sun Shield Shirt**: Lightweight shirt with UPF 50+ sun protection, perfect for hiking and beach trips.

2. **Mountain Shirt**: Long sleeve s...


In [10]:
# Example 2: product comparison questions
# Goal: check how the QA chain handles compare/recommend style questions,
# printing a short preview of each answer.
print("\n--- Example 2: Comparison Questions ---")
comparison_questions = [
    "What are the differences between Sun Shield Shirt and Mountain Shirt?",
    "Which products are best for hiking?",
    "What are the most expensive items in the catalog?",
]

preview_len = 200  # characters of each answer to show
for query_text in comparison_questions:
    print(f"\nQ: {query_text}")
    answer_preview = qa.invoke({"query": query_text})["result"][:preview_len]
    # The "..." suffix is always appended, even when the answer is shorter
    # than the preview length.
    print(f"A: {answer_preview}...")


--- Example 2: Comparison Questions ---

Q: What are the differences between Sun Shield Shirt and Mountain Shirt?
A: The Sun Shield Shirt and Mountain Shirt differ in several aspects:

1. **Sun Protection:**
   - Sun Shield Shirt offers UPF 50+ protection.
   - Mountain Shirt provides UPF 30+ protection.

2. **Mater...

Q: Which products are best for hiking?
A: The best products for hiking from the provided context are:

1. **Hiking Pants**: These are durable and water-resistant, with convertible legs, multiple pockets, and reinforced knees, making them perf...

Q: What are the most expensive items in the catalog?
A: The most expensive items in the catalog are the Safari Pants priced at $75.00 and the Hiking Pants priced at $70.00....


In [14]:
# Turn on LangChain's global debug flag. Chain and LLM calls run after this
# cell print their inputs and outputs (the [chain/start] / [llm/start] /
# [llm/end] traces in the cell outputs below). Set `langchain.debug = False`
# again once the traces are no longer needed, otherwise they bury the results.
import langchain 
langchain.debug = True

In [15]:
# Example 4: QAGenerateChain on several documents
# Goal: generate one question/answer pair for each of the first five catalog
# documents and print the parsed result.
print("\n--- Example 4: QAGenerateChain Working ---")

qa_generator = QAGenerateChain.from_llm(llm)

sample_docs = docs[:5]
for sample in sample_docs:
    # The chain takes a list of inputs; each input carries the document text
    # under the "doc" key.
    generator_input = [{"doc": sample.page_content}]
    qa_pair = qa_generator.apply_and_parse(generator_input)
    print(f"QA pair: {qa_pair}")


--- Example 4: QAGenerateChain Working ---
[32;1m[1;3m[chain/start][0m [1m[chain:QAGenerateChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "doc": "product_name: Sun Shield Shirt\ndescription: Lightweight shirt with UPF 50+ sun protection for outdoor activities. Perfect for hiking and beach trips. Available in blue and white colors.\nfeatures: UPF 50+ protection\nprice: Moisture-wicking fabric\ncategory: 45.00\nNone: Shirts"
    }
  ]
}
[32;1m[1;3m[llm/start][0m [1m[chain:QAGenerateChain > llm:CustomAzureChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are a teacher coming up with questions to ask on a quiz. \nGiven the following document, please generate a question and answer based on that document.\n\nExample Format:\n<Begin Document>\n...\n<End Document>\nQUESTION: question here\nANSWER: answer here\n\nThese questions should be detailed and be based explicitly on information in the document. Begin!\n\n<Begin Document>\np



[36;1m[1;3m[llm/end][0m [1m[chain:QAGenerateChain > llm:CustomAzureChatOpenAI] [3.58s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "QUESTION: What is the primary feature of the Sun Shield Shirt that makes it suitable for outdoor activities like hiking and beach trips?\n\nANSWER: The primary feature of the Sun Shield Shirt that makes it suitable for outdoor activities is its UPF 50+ sun protection.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null,
          "content_filter_results": {
            "hate": {
              "filtered": false,
              "severity": "safe"
            },
            "self_harm": {
              "filtered": false,
              "severity": "safe"
            },
            "sexual": {
              "filtered": false,
              "severity": "safe"
            },
            "violence": {
              "filtered": false,
              "severity": "safe"
            }
 



[36;1m[1;3m[llm/end][0m [1m[chain:QAGenerateChain > llm:CustomAzureChatOpenAI] [1.17s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "QUESTION: What are the key features of the Adventure Tee that make it suitable for running and sports activities?\n\nANSWER: The Adventure Tee is breathable and quick-drying, made from synthetic fabric, and includes mesh ventilation panels. It also has moisture-wicking properties, making it ideal for running and sports activities.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null,
          "content_filter_results": {
            "hate": {
              "filtered": false,
              "severity": "safe"
            },
            "self_harm": {
              "filtered": false,
              "severity": "safe"
            },
            "sexual": {
              "filtered": false,
              "severity": "safe"
            },
            "violence": {
              "fi



[36;1m[1;3m[llm/end][0m [1m[chain:QAGenerateChain > llm:CustomAzureChatOpenAI] [1.46s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "QUESTION: What are the key features of the UV Blocker Hoodie that make it suitable for extreme sun conditions?\n\nANSWER: The UV Blocker Hoodie is suitable for extreme sun conditions due to its full coverage design, UPF 50+ protection, thumb holes, and adjustable hood. It is made from lightweight polyester, which contributes to its effectiveness in blocking harmful UV rays.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null,
          "content_filter_results": {
            "hate": {
              "filtered": false,
              "severity": "safe"
            },
            "protected_material_code": {
              "filtered": false,
              "detected": false
            },
            "protected_material_text": {
              "filtered": false,
              "de



[36;1m[1;3m[llm/end][0m [1m[chain:QAGenerateChain > llm:CustomAzureChatOpenAI] [1.11s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "QUESTION: What are the key features of the Hiking Pants mentioned in the document, and what activities are they best suited for?\n\nANSWER: The key features of the Hiking Pants include durable and water-resistant material, convertible legs, multiple pockets, and reinforced knees. They are best suited for mountain climbing and outdoor adventures.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null,
          "content_filter_results": {
            "hate": {
              "filtered": false,
              "severity": "safe"
            },
            "self_harm": {
              "filtered": false,
              "severity": "safe"
            },
            "sexual": {
              "filtered": false,
              "severity": "safe"
            },
            "violence": {
  



[36;1m[1;3m[llm/end][0m [1m[chain:QAGenerateChain > llm:CustomAzureChatOpenAI] [1.85s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "QUESTION: What are the key features of the Explorer Cap that make it suitable for sun protection and comfort?\n\nANSWER: The Explorer Cap is designed for sun protection with its wide brim and UPF 30+ protection. It is made from breathable cotton and includes a sweat band for added comfort. Additionally, it has an adjustable strap for a customizable fit.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null,
          "content_filter_results": {
            "hate": {
              "filtered": false,
              "severity": "safe"
            },
            "protected_material_code": {
              "filtered": false,
              "detected": false
            },
            "protected_material_text": {
              "filtered": false,
              "detected": false
      

In [None]:
# Example 5: QAGenerateChain + retrieval testing
# Goal: generate a reference question/answer pair from the first catalog
# document, ask the retrieval QA chain the generated question, and show the
# reference answer next to the chain's answer so they can be compared.
print("\n--- Example 5: QAGenerateChain + Retrieval Testing ---")

qa_generator = QAGenerateChain.from_llm(llm)

qa_pair = qa_generator.apply_and_parse([{"doc": docs[0].page_content}])
generated = qa_pair[0]["qa_pairs"]  # dict holding the "query" and "answer" keys
question = generated["query"]
response = qa.invoke({"query": question})

print(f"QA pair: {qa_pair}")
# Show the retrieval result as well; without these lines the QA chain is
# invoked but its answer is never displayed.
print(f"Expected answer: {generated['answer']}")
print(f"Predicted answer: {response['result']}")


--- Example 5: QAGenerateChain + Retrieval Testing ---
[32;1m[1;3m[chain/start][0m [1m[chain:QAGenerateChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "doc": "product_name: Sun Shield Shirt\ndescription: Lightweight shirt with UPF 50+ sun protection for outdoor activities. Perfect for hiking and beach trips. Available in blue and white colors.\nfeatures: UPF 50+ protection\nprice: Moisture-wicking fabric\ncategory: 45.00\nNone: Shirts"
    }
  ]
}
[32;1m[1;3m[llm/start][0m [1m[chain:QAGenerateChain > llm:CustomAzureChatOpenAI] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: You are a teacher coming up with questions to ask on a quiz. \nGiven the following document, please generate a question and answer based on that document.\n\nExample Format:\n<Begin Document>\n...\n<End Document>\nQUESTION: question here\nANSWER: answer here\n\nThese questions should be detailed and be based explicitly on information in the document. Begin!\n\n<Begin 



[36;1m[1;3m[llm/end][0m [1m[chain:QAGenerateChain > llm:CustomAzureChatOpenAI] [5.88s] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "QUESTION: What is the UPF rating of the Sun Shield Shirt, and what activities is it particularly suitable for?\nANSWER: The Sun Shield Shirt has a UPF rating of 50+, making it particularly suitable for outdoor activities such as hiking and beach trips.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null,
          "content_filter_results": {
            "hate": {
              "filtered": false,
              "severity": "safe"
            },
            "self_harm": {
              "filtered": false,
              "severity": "safe"
            },
            "sexual": {
              "filtered": false,
              "severity": "safe"
            },
            "violence": {
              "filtered": false,
              "severity": "safe"
            }
          }
     

### End-to-end evaluation pipeline: setup, Q&A generation, prediction, grading

In [22]:
import os
from langchain.document_loaders import CSVLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import RetrievalQA
from langchain.evaluation.qa import QAGenerateChain, QAEvalChain
from test_memory import get_llm 
from AzureConnection import embeddings

In [23]:

# --- 1. Setup and Data Loading ---

# Credentials for the LLM are expected to come from the environment
# (for example an OPENAI_API_KEY variable); never hardcode a key here.

# Load the catalog CSV into LangChain documents.
catalog_csv = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=catalog_csv)
docs = loader.load()

# Shared LLM used for answering, question generation and grading.
llm = get_llm()

# Embed the loaded documents and build a vector store index for retrieval.
# No `vectorstore_cls` is passed, so the index creator's default vector store
# class is used.
index_creator = VectorstoreIndexCreator(embedding=embeddings)
index = index_creator.from_loaders([loader])




In [24]:


# --- 2. QA Chain Setup ---

# Retriever over the vector store built in the setup step.
retriever = index.vectorstore.as_retriever()

# Retrieval-based question-answering chain: the retrieved documents are
# "stuffed" into a single prompt for the LLM, and the source documents are
# returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    verbose=False,  # switch to True to see chain details
)


In [26]:
# --- 3. Generate Question-Answer Pairs ---

# An LLM-backed generation chain writes one question/answer pair per document;
# these pairs serve as the "ground truth" for the evaluation below.
# Only the first 5 documents are used to keep the run short (each document
# costs one LLM call).
print("Generating Q&A pairs from documents...")
qa_generation_chain = QAGenerateChain.from_llm(llm)

# QAGenerateChain expects each input as a dict with the text under "doc".
doc_inputs = []
for source_doc in docs[:5]:
    doc_inputs.append({"doc": source_doc.page_content})

raw_generations = qa_generation_chain.apply(doc_inputs)

# Each generation result is a dict; the pair itself sits under "qa_pairs".
examples = [generation['qa_pairs'] for generation in raw_generations]
print(f"Generated {len(examples)} sets of Q&A pairs.\n")


Generating Q&A pairs from documents...
[32;1m[1;3m[chain/start][0m [1m[chain:QAGenerateChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "doc": "product_name: Sun Shield Shirt\ndescription: Lightweight shirt with UPF 50+ sun protection for outdoor activities. Perfect for hiking and beach trips. Available in blue and white colors.\nfeatures: UPF 50+ protection\nprice: Moisture-wicking fabric\ncategory: 45.00\nNone: Shirts"
    },
    {
      "doc": "product_name: Adventure Tee\ndescription: Breathable and quick-drying t-shirt made from synthetic fabric. Ideal for running and sports activities. Features mesh ventilation panels.\nfeatures: Moisture-wicking\nprice: Quick-dry\ncategory: 30.00\nNone: Shirts"
    },
    {
      "doc": "product_name: UV Blocker Hoodie\ndescription: Full coverage hoodie for extreme sun conditions. Includes thumb holes and adjustable hood. Made from lightweight polyester with UPF 50+ protection.\nfeatures: UPF 50+\nprice: Thumb holes\

In [28]:
# --- 4. Get Predictions from Your QA Chain ---

# Ask the QA chain every generated question. `predictions` stays aligned with
# `examples` index-for-index: an example without a usable question still gets
# a placeholder entry.
print("Getting predictions from the QA chain...")
predictions = []
for example in examples:
    # The question may be stored under any of these keys; take the first
    # non-empty one.
    question = example.get("question") or example.get("query") or example.get("text")
    if not question:
        print(f"No question found in example: {example}")
        predictions.append({"result": "No question available"})
        continue

    chain_output = qa.invoke({"query": question})
    predictions.append({"result": chain_output["result"]})
print("Predictions received.\n")

Getting predictions from the QA chain...
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What are the key features and benefits of the Sun Shield Shirt as described in the document?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What are the key features and benefits of the Sun Shield Shirt as described in the document?",
  "context": "product_name: Sun Shield Shirt\ndescription: Lightweight shirt with UPF 50+ sun protection for outdoor activities. Perfect for hiking and beach trips. Available in blue and white colors.\nfeatures: UPF 50+ protection\nprice: Moisture-wicking fabric\ncategory: 45.00\nNone: Shirts\n\nproduct_name: Mountain Shirt\ndescription: Long sleeve shirt with built-in sun protectio

In [29]:
# --- 5. Evaluate the Predictions ---

# LLM-backed grader for question answering.
eval_chain = QAEvalChain.from_llm(llm)

# Grade each predicted answer (from `predictions`) against the reference
# answer of the matching entry in `examples`; one graded output is produced
# per example.
print("Evaluating the predictions...")
graded_outputs = eval_chain.evaluate(
    examples=examples,
    predictions=predictions,
)
print("Evaluation complete.\n")



Evaluating the predictions...
[32;1m[1;3m[chain/start][0m [1m[chain:QAEvalChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "query": "What are the key features and benefits of the Sun Shield Shirt as described in the document?",
      "answer": "The Sun Shield Shirt is a lightweight shirt that offers UPF 50+ sun protection, making it ideal for outdoor activities such as hiking and beach trips. It is available in blue and white colors and is made from moisture-wicking fabric. The shirt is priced at $45.00 and falls under the category of shirts.",
      "result": "The key features and benefits of the Sun Shield Shirt include:\n\n- Lightweight design, making it ideal for outdoor activities such as hiking and beach trips.\n- UPF 50+ sun protection, providing excellent protection against harmful UV rays.\n- Available in blue and white colors."
    },
    {
      "query": "What are the key features of the Adventure Tee that make it suitable for running and sports 

In [31]:
# --- 6. Display Results ---

# Print, for every example, the question, the reference answer, the QA
# chain's answer and the grade. The three lists are index-aligned.
for position in range(len(examples)):
    example = examples[position]
    predicted = predictions[position]
    graded = graded_outputs[position]

    # The question/answer may be stored under any of these keys; take the
    # first non-empty one.
    question = example.get("question") or example.get("query") or example.get("text")
    answer = example.get("answer") or example.get("result")

    print(
        f"--- Example {position+1} ---",
        f"Question: {question}",
        f"Real Answer: {answer}",
        f"Predicted Answer: {predicted['result']}",
        f"Predicted Grade: {graded['results']}\n",
        sep="\n",
    )

--- Example 1 ---
Question: What are the key features and benefits of the Sun Shield Shirt as described in the document?
Real Answer: The Sun Shield Shirt is a lightweight shirt that offers UPF 50+ sun protection, making it ideal for outdoor activities such as hiking and beach trips. It is available in blue and white colors and is made from moisture-wicking fabric. The shirt is priced at $45.00 and falls under the category of shirts.
Predicted Answer: The key features and benefits of the Sun Shield Shirt include:

- Lightweight design, making it ideal for outdoor activities such as hiking and beach trips.
- UPF 50+ sun protection, providing excellent protection against harmful UV rays.
- Available in blue and white colors.
Predicted Grade: CORRECT

--- Example 2 ---
Question: What are the key features of the Adventure Tee that make it suitable for running and sports activities?
Real Answer: The Adventure Tee is breathable and quick-drying, made from synthetic fabric, and features mesh 