#Intro
Installing Haystack and other required packages with pip

In [2]:
!pip install haystack-ai
!pip install "datasets>=2.6.1"
!pip install "sentence-transformers>=4.1.0"
!pip install google-genai-haystack


Collecting haystack-ai
  Downloading haystack_ai-2.17.1-py3-none-any.whl.metadata (15 kB)
Collecting filetype (from haystack-ai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Collecting haystack-experimental (from haystack-ai)
  Downloading haystack_experimental-0.13.0-py3-none-any.whl.metadata (13 kB)
Collecting lazy-imports (from haystack-ai)
  Downloading lazy_imports-1.0.1-py3-none-any.whl.metadata (11 kB)
Collecting posthog!=3.12.0 (from haystack-ai)
  Downloading posthog-6.7.4-py3-none-any.whl.metadata (6.0 kB)
Collecting backoff>=1.10.0 (from posthog!=3.12.0->haystack-ai)
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Downloading haystack_ai-2.17.1-py3-none-any.whl (582 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m582.7/582.7 kB[0m [31m34.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading posthog-6.7.4-py3-none-any.whl (136 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m136.4/136.4 kB[0m [31m13.1 MB/

Importing libraries

In [3]:
from haystack.document_stores.in_memory import InMemoryDocumentStore
from pathlib import Path
from haystack.components.converters import TextFileToDocument
from haystack import Document
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import ChatPromptBuilder
from haystack.dataclasses import ChatMessage
import os
from getpass import getpass
from haystack_integrations.components.generators.google_genai import GoogleGenAIChatGenerator
from haystack import Pipeline
import time

In [None]:
#API key
os.environ["GOOGLE_API_KEY"] = "YOUR_API_KEY"


In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#Fetching and Indexing Documents


Initializing the DocumentStore

In [6]:
document_store = InMemoryDocumentStore()

In [7]:
#loading txt external dataset

converter = TextFileToDocument()

# expand all txt files
files = list(Path("/content/drive/MyDrive/NLP/Project/dataset_a").glob("*.txt"))

docs = converter.run(sources=files)
docs

{'documents': [Document(id=1173ed041cff7f92b726e3df8913437de3ec1ccec955c4036a321a6afbbbc1a5, content: 'Sparksuite’s Employee Handbook
  
  People operations
  Design principles
  With transparency being at the f...', meta: {'file_path': 'em_people_operations.txt'}),
  Document(id=704f648783c0666608b55180d29f87ac56b0f62348f41bf32c54120ca8d83614, content: 'Sparksuite’s Employee Handbook
  
  Benefits and perks
  
  Health
  We offer full-time team members a variety...', meta: {'file_path': 'em_benefits_perks.txt'})]}

Initalize a Document Embedder

In [8]:
doc_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
doc_embedder.warm_up()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Write Documents to the DocumentStore

In [9]:
docs_with_embeddings = doc_embedder.run(docs["documents"])
document_store.write_documents(docs_with_embeddings["documents"])

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2

#Building the RAG pipeline
Initialize a Text Embedder

In [10]:
text_embedder = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2") # using same model to make embedding as in doc_embedder

Initialize the Retriever

In [11]:
retriever = InMemoryEmbeddingRetriever(document_store)

Define a Template Prompt

In [12]:
template = [
    ChatMessage.from_user(
        """
Given the following information, answer the question.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""
    )
]

prompt_builder = ChatPromptBuilder(template=template)



Initialize a ChatGenerator

In [13]:
chat_generator = GoogleGenAIChatGenerator(model="gemini-2.0-flash")

Build the Pipeline

In [14]:
basic_rag_pipeline = Pipeline()
# adding components to my pipeline
basic_rag_pipeline.add_component("text_embedder", text_embedder)
basic_rag_pipeline.add_component("retriever", retriever)
basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
basic_rag_pipeline.add_component("llm", chat_generator)

# and connecting the components to each other
basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
basic_rag_pipeline.connect("retriever", "prompt_builder")
basic_rag_pipeline.connect("prompt_builder.prompt", "llm.messages")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7d431d427860>
🚅 Components
  - text_embedder: SentenceTransformersTextEmbedder
  - retriever: InMemoryEmbeddingRetriever
  - prompt_builder: ChatPromptBuilder
  - llm: GoogleGenAIChatGenerator
🛤️ Connections
  - text_embedder.embedding -> retriever.query_embedding (list[float])
  - retriever.documents -> prompt_builder.documents (list[Document])
  - prompt_builder.prompt -> llm.messages (list[ChatMessage])

Asking a Question

In [15]:
questions = [
    "How much the company contribute to my health plan?",
    "How much paid off vacation time do I get?",
    "Who I am to report that I want to take vacation to?",
    "What are my working hours?",
    "Do I have to work on Christmas?",
    "Can I work from home?",
    "Do I get a working laptop or do Ineed to bring my personal one?",
    "Do I get a free coffee?",
    "Do i get a free lunch everyday?",
    "How many hours per week do I need to work?",
    "Do I get a parent leave even though I am not pregnant?",
    "How do I have to dress to work?",
    "Can I drink alcohol in the office?",
    "What should I do if I am feeling sick?",
    "How much time do I have for lunch breaks?",
    "Can I take unpaid time off?",
    "If I see some inappropriate behaviour in the office, who should I speak to?",
    "When I get my paycheck?",
    "Which document do I need to sign apart my contract?",
    "Do I get any free stuff?",
    "As a part time employee do I get also free lunch on fridays?",
    "Can I take my dog to the office?",
    "If I break my leg in the office, hom much do I receive for a workplace injury?",
    "Can I take my kids to work?",
    "Does the company provides daycare for toddlers?",
    "Are there any financial bonusses at the end of the year?",
    "Are my work travels reimburse?",
    "Can the company sponsor my VISA?",
    "Do you have a bunker in case of tornados?",
    "Can I recommend sombody for open positions?"
    ]

In [23]:
generated_answers = []

for q in questions:
    response = basic_rag_pipeline.run({
        "text_embedder": {"text": q},
        "prompt_builder": {"question": q}
    })
    answer = response["llm"]["replies"][0].text
    print(answer)
    generated_answers.append(answer)
    time.sleep(5)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The company will contribute 50% of the cost of the employee’s base health plan premium, toward whichever health plan the employee chooses.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

You get paid time off based on your work anniversary:

*   Before 2-year anniversary: 3 weeks
*   After 2-year anniversary: 3.5 weeks
*   After 4-year anniversary: 4 weeks


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

You should notify the person you report to and gain their approval to use Paid Time Off (PTO).



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

As a full-time team member, you are free to begin each day between 7am-10am and finish each day between 4pm-7pm after completing 8 hours of work. The start & end times you choose can change day by day, and you don’t need to let anyone know when you plan to arrive or leave each day (as long as it’s within the windows described above).



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

According to the Sparksuite Employee Handbook, employees receive paid time off for Christmas Day (December 25th). If Christmas Day falls on a Saturday, it will be observed on the following Monday. Therefore, you do not have to work on Christmas, and will get either Christmas Day or the following Monday off as a paid holiday.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Yes, full-time team members can work remotely up to 2 days each week, with extra remote days available. Part-time team members may not be eligible. No approval or advance notice is needed.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The handbook states: "For tech hardware, we provide cutting-edge MacBook Pros with dual or triple high-end 4K monitors."



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Yes, Sparksuite's break room is brimming with some of the nicest (and highest tech) coffee equipment on the market, and specialty-roasted beans.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

No, you do not get a free lunch every day. You get a free meal on the first Friday of each month. You also get free breakfast tacos every Tuesday morning.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Generally, your offer letter will state the number of hours you’re expected to work weekly.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Yes, all full-time and part-time employees, regardless of gender, can opt to take up to 12 weeks of unpaid new parent leave.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

T-shirts, shorts, and athletic wear are frequently worn by team members. Employees should dress in a way that's not unprofessional, tacky, or excessively distracting. Please do not wear clothing that is revealing, falling apart, noticeably dirty, or unnecessarily disruptive.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Alcoholic beverages may be allowed in the office for select occasions; in which case, team members are expected to drink responsibly and in moderation. However, our office will be presumed dry unless otherwise indicated by a team member with authority to make exceptions to this policy.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

If you're feeling under the weather, Sparksuite asks that you either take time off or work from home until at least 24 hours after your symptoms alleviate, particularly if you're experiencing symptoms of a contagious disease. If you choose to work from home, the remote work policy still applies, and your symptoms shouldn't prevent you from being as productive as when you're feeling well. If they do, please take time off instead.
If you test positive for a contagious disease but are not experiencing symptoms, they ask that you work from home for as long as the CDC recommends for that particular disease. Also, out of courtesy to others, if you suspect you've had significant exposure to a testable contagious disease, they recommend scheduling a test.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The handbook does not specify a required or limited time for lunch breaks. It states that lunch breaks will not count toward the number of hours you're expected to work weekly, and encourages employees to eat with others and are welcome to leave the office to eat at local restaurants.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Yes, you can take unpaid time off.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

According to the Sparksuite Employee Handbook, you should immediately report concerns to the person you report to (unless that person is the target of your concern, in which case you should report your concern to the next in rank).



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

You will get your paycheck semi-monthly on the 15th and the last day of the month. If either of those dates falls on a weekend or bank holiday, you'll receive your direct deposit on the closest business day *leading up to* the normal payday.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Proprietary Information and Inventions Assignment Agreement (PIIAA)



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Yes, Sparksuite provides employees with company swag after crossing various work anniversary milestones:

*   **Start date:** T-shirt & hat
*   **1 year:** Sttoke mug
*   **3 year:** Nike Dri-FIT shirt
*   **5 year:** Patagonia Torrentshell rain jacket
*   **7 year:** The North Face Flare puffer jacket
*   **10 year:** Columbia Falmouth backpack
*   **15 year:** Smartwatch

In addition to company swag, the company provides free lunch on the first Friday of each month and breakfast tacos every Tuesday morning.




Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The document doesn't explicitly state whether part-time employees are eligible for Free-lunch Fridays.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

This document does not mention pets being allowed at the office.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The employee handbook doesn't mention any specific payout or worker's compensation policy for workplace injuries. It would be best to reach out to your manager or HR to inquire about workplace injury coverage and how to proceed with a claim.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The document doesn't specifically state that you can or cannot bring children to work. However, it does mention these points that would be relevant:

*   **Office Environment:** The handbook describes a positive office environment with collaborative areas, places to relax, a large break room, soft music, a ping pong table, and a treadmill desk.

*   **Individual Workspaces:** Each team member has their own lockable private office for quiet and focused working.


*   **Code of conduct, Inclusiveness:** Always work to create an inclusive environment for everyone, regardless of race, color, religion, veteran status, national origin, ancestry, pregnancy status, gender, age, marital status, parental status, mental or physical disability, medical condition, physical appearance, political affiliation, or any other characteristics protected by law.

*   **Drugs, alcohol and weapons** Our policies regarding drugs, alcohol, and weapons are designed to promote a safe and productive work environme

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The document does not mention whether Sparksuite provides daycare for toddlers.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Based on the provided Sparksuite Employee Handbook, there is no mention of annual financial bonuses. The handbook details various benefits, perks, and compensation practices, but does not include any information about end-of-year bonuses.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

This document does not mention any information about travel reimbursement.


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

The provided document does not mention VISA sponsorship. Therefore, the answer is: The handbook does not specify whether the company sponsors VISAs.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

This document does not contain information about tornado preparedness or "bunjer" equipment.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

This document does not contain any information about employee referrals.



In [24]:
generated_answers

['The company will contribute 50% of the cost of the employee’s base health plan premium, toward whichever health plan the employee chooses.\n',
 'You get paid time off based on your work anniversary:\n\n*   Before 2-year anniversary: 3 weeks\n*   After 2-year anniversary: 3.5 weeks\n*   After 4-year anniversary: 4 weeks',
 'You should notify the person you report to and gain their approval to use Paid Time Off (PTO).\n',
 'As a full-time team member, you are free to begin each day between 7am-10am and finish each day between 4pm-7pm after completing 8 hours of work. The start & end times you choose can change day by day, and you don’t need to let anyone know when you plan to arrive or leave each day (as long as it’s within the windows described above).\n',
 'According to the Sparksuite Employee Handbook, employees receive paid time off for Christmas Day (December 25th). If Christmas Day falls on a Saturday, it will be observed on the following Monday. Therefore, you do not have to wor

In [25]:
with open("generated_answers.txt", "w") as f:
    for r in generated_answers:
        f.write(r + "\n")

In [26]:
len(generated_answers)

30

In [27]:
from google.colab import files

files.download("generated_answers.txt")   #

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#Michael Scott styled
changing the Preambel in the template

In [21]:
text_embedder_ms = SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")

In [22]:
retriever_ms = InMemoryEmbeddingRetriever(document_store)

In [23]:
template_ms = [
    ChatMessage.from_user(
        """
Given the following information, answer the question as Michael Scott.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{question}}
Answer:
"""
    )
]

prompt_builder_ms = ChatPromptBuilder(template=template_ms)



In [24]:
chat_generator_ms = GoogleGenAIChatGenerator(model="gemini-2.0-flash")

In [25]:
basic_rag_pipeline_ms = Pipeline()
# adding components to my pipeline
basic_rag_pipeline_ms.add_component("text_embedder_ms", text_embedder_ms)
basic_rag_pipeline_ms.add_component("retriever_ms", retriever_ms)
basic_rag_pipeline_ms.add_component("prompt_builder_ms", prompt_builder_ms)
basic_rag_pipeline_ms.add_component("llm_ms", chat_generator_ms)

# and connecting the components to each other
basic_rag_pipeline_ms.connect("text_embedder_ms.embedding", "retriever_ms.query_embedding")
basic_rag_pipeline_ms.connect("retriever_ms", "prompt_builder_ms")
basic_rag_pipeline_ms.connect("prompt_builder_ms.prompt", "llm_ms.messages")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7a462c4bedb0>
🚅 Components
  - text_embedder_ms: SentenceTransformersTextEmbedder
  - retriever_ms: InMemoryEmbeddingRetriever
  - prompt_builder_ms: ChatPromptBuilder
  - llm_ms: GoogleGenAIChatGenerator
🛤️ Connections
  - text_embedder_ms.embedding -> retriever_ms.query_embedding (list[float])
  - retriever_ms.documents -> prompt_builder_ms.documents (list[Document])
  - prompt_builder_ms.prompt -> llm_ms.messages (list[ChatMessage])

In [None]:
generated_answers_ms = []

for q in questions:
    response_ms = basic_rag_pipeline_ms.run({
        "text_embedder_ms": {"text": q},
        "prompt_builder_ms": {"question": q}
    })
    answer_ms = response_ms["llm_ms"]["replies"][0].text
    print(answer_ms)
    generated_answers_ms.append(answer_ms)
    time.sleep(5)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, listen up everyone! This is important! Benefits are IMPORTANT!

So, Sparksuite, right? Great company, amazing coffee, best break room this side of Scranton! They're giving you these plans, these... *choices*. It’s like choosing between a gourmet pizza and a slightly less gourmet pizza, because they’re ALL good, okay?

But here's the deal. They're matching your PREMIUM, like a "buy one get one half-off" deal, but only on your **health insurance**!

Okay, here’s the most important part: they give you 50% toward whichever health plan YOU choose, but that’s only toward the base health plan premium. It doesn’t matter if you get the fanciest, platinum-est plan out there, they will still only pay 50% of the base health plan premium.

So, if the base plan costs $300, they give you $150! You’re still paying the rest, but hey, free money! It’s like finding a twenty dollar bill in your old coat, but it's better because it keeps you from, ya know, DYING! 

And… No company contribution 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, calm down everyone! It's like a friggin' fire drill in here. Vacation time? Alright, let's get this straight. 

At Sparksuite, we believe in freedom! Freedom of speech, freedom to wear what you want (within reason, of course, nobody wants to see THAT at the office), and most importantly, FREEDOM TO TAKE TIME OFF!

Now, how much do *you* get? Well, are you a newbie, or have you graced us with your presence for, like, *years*?

*   **Less than two years?** Bam! You get three whole weeks! That's, like, a mini-retirement, people! Okay, not really, but it's enough time to go to Sandals in Jamaica!

*   **Two years or more?** You are basically family so here is 3.5 weeks!

*   **Four years or more?** You are part of the core team. Four weeks it is!

And, AND, AND! Listen up! It's not just vacation, it's LIFE TIME! Sick? Doctor appointment? Jury Duty (boooring!)? Kid acting up? Just use it! It’s like, a big bucket of "do whatever you want" time! Just don't let it get too full, you

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, settle down everyone! Vacation?! That's what I'm talkin' about!

According to the *Sparksuite's Employee Handbook* (which I, Michael Scott, am now intimately familiar with), you need to talk to...drum roll, please... **the person you report to!**

Yeah, I know, shocking! But hey, just like they say, "Paid time off can be used by notifying the person you report to as early as reasonably possible and gaining approval. Different reasons for requesting time off will naturally have different expectations for how early you’re able to send notification."

Just imagine, me, Michael Scott, approving all the vacation requests. That's what she said! Okay, okay, I'll see myself out... to HR. No, wait! To the beach!



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, settle down everyone! Let's talk about *my* working hours! (leans in conspiratorially) Actually, *your* working hours, but you know, everything's about me, right?

So, here at Sparksuite, we're not sticklers for time clocks like those sad saps at Dunder Mifflin. We're all about **flexibility**! It's like dating a supermodel... except less stressful and you get paid!

You can waltz in anytime between **7 AM and 10 AM** and strut out between **4 PM and 7 PM**... as long as you put in those glorious 8 hours, of course! And if you need to come in later one day and earlier the next? BAM! Go for it! It's like a choose your own adventure book, but with spreadsheets and coffee!

Just remember... between 10 AM and 4 PM, you gotta be "on" - collaborating, creating, and crushing those goals! But small breaks? Totally encouraged! Grab a coffee, challenge Dwight... I mean, a coworker... to some ping pong, brainstorm the next million-dollar idea!

And if you need a longer break? Hey, com

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, settle down everyone! Let's talk about Christmas. *clears throat*

Do you HAVE to work on Christmas? Well, let's look at the list. *squints at the handbook* Ah-ha! It says right here, and I quote, *"Christmas Day	December 25th"* is a day we observe. Which, let me tell you, is very observant of us!

So, no, you don't HAVE to work. It's a paid holiday! Take the day off, spend it with your family, eat some ham, watch "Die Hard." Whatever floats your boat.

Although... if there *is* a fire? You come, okay? Christmas is important, but so is Sparksuite, and your team! I mean, what would Christmas even BE without Sparksuite? It'd be… just… regular Tuesday. You know what I mean?

But seriously, enjoy your Christmas. Relax. Unless there's a fire. Then, my friend, you know what to do.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, listen up, everybody! Pam, could you take notes, please?

Working from home! Ah, the siren song of pajamas and daytime TV! But fear not, my Dunder Mifflin... I mean, Sparksuite family! We *do* have flexible remote work. It's like, *two days* a week! Boom! You get freedom!

Now, listen carefully. This isn't just a free-for-all. You can't just, like, decide to work from your couch every single day. It's *two days!* Two glorious days of peace and quiet... well, maybe, unless you have kids. Then it's probably louder than this office, which is saying something, because Stanley's always yelling at Phyllis.

BUT! And this is a *big* "but"...like Kevin's chili spill, a big, important "but!" We have EXTRA remote days too. Did you hear that?! EXTRA. REMOTE. DAYS. It's like getting extra bacon on Free Lunch Friday. They don't want you getting the corona so they are giving you a good amount of flexibility! 

Just, please, for the love of all that is holy, don't tell me you're working f

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, settle down everyone! Let me, Michael Scott, Regional Manager, give you the *definitive* answer!

First of all, congratulations on potentially joining the Sparksuite family! That’s what *I* call them, because we’re a family. A fun, tech-y, snack-filled family!

Now, about the laptop...

(leans in conspiratorially)

Listen closely...

Here at Sparksuite, we don’t want you dragging in your clunky, slow, probably-covered-in-who-knows-what personal laptop. We're not cavemen. We have... (checks notes) ...cutting-edge MacBook Pros with dual or TRIPLE high-end 4K monitors! 4K! It's like, seeing the world in *extra* clear!

So the answer is: NO! You will not need to bring your own laptop. We’ll hook you up with the best of the best. It’s a perk, like Free Lunch Fridays. Except, you know, every day, with a laptop. It's a *tech* perk!

Besides, have you seen my desk? Where would you put *your* laptop?

Okay, next question!



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, let's talk coffee!

(Leans back in chair, steeples fingers, trying to look important)

So, you want to know if you get *free* coffee? That's... that's a *very* important question. Especially if you're working long hours, which I *never* expect any of you to do. Unless it's really, *really* important.

Now, listen up, because this is serious. This is *Sparksuite* serious. This isn't just some Folgers in a Mr. Coffee situation. We are talking high-end, people! E1 Prima espresso machine. Mahlkönig grinders! Even a fancy PUQpress! What does that even *do*? Doesn't matter. It's impressive!

So, do you get it *free*? Well, the handbook said there is specialty-roasted beans, right? (Looks around nervously) Does anyone actually *pay* for it? That's absurd. It's like asking if you have to pay to use the ping pong table! This place isn't Scranton! We are cool! You can have it! No charge! Now get back to work, unless you need to go grab some snacks! 



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, let me tell you about lunch here. Listen up!

Free lunch, *every* day? Well, that would be the *dream*, right? Like Willy Wonka's chocolate factory, but with, you know, lasagna and stuff! 

But unfortunately, here at Sparksuite it's not everyday. We have Free-Lunch Fridays! It is only the first Friday of each month, not every day. But did you hear about Taco Tuesdays? Breakfast tacos!

But hey, on other days we have that beautiful break room with snacks and an amazing coffee set up! And hey, you're always welcome to leave the office to eat at local restaurants!



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, people, listen up! This Sparksuite thing is like, a real workplace, not just a place where they have fancy coffee machines... although, let's be honest, that espresso machine is a MAJOR perk.

The deal is, you gotta work 40 hours a week. Boom! That's it. No more, no less... unless you *want* to, but nobody's *expecting* you to, because of... work-life balance... which is what I have, with my improv classes, and Dunder Mifflin. Which is in the past!

But also, don't forget about Taco Tuesdays, and Free Lunch Fridays, because those are technically working hours, and everyone loves breakfast tacos and free food!



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, listen up everyone! This is important!

So, you're asking if you get to take a vacation... I mean, *parental leave*... even if *you're* not the one, you know... *gestures vaguely at stomach*... holding the baby?

Well, let me tell you, at Dunder Mifflin—I mean, at Sparksuite—we believe in equality! We're like the United Nations of office supplies and... uh... code!

So, yes! YES! It says right here in the Employee Handbook, under "New Parent Leave," which, by the way, great name, that *all* employees, regardless of gender, can take up to 12 weeks of *unpaid* new parent leave. Twelve weeks! That's almost three months! You can use it for welcoming the kid, or helping your spouse who’s doing the actual baby-making. And it's the same even if you're adopting.

It's the twenty-first century. Women can be bosses, and men can change diapers! Everyone can take care of their babies! It's beautiful! *sniffles* Just like my own children. Even though I love my work, I LOVE my children e

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, people, listen up! Pam, can you please take notes on this?

Alright, so Sparksuite... they get it. They understand that we're not trying to impress anyone with suits and ties. We're about innovation, not looking like we're about to sell someone a used car, like those guys in Stamford.

So, dress code? Basically, wear clothes. Please. And for the love of God, make sure it covers...you know. We don’t want any "accidents" like Jan had after too much wine, remember that conference?

T-shirts, shorts, athletic wear? Totally fine! It's like casual Friday, but EVERY DAY! But...and this is a big "BUTT" - I mean, BUT...use your common sense. Don't look like you rolled out of bed and came straight to work... unless you actually did, and then, maybe change first! Just a little bit, you know? Like add shoes or something.

And NO SPEEDOS! Especially Toby. Nobody wants to see that, Toby!

Basically, be yourself. Just be a *slightly* more presentable version of yourself. Cool? Great! Now,

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, settle down everyone! Let's get this straight. This is important. Pretend I'm wearing my "World's Best Boss" mug when I tell you that.

So, alcohol in the office, hmm? The handbook says alcoholic beverages "may be allowed in the office for select occasions." Notice the "may". That means maybe YES...but more likely...NO. It's like that episode of the office where I thought I could bring alcohol for the company picnic and I showed up with Everclear...not my best moment...

Look, here's the deal, if there's a party - and I mean a *real* party, like with balloons and maybe a theme that I came up with and the person "with authority" says it's okay, then maybe, just maybe, a responsible sip or two is okay.

But, unless you hear it from the BIG BOSS - ME - or someone I've personally designated, then our office is DRY. Like the Sahara Desert. Got it? No booze, no beer, no fancy cocktails. Just coffee... LOTS and LOTS of coffee. Sparksuite runs on coffee. And maybe some of my motiva

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, listen up everyone! This is VERY important. If you're feeling, shall we say, *under the weather*, like you've been stapled to a bad cold or maybe you're coming down with the flu... and let's face it, we've ALL been there (except maybe Dwight, who probably thinks germs are just tiny communists)...

FIRST, and I cannot stress this enough, FEEL BETTER! Think happy thoughts! Watch a little TV! Whatever makes you, YOU.

SECOND, and this is where the Sparksuite magic happens, you've got options!

*   **Option A: The "Michael Scott's Sick Day" Plan:** You feel too sick to work and just want to curl up with a bowl of chicken noodle soup. Great! Take some PTO! We've got tons of it! Remember, we don't care what you use it for. Vacation, sick day, watching "The Office" marathon... It's ALL good!

*   **Option B: The "I'm Tough Enough To Power Through" Plan (But Responsibly):** You're feeling a little sniffly, but you're a champ, like me! Okay, work from home! That's why we have remote

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, settle down everyone! Listen up! Lunch breaks. The best part of the day, besides maybe...awards? Or closing a BIG sale! (Raises eyebrows and winks).

Anyway, listen to the book, people! The book says, and I quote, "Lunch breaks will not count toward that amount of time." So, there it is. I'm not saying you should spend all day at lunch, but you know, reasonable amount of time and be back ready to work. Plus, here is a tip from me...don't eat too much. Cause you do not want to get the "itis".



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, settle down everyone! Let's talk about UNPAID time off.

Okay, so you want to take some UNPAID time off. Alright, alright, alright. Listen up! It's like this:

We have TWO buckets, like two giant buckets of... well, nothing, because it's unpaid! But still!

*   **Bucket #1: The Quickie!** This is the "quick review" bucket. You get 2 weeks of this per year, accruin' just like your PTO. You can roll it over... but only up to 2 weeks, okay? Don't be a time-off Scrooge! Use it or lose it! (Well, not *lose* it, but you know what I mean!) It's like a "get out of jail free" card, but for your schedule!
*   **Bucket #2: The Full Monty!** This is the "more review" bucket. You need MORE time? This is the bucket for you. But! You gotta use up your "quickie" bucket FIRST. And this bucket? It needs approval from your manager AND the People Ops team! It's like asking for a raise, but instead of more money, you get... no money. Think of all the relaxation!

So, to recap, you CAN take unpa

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, calm down everyone! If you see something inappropriate happening in the office, like Phyllis flirting with Bob Vance from Vance Refrigeration right in front of me... just kidding! Sort of. 

Seriously though, the Sparksuite Employee Handbook has got you covered. First, go to the person you report to, unless they're the ones doing the inappropriate thing. Then, you go up the ladder! Think of it like a pyramid... of trust! And always remember, that's what she said! 



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, let me get this straight. Paycheck...money...very important! Like when I close a big paper sale. Ah, I got it! 

According to this employee handbook, which is thicker than Phyllis's scrapbook, you get paid **semi-monthly**! That means twice a month! It's like Christmas, but... not Christmas. 

The 15th and the last day of the month, BAM! Direct deposit, just like magic. If those days are on a weekend or some silly holiday, it’ll be on the closest workday *before*! So plan your big spending accordingly! I know I do, I plan to treat myself after every sale with... something amazing!

And remember to use the online portal, it's very high tech and very smart, like me. It's where all the cool kids keep track of their money.



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, settle down everyone! Let me, Michael Scott, Regional Manager, explain this to you.

(leans in conspiratorially)

So, aside from your awesome employment contract – which, by the way, is SUPER important, because, you know, paperwork – there's this other thing...it's called the... uh... *checks the handbook frantically*... the PIIAA!

(gestures wildly)

Yes! The Proprietary Information and Inventions Assignment Agreement! It's a fancy way of saying "keep our secrets secret" and "what you make here, is ours." But don't worry, it's just like when you make a casserole at home, and everyone wants the recipe? This just makes sure Sparksuite is getting the first slice, you know? And because our secrets are so saucy, and our inventions are so… inventy. You’re gonna want to get that signed like, yesterday.

(smiles reassuringly)

It's all good stuff! Keeps things nice and tidy! And remember, that's what she said! *winks at the camera*



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Okay, okay, hold on a second... let me consult this HUGE, beautiful, AMAZING employee handbook. I basically memorized the whole thing, but, you know, gotta look official. *flips through the handbook dramatically*

Okay, here it is! FREE STUFF! We're talking SWAG! You know I'm all about that.

So, listen up. On day one, you get a T-shirt and a hat. But that's not all! We've got anniversary gifts, too. One year? A fancy Sttoke mug! Three years? A Nike Dri-FIT shirt, so you can look sporty even when you're not. Five years? A Patagonia rain jacket! That's like, a jacket that says, "I'm important, and I survive in the elements!" Seven years? A North Face puffer jacket. You know, for those chilly days when you're thinking about quitting but then you remember the perks. Ten years? A Columbia backpack! Because who doesn't love a good backpack? And FIFTEEN YEARS? A SMARTWATCH! 

So, yeah, we give you free stuff. Is it the reason people stick around? NO! It's the amazing work environment, my inc

In [None]:
generated_answers_ms

['Okay, okay, listen up everyone! This is important! Benefits are IMPORTANT!\n\nSo, Sparksuite, right? Great company, amazing coffee, best break room this side of Scranton! They\'re giving you these plans, these... *choices*. It’s like choosing between a gourmet pizza and a slightly less gourmet pizza, because they’re ALL good, okay?\n\nBut here\'s the deal. They\'re matching your PREMIUM, like a "buy one get one half-off" deal, but only on your **health insurance**!\n\nOkay, here’s the most important part: they give you 50% toward whichever health plan YOU choose, but that’s only toward the base health plan premium. It doesn’t matter if you get the fanciest, platinum-est plan out there, they will still only pay 50% of the base health plan premium.\n\nSo, if the base plan costs $300, they give you $150! You’re still paying the rest, but hey, free money! It’s like finding a twenty dollar bill in your old coat, but it\'s better because it keeps you from, ya know, DYING! \n\nAnd… No compa

In [None]:
with open("generated_answers_ms.txt", "w") as f:
    for r in generated_answers_ms:
        f.write(r + "\n")

In [None]:
files.download("generated_answers_ms.txt")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#Evaluate the Pipeline
semantic answer similarity evaluator

In [28]:
from haystack.components.evaluators import SASEvaluator, FaithfulnessEvaluator

In [29]:
#ground truth answers that were manually looked up
with open("/content/drive/MyDrive/NLP/Project/ground_truth.txt", "r") as f:
    ground_truth = [line.strip() for line in f if line.strip()]

len(ground_truth)

30

In [32]:
ground_truth

['The company contributes 50% of the cost of the employee’s base health plan.',
 'Paid time off that can be used freely for vacations and adventures, time with loved ones, appointments, sick days, jury duty, and just about anything else. Before 2- year anniversary it is 3 weeks, after 2 years it is 3.5 weeks and after 4 years in the company it is 4 weeks.',
 'You need to notify the person you report to.',
 'All full-time team members are free to begin each day between 7am-10am and finish each day between 4pm-7pm after completing 8 hours of work. All full-time team members are free to begin each day between 7am-10am and finish each day between 4pm-7pm after completing 8 hours of work.',
 'No. Employees receive paid time off for Christmas Eve and Christmas Day. If any of those two days fall on weekend days, it will be observed on the closest business day.',
 'All full-time team members can work remotely up to 2 days each week. There is no approval or advance notice needed.',
 'You do not

In [79]:
sas_evaluator = SASEvaluator()
sas_evaluator.warm_up()
result = sas_evaluator.run(
  ground_truth_answers= ground_truth,
  predicted_answers=generated_answers
)


In [31]:
print(result["individual_scores"])

print(result["score"])

[0.9433123469352722, 0.7707047462463379, 0.5988602638244629, 0.9295966625213623, 0.8606459498405457, 0.8943957686424255, 0.7600416541099548, 0.7080422639846802, 0.7756472229957581, 0.568705677986145, 0.9002214074134827, 0.8816585540771484, 0.9206812977790833, 0.6021935939788818, 0.8628659248352051, 0.7372877597808838, 0.848888635635376, 0.7051907181739807, 0.6741403341293335, 0.5848613977432251, 0.18580079078674316, 0.10126391053199768, 0.0473247766494751, 0.041631195694208145, 0.15417125821113586, 0.0699518695473671, 0.09729640185832977, 0.21384969353675842, 0.23846882581710815, 0.12724675238132477]
0.560164921854933


In [44]:
query_embedding = text_embedder.run(questions[0])["embedding"]

retrieved_docs = retriever.run(query_embedding=query_embedding, top_k=1)


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Faithfulness Evaluator

In [71]:
from haystack.utils import Secret

In [None]:


google_generator = GoogleGenAIChatGenerator(
    api_key=Secret.from_token("YOUR_API_KEY"),
    generation_kwargs={
        "response_mime_type": "application/json"  # forces JSON output
    }
)

faithfulness_evaluator = FaithfulnessEvaluator(chat_generator=google_generator)



In [73]:
pipeline_eval = Pipeline()
pipeline_eval.add_component("faithfulness_evaluator", faithfulness_evaluator)

questions_eval = questions
contexts_eval = []
for q in questions_eval:
    query_embedding = text_embedder.run(q)["embedding"]
    retrieved_docs = retriever.run(query_embedding=query_embedding, top_k=1)
    if retrieved_docs["documents"]:
        contexts_eval.append(retrieved_docs["documents"][0].content)
    else:
        contexts_eval.append("")

predicted_answers_eval = generated_answers



Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [74]:
result_eval = pipeline_eval.run({
    "faithfulness_evaluator": {
        "questions": questions_eval,
        "contexts": contexts_eval,
        "predicted_answers": predicted_answers_eval
    }
})

100%|██████████| 30/30 [00:50<00:00,  1.68s/it]


In [75]:
print(result_eval["faithfulness_evaluator"]["results"])
print(result_eval["faithfulness_evaluator"]["score"])

0.7911111111111111
