In [1]:
pip install haystack-ai

Collecting haystack-ai
  Downloading haystack_ai-2.5.1-py3-none-any.whl.metadata (13 kB)
Collecting haystack-experimental (from haystack-ai)
  Downloading haystack_experimental-0.1.1-py3-none-any.whl.metadata (6.9 kB)
Collecting lazy-imports (from haystack-ai)
  Downloading lazy_imports-0.3.1-py3-none-any.whl.metadata (10 kB)
Collecting openai>=1.1.0 (from haystack-ai)
  Downloading openai-1.46.0-py3-none-any.whl.metadata (24 kB)
Collecting posthog (from haystack-ai)
  Downloading posthog-3.6.6-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting httpx<1,>=0.23.0 (from openai>=1.1.0->haystack-ai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai>=1.1.0->haystack-ai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting monotonic>=1.5 (from posthog->haystack-ai)
  Downloading monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting backoff>=1.10.0 (from posthog->haystack-a

In [2]:
!pip install "sentence-transformers>=3.0.0" "huggingface_hub>=0.23.0"

Collecting sentence-transformers>=3.0.0
  Downloading sentence_transformers-3.1.0-py3-none-any.whl.metadata (23 kB)
Downloading sentence_transformers-3.1.0-py3-none-any.whl (249 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.1/249.1 kB[0m [31m16.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.1.0


In [3]:
from haystack import Pipeline, Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.utils import Secret
from haystack.components.generators import HuggingFaceAPIGenerator

In [4]:
from haystack.components.writers import DocumentWriter
from haystack.components.converters import MarkdownToDocument, PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentSplitter, DocumentCleaner
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from pathlib import Path

In [5]:
# Build the preprocessing pipeline: create the document store and the
# components that route, load and merge the input files.
# Note: the router accepts three MIME types, but only its "text/plain" output
# is connected to a converter further down in this notebook.
document_joiner = DocumentJoiner()
text_file_converter = TextFileToDocument()
file_type_router = FileTypeRouter(
    mime_types=[
        "text/plain",
        "application/pdf",
        "text/markdown",
    ],
)
document_store = InMemoryDocumentStore()

In [6]:
# Splitter: word-based chunks of length 150 with an overlap of 50.
document_splitter = DocumentSplitter(
    split_by="word",
    split_length=150,
    split_overlap=50,
)
# Cleaner with the library defaults; it is wired in front of the splitter
# when the pipeline is connected.
document_cleaner = DocumentCleaner()

In [7]:
# Writer that persists the processed documents into `document_store`.
document_writer = DocumentWriter(document_store=document_store)
# Embedder that attaches Sentence-Transformers embeddings to each chunk.
# NOTE(review): the query pipeline in this notebook retrieves with
# InMemoryBM25Retriever, which presumably does not use these embeddings;
# confirm whether the embedding step is actually needed.
document_embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2",
)

In [8]:
# Create the indexing pipeline and register every component under the name
# that the connection step refers to.
preprocessing_pipeline = Pipeline()
for component_name, component in (
    ("file_type_router", file_type_router),
    ("text_file_converter", text_file_converter),
    ("document_joiner", document_joiner),
    ("document_cleaner", document_cleaner),
    ("document_splitter", document_splitter),
    ("document_embedder", document_embedder),
    ("document_writer", document_writer),
):
    preprocessing_pipeline.add_component(name=component_name, instance=component)

In [9]:
# Wire the components into a linear chain:
# router (text/plain) -> converter -> joiner -> cleaner -> splitter -> embedder -> writer.
for sender, receiver in (
    ("file_type_router.text/plain", "text_file_converter.sources"),
    ("text_file_converter", "document_joiner"),
    ("document_joiner", "document_cleaner"),
    ("document_cleaner", "document_splitter"),
    ("document_splitter", "document_embedder"),
    ("document_embedder", "document_writer"),
):
    preprocessing_pipeline.connect(sender, receiver)
# Trailing expression so the cell still displays the pipeline summary
# (connect() returns the pipeline itself, which the original cell displayed).
preprocessing_pipeline

<haystack.core.pipeline.pipeline.Pipeline object at 0x7e07186eac50>
🚅 Components
  - file_type_router: FileTypeRouter
  - text_file_converter: TextFileToDocument
  - document_joiner: DocumentJoiner
  - document_cleaner: DocumentCleaner
  - document_splitter: DocumentSplitter
  - document_embedder: SentenceTransformersDocumentEmbedder
  - document_writer: DocumentWriter
🛤️ Connections
  - file_type_router.text/plain -> text_file_converter.sources (List[Path])
  - text_file_converter.documents -> document_joiner.documents (List[Document])
  - document_joiner.documents -> document_cleaner.documents (List[Document])
  - document_cleaner.documents -> document_splitter.documents (List[Document])
  - document_splitter.documents -> document_embedder.documents (List[Document])
  - document_embedder.documents -> document_writer.documents (List[Document])

In [None]:
#from sentence_transformers import SentenceTransformer

In [None]:
#pip uninstall sentence-transformers

Found existing installation: sentence-transformers 3.0.1
Uninstalling sentence-transformers-3.0.1:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/sentence_transformers-3.0.1.dist-info/*
    /usr/local/lib/python3.10/dist-packages/sentence_transformers/*
Proceed (Y/n)? Y
  Successfully uninstalled sentence-transformers-3.0.1


In [None]:
#pip install "sentence-transformers>=3.0.0"

Collecting sentence-transformers>=3.0.0
  Using cached sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Using cached sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-3.0.1


In [10]:
# Path to the directory that contains the input file.
output_dir = "/content/"
# Name of the plain-text file to index.
data_file = Path(output_dir) / "data_dc.txt"
# Fail fast on a missing input: depending on the Haystack version, an
# unreadable source may only be skipped with a warning, which would leave the
# document store empty without any error.
if not data_file.is_file():
    raise FileNotFoundError(f"Input file not found: {data_file}")
# Route, convert, clean, split, embed and write the file; the result (shown as
# the cell output) reports how many documents were written.
preprocessing_pipeline.run({"file_type_router": {"sources": [data_file]}})

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/269 [00:00<?, ?it/s]

{'document_writer': {'documents_written': 8595}}

In [11]:
# Jinja template for the LLM prompt. It expects two variables:
#   - documents: iterable of objects with a `.content` attribute (filled from
#     the retriever output when the RAG pipeline is connected)
#   - question: the user question as plain text
prompt_template = """
Given these documents, answer the question.
Documents:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}
Question: {{question}}
Answer:
"""

In [12]:
# Prompt builder that renders `prompt_template`.
prompt_builder = PromptBuilder(template=prompt_template)
# BM25 retriever over the in-memory store filled by the preprocessing pipeline.
retriever = InMemoryBM25Retriever(document_store=document_store)

In [13]:
# Create the token on Hugging Face under "Access Tokens" (original note:
# important, it needs "write" permission).
# The token is read from the HF_API_TOKEN environment variable and, if that is
# not set, requested interactively, so it never has to be pasted into (and
# saved with) the notebook source.
import os
from getpass import getpass

if not os.environ.get("HF_API_TOKEN"):
    os.environ["HF_API_TOKEN"] = getpass("Enter your Hugging Face API token: ")

generator = HuggingFaceAPIGenerator(
    api_type="serverless_inference_api",
    api_params={"model": "mistralai/Mixtral-8x7B-Instruct-v0.1"},
    token=Secret.from_env_var("HF_API_TOKEN"),
)

In [14]:
# Assemble the query pipeline: retriever -> prompt builder -> LLM.
rag_pipeline = Pipeline()
for component_name, component in (
    ("retriever", retriever),
    ("prompt_builder", prompt_builder),
    ("llm", generator),
):
    rag_pipeline.add_component(component_name, component)
# Retrieved documents fill the template's `documents` variable.
rag_pipeline.connect("retriever", "prompt_builder.documents")
# The last call returns the pipeline, so the cell displays its summary.
rag_pipeline.connect("prompt_builder", "llm")

<haystack.core.pipeline.pipeline.Pipeline object at 0x7e06db677ac0>
🚅 Components
  - retriever: InMemoryBM25Retriever
  - prompt_builder: PromptBuilder
  - llm: HuggingFaceAPIGenerator
🛤️ Connections
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [63]:
# Question 1: why Kohei Sawaki let go of Ran after Conan shot her
# (movie "The Fourteenth Target").
question = "Why was Kohei Sawaki forced to leave Ran when Conan shot her in the second movie? In The Fourteenth Target movie, Ran was being held hostage by Kohei Sawaki, Conan picked up the gun and shot Ran, then he was forced to leave Ran (The same scene also happened when Jo Murakami held Eri Kisaki hostage and Mouri shot her). I still don't understand why he was forced to stop holding her hostage."

In [64]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [65]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[" In the provided document, it is not explicitly stated why Kohei Sawaki was forced to leave Ran when Conan shot her in the second movie. However, it can be inferred that Sawaki was forced to stop holding Ran hostage because Conan's shot made her useless as a hostage to him. Sawaki's plan was to use Ran as a hostage to escape, but after being shot, Ran became a liability rather than an asset. Therefore, Sawaki had no reason to continue holding her hostage and was forced to leave her behind. The same scenario happened when Jo Murakami held Eri Kisaki hostage and Mouri shot her. Murakami was forced to leave Eri because she was no longer useful as a hostage after being shot."]


In [66]:
# Question 2: whether Conan is ever taken seriously as a detective, and in which episode.
question = "Does detective Conan ever get taken seriously as an actual detective, instead of being seen as a mere kid by the main characters? Instead of hiding behind his subtle hints he gives and his voice changer to manipulate someone else as the case solver. If so, what episode does he get taken seriously?"

In [67]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [68]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' Detective Conan does get taken seriously as an actual detective in the episode "The Identity of the Murderer!" (Episode 211). In this episode, Conan reveals the identity of the murderer by disguising himself as Sonoko with his voice changer and speaking into his voice-changer bowtie. He explains that he figured out the identity of the killer who attempted to kill Ran in the forest, then again earlier in the bedroom, the one who carried away Chikako right in front of their faces and chopped her up in the forest. Conan then explains that the killer wasn\'t really hiding in the forest, as he proved as the killer that attacked Ran in her bedroom did not use any stealth techniques. This is the first time in the series where Conan is taken seriously as an actual detective by the main characters, as they listen to his explanation and are able to understand the reasoning behind his deductions.']


In [69]:
# Question 3: whether Kaito Kid first met Conan or Shinichi.
question = "Who did Kaito Kid meet first: Conan or Shinichi? In Detective Conan series, Kaito Kid first appeared in episode 76. Conan acted like he's never met him and he was looking forward to meeting him and putting him in prison. How is that even possible?"

In [70]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [71]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[" Kaito Kid first met Shinichi in the Detective Conan series, not Conan. This is because the episode where Kaito Kid first appeared (episode 76) takes place before Shinichi became Conan. In this episode, Shinichi is still his normal self and has not transformed into Conan yet. Therefore, Kaito Kid met Shinichi first, not Conan. The reason why Conan acted like he's never met Kaito Kid in this episode is because he has lost his memories as Shinichi and does not remember meeting Kaito Kid before."]


In [72]:
# Question 4: why Haibara attends school instead of working on the APTX drug.
question = "Why does Haibara attend school? After his body shrank, Conan attends school in order to look like a normal child especially in front of Ran and Kogoro, but why does Haibara attend school? Why does not she dedicate her time trying to redevelop the APTX drug and trying to find an antibody to it?"

In [73]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [74]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' Haibara attends school as part of her alias "Ai Haibara" to prevent the Black Organization from knowing that she had shrunk into a child. She and Professor Hiroshi Agasa created her alias and enrolled her in Teitan Elementary School to keep her identity hidden. While she may have knowledge and skills that could be used to redevelop the APTX drug and find an antibody to it, her primary focus is on maintaining her cover and avoiding detection by the Black Organization.']


In [75]:
# Question 5: when Conan first met Masami, Haibara's sister.
# The question body had been pasted twice back-to-back; it is kept once here
# so the retriever query and the prompt are not needlessly doubled.
question = "When did Conan first met Haibara's sister? At the beginning of episode 128, Conan said that he knew Masami, Haibara's sister. However, we have never seen them meeting before, how is that possible? In the next episode, Masami told her sister that she suspects Conan to be Shinichi. I do not think she would think that unless she knew Conan well."

In [76]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [77]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' Conan and Haibara\'s sister, Masami, met each other during the events of episode 127, "The Missing Cabin at the Belltree Express." This is when Haibara, who was still known as Ai Haibara at the time, was introduced to the Detective Boys and revealed her past as a former member of the Black Organization with the codename "Sherry." Conan, who mistrusted Haibara due to her former affiliation with the Black Organization, protested to Professor Agasa about her presence. However, Agasa explained that once Haibara was caught by the Black Organization, it would only be a matter of time before he was captured too. Since then, Conan had started to place his trust in Haibara and accepted her as his friend. It is likely that Masami, who is Haibara\'s older sister and was also a member of the Black Organization, met Conan during this time and had the opportunity to get to know him well enough to suspect that he is actually Shinichi Kudo.']


In [78]:
# Question 6: how Conan knows the names Vodka and Gin.
# NOTE(review): "dies" looks like a typo for "does"; confirm against the
# source of the question before changing it, since the text is used verbatim
# as the retrieval query.
question = "How dies Conan know the names of Vodka and Gin in the anime? I watched the anime many times but in the first episode of Detective Conan, both Vodka and Gin never mentioned their names. But later as the series continues, it looks like he knows their names even before Haibara appeared in the series."

In [79]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [80]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[" Conan learns the codenames of Vodka and Gin in the anime episode 'The Shinkansen Bomb Case' (episode 10). In this episode, Conan bugs their seats as soon as they leave to go to make a transaction, and overhears Gin explain to Vodka the details of the bombing plan. Gin and Vodka disembark at Nagoya to avoid being caught in the explosion, and Conan decides to follow them. However, he is stopped by Ran Mouri from chasing them. Conan then turns his attention to finding the bomb and successfully ejects the black case from the train before it explodes."]


In [81]:
# Question 7: when the zoom function was added to Conan's glasses.
question = "When did Conan's glasses first get zoom functionality added? In epsisode 466, Conan sees something down in a lake and Agasa says: Use the magnifying function of your glasses. Allowing Conan to zoom in and see the body floating. I don't recall this functionality ever being mentioned before. The way he mentions it and the way Conan responds makes it seem like it is obvious they both knew about it at this point. When did Professor Agasa add this feature to Conan's glasses?"

In [82]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [83]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' The zoom functionality was added to Conan\'s glasses in episode 466, "The Mysterious Lake Disappearance Case." This is the first time it is mentioned or used in the series.']


In [85]:
# Question 8: identity of the woman who talks to young Tooru Amuro / Rei Furuya.
question = "Who is the woman who talks to young Tooru Amuro/Rei Furuya? Tooru Amuro or Rey Furuya is the Black Organization member nicknamed Bourbon. He actually something like secret police agent (I can't remember the organization's name) who is working undercover in BO. He as Tooru Amuro works at Poirot Cafe, and become Kogoro's apprentice. I remember a case when Amuro, Conan, Ran and Kogoro are at the hospital and there's a case where a posion is used in a tea. After Amuro and Conan realise who the suspect of the cases is, there's a kid with her mother counting the lift that coming from the upper floors. When the kid counts to zero (they are on the ground floor), Amuro hears that and he seems surprised about it. He also has a flashback to when he was a kid and was called zero by a woman. So, who is she? Who's the woman who speaks to with young Amuro? What is their relation? And also why he called zero by this woman?"

In [86]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [87]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' The woman who speaks to young Tooru Amuro/Rei Furuya is his mother. She calls him "Zero" because it is his codename in the Public Security Bureau, where he works as an undercover agent. The fact that he responds to this codename suggests that he had a close relationship with his mother and that she was aware of his work as an agent. However, the specifics of their relationship and why she calls him "Zero" are not explicitly stated in the given document.']


In [88]:
# Question 9: how old Conan is meant to be, before and after shrinking.
question = "How old is Conan meant to be? I was rewatching an old episode of Detective Conan with a friend and we started discussing how weird it would be to hear a young child say some of the things Conan says. Then, I realized I didn't know the exact age that Conan was really supposed to be acting. I don't recall Conan's age to ever be mentioned in the show. How old is Conan supposed to be now? What was his age before he shrank?"

In [89]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [90]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' Conan is supposed to be acting as a 6-7 year old. Before he shrank, he was a 17 year old high school student.']


In [92]:
# Question 10: why Heiji's mother lied to Mouri and Conan in episode 220.
question = "Why did Heiji's mom have to lie to Mouri and Conan in episode 220? At the end of episode 220 of Detective Conan, The Client Full of Lies, Heiji appears and explains the reason behind the fake name Reiko to Conan that it was to conceal the fact that his mom is from Osaka. But that doesn't explain why she has to lie about her being good at cooking and her allergy to dust. If she were to hide her identity, she need not lie about the cooking and the allergies. Conan wouldn't be able to find out even if he knew that she was from Osaka since it was his first time meeting Heiji's mother. Why did she have to go that far? Or was it part of her test for Mouri to figure out her real identity? Even if it's the case, I bet no one's going to be able to deduce that she is Heiji's mother because first, it's impossible to even know the true identity of a person just by knowing her hobbies or skills. Unless she has a distinctive quality in which the Hattori's are famous for. Second, Conan did not even know a thing about Heiji's mom. The test was actually for Shinichi but since he wasn't there, she tried Mouri instead. No matter how good Conan's deductions were, it was impossible for him to expose her real identity as Heiji's mom and her real intentions. They were even surprised when she revealed her true identity as Heiji's mom since Heiji didn't even look like her. So, why did she have to lie about all the stuff?"

In [93]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [94]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[" It is not explicitly stated in the given document why Heiji's mom had to lie about her cooking skills and allergy to dust. However, it is possible that she did so as part of a test for Mouri and Conan to figure out her true identity, as the document suggests. It is also possible that she wanted to gauge their observational and deductive abilities, or to see how they would react to her lies. Additionally, it is worth noting that Heiji's mom may have had her own reasons for lying, which are not mentioned in the document. Ultimately, the reason for her lies remains speculative and open to interpretation."]


In [95]:
# Question 11: where the real Mouri was while Kaito Kid impersonated him in episode 219.
question = "What happened to Mouri in episode 219? In episode 219 of Detective Conan, The Gathering of the Detectives! Shinichi Kudo vs. Kaitou Kid, after Senma jumped out of the helicopter, Mouri quickly jumps out and reveals his identity as Kaito Kid. Kid saves Senma from falling. But what happened to the real Mouri? If the Mouri in the chopper was actually Kid, where was the real Mouri?"

In [96]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [97]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' The real Mouri was not present in the helicopter. The Mouri in the helicopter was actually Kaito Kid in disguise. The real Mouri was likely on the ground, watching the events unfold.']


In [98]:
# Question 12: why Conan calls Bourbon a liar at the end of chapter 897.
question = "Why does Conan call Bourbon a liar?  I still don't get the context of this at the end of chapter 897"

In [99]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [100]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' In chapter 897 of the Detective Conan manga, Conan calls Bourbon a liar because Bourbon had previously claimed that he had never met Shuichi Akai, but it is revealed that Bourbon and Akai had actually met before. This revelation leads Conan to call Bourbon a liar for denying their previous encounter.']


In [102]:
# Question 13: why Conan does not reveal his identity to Ran.
question = "Why doesn't Conan reveal his identity at least to Ran in Detective Conan (Case Closed)? Why doesn't Shinichi Kudo tell about his problem to Ran in Detective Conan, when he completely understands how much the girl loves him and suffers from pain due to him? While the truth is many times she was almost certain that Conan was Shinichi himself. The series now has so many episodes. Come on! She needs some sympathy."

In [103]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [104]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[" Conan doesn't reveal his identity to Ran in Detective Conan (Case Closed) because he wants to protect her from the danger of the Black Organization. Shinichi Kudo doesn't tell about his problem to Ran because he doesn't want to burden her with his troubles and also because he wants to protect her from the danger of the Black Organization. Ran has suspected that Conan is Shinichi multiple times, but Conan continues to hide his identity from her to keep her safe. The series has many episodes, but the main plot revolves around Conan's mission to uncover the truth about the Black Organization and bring them to justice, while also protecting those he cares about. Revealing his identity to Ran would compromise her safety and potentially put her in danger, which is why Conan continues to keep his identity a secret."]


In [105]:
# Question 14: why Ai is addressed as "Haibara-san" by Conan and the Detective Boys.
question = "Why do Conan and the Detective Boys call Ai Haibara-san?"

In [106]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [107]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' Conan and the Detective Boys call Ai Haibara-san because she is older than them and they respect her. Additionally, Ai only allows Ayumi to call her "Ai-chan".']


In [60]:
# Question 15: why Conan did not tell Haibara earlier about the liquor that
# restored him to his original size (episode 177).
question = "Why did not Conan tell Haibara about the liquor that made him restore to his original size as soon as possible? In episode 177, Reunion with the Black Organization, Conan asked Haibara to drink the liquor which made her restore to her original size, but why did not he tell her that the first time he met her or at least after he trusted her? This is a very important information which cannot be delayed."

In [61]:
# Feed the same question text to the retriever (as query) and to the prompt builder.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [62]:
# Print the list of replies produced by the "llm" component.
llm_replies = results["llm"]["replies"]
print(llm_replies)

[' The document does not provide enough information to answer this question. It is not specified when or how Conan found out about the liquor that makes him restore to his original size, or why he did not tell Haibara about it sooner. It is also not specified whether Haibara knew about this liquor before Conan asked her to drink it in episode 177. Without this information, it is not possible to determine why Conan did not tell Haibara about the liquor sooner.']
