In [1]:
pip install haystack-ai

Collecting haystack-ai
  Downloading haystack_ai-2.5.1-py3-none-any.whl.metadata (13 kB)
Collecting haystack-experimental (from haystack-ai)
  Downloading haystack_experimental-0.1.1-py3-none-any.whl.metadata (6.9 kB)
Collecting lazy-imports (from haystack-ai)
  Downloading lazy_imports-0.3.1-py3-none-any.whl.metadata (10 kB)
Collecting openai>=1.1.0 (from haystack-ai)
  Downloading openai-1.47.0-py3-none-any.whl.metadata (24 kB)
Collecting posthog (from haystack-ai)
  Downloading posthog-3.6.6-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting httpx<1,>=0.23.0 (from openai>=1.1.0->haystack-ai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai>=1.1.0->haystack-ai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting monotonic>=1.5 (from posthog->haystack-ai)
  Downloading monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting backoff>=1.10.0 (from posthog->haystack-a

In [2]:
!pip install "sentence-transformers>=3.0.0" "huggingface_hub>=0.23.0"

Collecting sentence-transformers>=3.0.0
  Downloading sentence_transformers-3.1.1-py3-none-any.whl.metadata (10 kB)
Downloading sentence_transformers-3.1.1-py3-none-any.whl (245 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m245.3/245.3 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.1.1


In [3]:
from haystack import Pipeline, Document
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.retrievers import InMemoryBM25Retriever
from haystack.components.generators import OpenAIGenerator
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.utils import Secret
from haystack.components.generators import HuggingFaceAPIGenerator

In [4]:
from haystack.components.writers import DocumentWriter
from haystack.components.converters import MarkdownToDocument, PyPDFToDocument, TextFileToDocument
from haystack.components.preprocessors import DocumentSplitter, DocumentCleaner
from haystack.components.routers import FileTypeRouter
from haystack.components.joiners import DocumentJoiner
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from pathlib import Path

In [5]:
# Build the preprocessing pipeline: create the building blocks that sit in front of
# cleaning/splitting. The store is shared with the retriever defined further down.
document_store = InMemoryDocumentStore()
text_file_converter = TextFileToDocument()
document_joiner = DocumentJoiner()
# The router knows three MIME types; only its "text/plain" output is wired to a
# converter in the connection cell below.
file_type_router = FileTypeRouter(
    mime_types=[
        "text/plain",
        "application/pdf",
        "text/markdown",
    ]
)

In [6]:
# Cleaner with default settings, followed by a word-based splitter:
# chunks of 150 words, neighbouring chunks overlapping by 50 words.
document_cleaner = DocumentCleaner()
document_splitter = DocumentSplitter(
    split_by="word",
    split_length=150,
    split_overlap=50,
)

In [7]:
from haystack.document_stores.types import DuplicatePolicy

# Embeds every chunk with the all-MiniLM-L6-v2 sentence-transformers model.
document_embedder = SentenceTransformersDocumentEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")
# OVERWRITE keeps the ingestion cell re-runnable: with the default policy the
# in-memory store raises on documents whose ids are already present, so running
# ingestion twice against the same store would fail.
document_writer = DocumentWriter(document_store, policy=DuplicatePolicy.OVERWRITE)

In [8]:
# Register each preprocessing component under the same name as the variable holding it.
preprocessing_pipeline = Pipeline()
for component_name, component in (
    ("file_type_router", file_type_router),
    ("text_file_converter", text_file_converter),
    ("document_joiner", document_joiner),
    ("document_cleaner", document_cleaner),
    ("document_splitter", document_splitter),
    ("document_embedder", document_embedder),
    ("document_writer", document_writer),
):
    preprocessing_pipeline.add_component(instance=component, name=component_name)

In [9]:
# Wire the components into a linear chain:
# router (text/plain only) -> converter -> joiner -> cleaner -> splitter -> embedder -> writer.
for sender, receiver in (
    ("file_type_router.text/plain", "text_file_converter.sources"),
    ("text_file_converter", "document_joiner"),
    ("document_joiner", "document_cleaner"),
    ("document_cleaner", "document_splitter"),
    ("document_splitter", "document_embedder"),
    ("document_embedder", "document_writer"),
):
    preprocessing_pipeline.connect(sender, receiver)

# Bare expression so the cell still renders the pipeline summary as its output.
preprocessing_pipeline

<haystack.core.pipeline.pipeline.Pipeline object at 0x791ccd77a1d0>
🚅 Components
  - file_type_router: FileTypeRouter
  - text_file_converter: TextFileToDocument
  - document_joiner: DocumentJoiner
  - document_cleaner: DocumentCleaner
  - document_splitter: DocumentSplitter
  - document_embedder: SentenceTransformersDocumentEmbedder
  - document_writer: DocumentWriter
🛤️ Connections
  - file_type_router.text/plain -> text_file_converter.sources (List[Path])
  - text_file_converter.documents -> document_joiner.documents (List[Document])
  - document_joiner.documents -> document_cleaner.documents (List[Document])
  - document_cleaner.documents -> document_splitter.documents (List[Document])
  - document_splitter.documents -> document_embedder.documents (List[Document])
  - document_embedder.documents -> document_writer.documents (List[Document])

In [None]:
#from sentence_transformers import SentenceTransformer

In [None]:
#pip uninstall sentence-transformers

Found existing installation: sentence-transformers 3.0.1
Uninstalling sentence-transformers-3.0.1:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/sentence_transformers-3.0.1.dist-info/*
    /usr/local/lib/python3.10/dist-packages/sentence_transformers/*
Proceed (Y/n)? Y
  Successfully uninstalled sentence-transformers-3.0.1


In [None]:
#pip install "sentence-transformers>=3.0.0"

Collecting sentence-transformers>=3.0.0
  Using cached sentence_transformers-3.0.1-py3-none-any.whl.metadata (10 kB)
Using cached sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-3.0.1


In [11]:
# Directory that contains the files to index.
output_dir = "/content/"
# All .txt files in that directory, sorted so the ingestion order is reproducible.
data_files = sorted(Path(output_dir).glob("*.txt"))

# Fail loudly instead of silently building an empty index that the RAG pipeline
# would later try to answer from.
if not data_files:
    raise FileNotFoundError(f"No .txt files found in {output_dir!r}")

# A single run over all sources replaces one pipeline invocation per file.
preprocessing_pipeline.run({"file_type_router": {"sources": data_files}})

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Batches:   0%|          | 0/269 [00:00<?, ?it/s]

Batches:   0%|          | 0/1157 [00:00<?, ?it/s]

Batches:   0%|          | 0/4668 [00:00<?, ?it/s]

In [12]:
# Jinja template for the generator prompt: the loop emits the content of every
# document handed to the prompt builder, and `question` is filled in at run time.
prompt_template = """
Given these documents, answer the question.
Documents:
{% for doc in documents %}
    {{ doc.content }}
{% endfor %}
Question: {{question}}
Answer:
"""

In [13]:
# Prompt builder renders `prompt_template`; the retriever reads from the store
# that the preprocessing pipeline writes to.
prompt_builder = PromptBuilder(template=prompt_template)
# NOTE(review): this is a keyword (BM25) retriever, so the embeddings computed by
# `document_embedder` during ingestion do not appear to be used anywhere in this
# notebook; confirm whether an embedding retriever was intended.
retriever = InMemoryBM25Retriever(document_store=document_store)

In [14]:
import os
from getpass import getpass

# Create the token on Hugging Face under "Access Tokens" (the original note says it
# must have write permission). The token is read from the environment instead of
# being pasted into the notebook, so it cannot leak when the notebook is shared;
# if the variable is missing, it is requested interactively without being echoed.
if not os.environ.get("HF_API_TOKEN"):
    os.environ["HF_API_TOKEN"] = getpass("Hugging Face API token: ")

generator = HuggingFaceAPIGenerator(
    api_type="serverless_inference_api",
    api_params={"model": "mistralai/Mixtral-8x7B-Instruct-v0.1"},
    token=Secret.from_env_var("HF_API_TOKEN"),
)

In [15]:
# Query pipeline: retriever -> prompt builder -> LLM generator.
rag_pipeline = Pipeline()
for component_name, component in (
    ("retriever", retriever),
    ("prompt_builder", prompt_builder),
    ("llm", generator),
):
    rag_pipeline.add_component(component_name, component)

# Retrieved documents fill the template's `documents` variable ...
rag_pipeline.connect("retriever", "prompt_builder.documents")
# ... and the rendered prompt is what the generator receives.
rag_pipeline.connect("prompt_builder", "llm")

<haystack.core.pipeline.pipeline.Pipeline object at 0x791b75a54fd0>
🚅 Components
  - retriever: InMemoryBM25Retriever
  - prompt_builder: PromptBuilder
  - llm: HuggingFaceAPIGenerator
🛤️ Connections
  - retriever.documents -> prompt_builder.documents (List[Document])
  - prompt_builder.prompt -> llm.prompt (str)

In [None]:
# Question 1: why the hostage-taker let go of Ran in "The Fourteenth Target".
# The text is used verbatim as retriever query and prompt question in the next cell.
question = "Why was Kohei Sawaki forced to leave Ran when Conan shot her in the second movie? In The Fourteenth Target movie, Ran was being held hostage by Kohei Sawaki, Conan picked up the gun and shot Ran, then he was forced to leave Ran (The same scene also happened when Jo Murakami held Eri Kisaki hostage and Mouri shot her). I still don't understand why he was forced to stop holding her hostage."

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[" In the scene you're referring to, Conan shoots Ran in the leg to make her an unattractive hostage to Kohei Sawaki. Sawaki then releases Ran, as she is no longer of any use to him. This is not because he is forced to do so, but rather because Ran is now a liability to him due to her injury. Sawaki then flees the scene, leaving Ran behind."]


In [None]:
# Question 2: whether Conan is ever taken seriously as a detective, and in which episode.
question = "Does detective Conan ever get taken seriously as an actual detective, instead of being seen as a mere kid by the main characters? Instead of hiding behind his subtle hints he gives and his voice changer to manipulate someone else as the case solver. If so, what episode does he get taken seriously?"

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[" Detective Conan does get taken seriously as an actual detective in some cases, but it is not common for the main characters to recognize him as such. In the episode 'The Tenth Passenger ~ Part 1' (Episode 201), Conan is seen taking charge of the investigation and revealing the identity of the murderer while disguised as Sonoko using his voice changer. However, this is not a common occurrence and the main characters, such as Kogoro Mouri, often dismiss Conan's contributions and see him as a mere kid.\n\nIn the episode 'Whereabouts of the Dark Photograph ~ Part 1' (Episode 484), Conan is seen leading the investigation and solving the case with his own abilities, but again, this is not a common occurrence.\n\nIn the episode 'The Identity of the Murderer! Template:Infobox/Conan/Chapter' (Case Closed Chapter 1010), Conan is seen taking charge of the investigation and revealing the identity of the murderer while disguised as Sonoko using his voice changer. However, this is not a common oc

In [None]:
# Question 3: whether Kaito Kid first met Conan or Shinichi.
question = "Who did Kaito Kid meet first: Conan or Shinichi? In Detective Conan series, Kaito Kid first appeared in episode 76. Conan acted like he's never met him and he was looking forward to meeting him and putting him in prison. How is that even possible?"

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[" Kaito Kid first met Shinichi in the Detective Conan series, not Conan. They met in the Clock tower heist case where Shinichi anticipated Kid's next move and managed to decipher the code Kid left behind. However, they were both unaware of their familial relationship at the time. Conan only appeared after Shinichi was turned into a child by the APTX 4869. Therefore, Conan and Kaito Kid have not met in the Detective Conan series until now."]


In [None]:
# Question 4: why Haibara attends school instead of working on the APTX drug.
question = "Why does Haibara attend school? After his body shrank, Conan attends school in order to look like a normal child especially in front of Ran and Kogoro, but why does Haibara attend school? Why does not she dedicate her time trying to redevelop the APTX drug and trying to find an antibody to it?"

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[' Haibara attends school to prevent the Black Organization from knowing that she had shrunk into a child. She and the Professor created her alias "Ai Haibara" and enrolled in Teitan Elementary School to keep her identity hidden. Additionally, Haibara is also a member of the Detective Boys, which allows her to work with Conan and the others to solve mysteries and gather information about the Black Organization. While Haibara may have the knowledge and skills to redevelop the APTX drug and find an antibody to it, her primary focus is on staying hidden and gathering information to help take down the Black Organization.']


In [None]:
# Question 5: when Conan first met Haibara's sister.
# NOTE(review): the body of the question appears twice in this string (the second
# copy starts right after "knew Conan well." with no separating space); this looks
# like a paste duplication. Left untouched because the text is sent verbatim to
# the retriever and the LLM; confirm before de-duplicating.
question = "When did Conan first met Haibara's sister? At the beginning of episode 128, Conan said that he knew Masami, Haibara's sister. However, we have never seen them meeting before, how is that possible?  In the next episode, Masami told her sister that she suspects Conan to be Shinichi. I do not think she would think that unless she knew Conan well.At the beginning of episode 128, Conan said that he knew Masami, Haibara's sister. However, we have never seen them meeting before, how is that possible? In the next episode, Masami told her sister that she suspects Conan to be Shinichi. I do not think she would think that unless she knew Conan well."

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[" Conan and Haibara's sister, Akemi Miyano, met each other in the past when Conan was still Shinichi Kudo. This is implied in the documents when it is mentioned that Akemi suspects Conan to be Shinichi, indicating that they have met before. However, the specific details of their meeting are not provided in the documents."]


In [None]:
# Question 6: how Conan knows the codenames Vodka and Gin.
# NOTE(review): "dies" is presumably a typo for "does"; since the string is also
# the keyword query for the BM25 retriever, the stray word may influence which
# documents are retrieved. Left untouched; confirm before correcting.
question = "How dies Conan know the names of Vodka and Gin in the anime? I watched the anime many times but in the first episode of Detective Conan, both Vodka and Gin never mentioned their names. But later as the series continues, it looks like he knows their names even before Haibara appeared in the series."

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[' Conan learns their codenames and overhears Gin explain to Vodka the details of the bombing plan in episode 11 of Detective Conan (Case Closed) titled "The Black Organization: Murderous Intentions on the Shinkansen". This episode is the first time Conan learns their codenames and overhears their plan.']


In [None]:
# Question 7: when the zoom function was added to Conan's glasses.
# NOTE(review): contains the misspelling "epsisode"; kept as-is because the string
# is sent verbatim to the retriever and the LLM.
question = "When did Conan's glasses first get zoom functionality added? In epsisode 466, Conan sees something down in a lake and Agasa says: Use the magnifying function of your glasses Allowing Conan to zoom in and see the body floating. I don't recall this functionality ever being mentioned before. The way he mentions it and the way Conan responds makes it seem like it is obvious they both knew about it at this point. When did Professor Agasa add this feature to Conan's glasses?"

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[" The telescopic lenses functionality was added to Conan's glasses in Movie 5, which was released before Episode 466. The movie was released on April 17, 2004, and the episode was aired on November 21, 2011. Therefore, the telescopic lenses functionality had been added to Conan's glasses for at least 7 years and 7 months before Episode 466."]


In [None]:
# Question 8: identity of the woman who called young Tooru Amuro / Rei Furuya "zero".
# NOTE(review): contains spelling variants ("Rey Furuya", "posion"); kept as-is
# because the string is sent verbatim to the retriever and the LLM.
question = "Who is the woman who talks to young Tooru Amuro/Rei Furuya? Tooru Amuro or Rey Furuya is the Black Organization member nicknamed Bourbon. He actually something like secret police agent (I can't remember the organization's name) who is working undercover in BO. He as Tooru Amuro works at Poirot Cafe, and become Kogoro's apprentice. I remember a case when Amuro, Conan, Ran and Kogoro are at the hospital and there's a case where a posion is used in a tea. After Amuro and Conan realise who the suspect of the cases is, there's a kid with her mother counting the lift that coming from the upper floors. When the kid counts to zero (they are on the ground floor), Amuro hears that and he seems surprised about it. He also has a flashback to when he was a kid and was called zero by a woman. So, who is she? Who's the woman who speaks to with young Amuro? What is their relation? And also why he called zero by this woman?"

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[' The woman who speaks to young Tooru Amuro/Rei Furuya is his mother. She calls him "Zero" because it is his codename in the National Police Agency, where he works undercover. The reason for his codename is not explicitly stated in the document, but it is mentioned that Rei Furuya\'s colleagues from the Public Security Bureau are ordered by him to carry out special operations for the National Police Agency, indicating that he has a high-ranking position in the organization. The document also mentions that Rei Furuya\'s hostility with Shuichi Akai comes from an incident involving an unspecified man, about whom Shuichi says he regrets. This man may be related to Rei Furuya\'s past and his involvement in the National Police Agency and the Black Organization.']


In [None]:
# Question 9: Conan's age now and before he shrank.
question = "How old is Conan meant to be? I was rewatching an old episode of Detective Conan with a friend and we started discussing how weird it would be to hear a young child say some of the things Conan says. Then, I realized I didn't know the exact age that Conan was really supposed to be acting. I don't recall Conan's age to ever be mentioned in the show. How old is Conan supposed to be now? What was his age before he shrank?"

In [None]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [None]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[' Conan is supposed to be 17 years old before he shrank and is still 17 years old after shrinking.']


In [17]:
# Question 10: why Heiji's mother lied to Mouri and Conan in episode 220.
question = "Why did Heiji's mom have to lie to Mouri and Conan in episode 220? At the end of episode 220 of Detective Conan, The Client Full of Lies, Heiji appears and explains the reason behind the fake name Reiko to Conan that it was to conceal the fact that his mom is from Osaka. But that doesn't explain why she has to lie about her being good at cooking and her allergy to dust. If she were to hide her identity, she need not lie about the cooking and the allergies. Conan wouldn't be able to find out even if he knew that she was from Osaka since it was his first time meeting Heiji's mother. Why did she have to go that far? Or was it part of her test for Mouri to figure out her real identity? Even if it's the case, I bet no one's going to be able to deduce that she is Heiji's mother because first, it's impossible to even know the true identity of a person just by knowing her hobbies or skills. Unless she has a distinctive quality in which the Hattori's are famous for. Second, Conan did not even know a thing about Heiji's mom. The test was actually for Shinichi but since he wasn't there, she tried Mouri instead. No matter how good Conan's deductions were, it was impossible for him to expose her real identity as Heiji's mom and her real intentions. They were even surprised when she revealed her true identity as Heiji's mom since Heiji didn't even look like her. So, why did she have to lie about all the stuff?"

In [18]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [19]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[" It is not explicitly stated in the document why Heiji's mother had to lie about her cooking skills and allergy to dust. It is possible that she was testing Mouri and Conan's detective skills, but it is also possible that she had other reasons for lying. Without additional information, it is difficult to determine the exact reason for her deception."]


In [20]:
# Question 11: where the real Mouri was in episode 219 while Kaito Kid impersonated him.
question = "What happened to Mouri in episode 219? In episode 219 of Detective Conan, The Gathering of the Detectives! Shinichi Kudo vs. Kaitou Kid, after Senma jumped out of the helicopter, Mouri quickly jumps out and reveals his identity as Kaito Kid. Kid saves Senma from falling. But what happened to the real Mouri? If the Mouri in the chopper was actually Kid, where was the real Mouri?"

In [21]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [22]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[' The real Mouri was likely not in the helicopter. It is possible that Kid disguised himself as Mouri before getting on the helicopter, or that Kid incapacitated Mouri and took his place. The document does not provide enough information to determine exactly what happened to the real Mouri.']


In [23]:
# Question 12: why Conan calls Bourbon a liar at the end of chapter 897.
question = "Why does Conan call Bourbon a liar?  I still don't get the context of this at the end of chapter 897"

In [24]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [25]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[" In chapter 897, Conan calls Bourbon a liar because Bourbon had previously claimed that Akai Shuichi, a character who was believed to be dead, was still alive. Conan does not believe this claim, and therefore calls Bourbon a liar. This occurs during a conversation between Conan and Bourbon, where Bourbon is trying to gather information about Akai from Conan. Conan, however, is suspicious of Bourbon's motives and does not want to reveal any information about Akai. This leads to Conan calling Bourbon a liar and the conversation ending on a tense note."]


In [26]:
# Question 13: why Conan does not reveal his identity to Ran.
question = "Why doesn't Conan reveal his identity at least to Ran in Detective Conan (Case Closed)? Why doesn't Shinichi Kudo tell about his problem to Ran in Detective Conan, when he completely understands how much the girl loves him and suffers from pain due to him? While the truth is many times she was almost certain that Conan was Shinichi himself. The series now has so many episodes. Come on! She needs some sympathy."

In [27]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [28]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[" Conan doesn't reveal his identity to Ran because he is afraid of putting her in danger. Shinichi doesn't tell Ran about his problem because he also doesn't want to put her in danger and he wants to protect her. Additionally, Conan/Shinichi believes that revealing the truth would only cause Ran more pain and suffering, as he would be putting her in a situation where she would have to choose between her love for him and her own safety. The series has many episodes, but the truth has not been revealed yet because it is a crucial part of the plot and the development of the characters. Ran's sympathy is not the main focus of the story, but rather the growth and development of Conan/Shinichi as a detective and a person."]


In [29]:
# Question 14: why Conan and the Detective Boys address Ai as "Haibara-san".
question = "Why do Conan and the Detective Boys call Ai Haibara-san?"

In [30]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [31]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[' Conan and the Detective Boys call Ai Haibara-san as a sign of respect towards her.']


In [32]:
# Question 15: why Conan did not tell Haibara earlier about the liquor that restores their size.
question = "Why did not Conan tell Haibara about the liquor that made him restore to his original size as soon as possible? In episode 177, Reunion with the Black Organization, Conan asked Haibara to drink the liquor which made her restore to her original size, but why did not he tell her that the first time he met her or at least after he trusted her? This is a very important information which cannot be delayed."

In [33]:
# Send the current question to both pipeline entry points: the retriever's
# query and the prompt template's `question` variable.
rag_inputs = {
    "retriever": {"query": question},
    "prompt_builder": {"question": question},
}
results = rag_pipeline.run(rag_inputs)

In [34]:
# Print the replies of the "llm" component exactly as returned (a list).
replies = results["llm"]["replies"]
print(replies)

[' The documents do not provide enough information to answer this question. The document mentions that Conan trusts Haibara and regards her as a friend, but it does not mention when or how this trust was established. It is possible that Conan did not tell Haibara about the liquor earlier because he did not trust her at first, but this is only speculation. Without more information, it is not possible to determine why Conan did not tell Haibara about the liquor earlier.']
