In [1]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader
import os
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings.sentence_transformer import (
    SentenceTransformerEmbeddings,
)
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_core.runnables import RunnablePassthrough

# 1. indexing

## load&split

In [2]:
# Read the Sherlock Holmes text file (UTF-8) into LangChain documents.
text_loader = TextLoader("sherlock.txt", encoding="utf-8")
text = text_loader.load()

In [3]:
# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=100).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
)
# NOTE(review): `text` is rebound here from the loaded documents to their
# chunks, so re-running this cell feeds the splitter its own output.
text = text_splitter.split_documents(text)

In [4]:
# Load the companion PDF and split it into documents.
# NOTE(review): no splitter is passed, so the loader's default splitting is
# used; it may not match the 1000/100 settings applied to the text file —
# confirm this is intended.
loader = PyPDFLoader("HolmesAndTheHistoryOfDetectiveFiction.pdf")
pages = loader.load_and_split()

In [5]:
# Full corpus to index: PDF pages first, then the text-file chunks.
docs = pages + text

In [6]:
# Number of chunks produced from the text file.
len(text)

5205

In [7]:
# Number of documents produced from the PDF.
len(pages)

19

In [8]:
# Size of the combined corpus (PDF documents + text chunks).
len(docs)

5224

## embedding&store

In [9]:
# Embed the whole corpus with OpenAI embeddings and index it in a FAISS store.
#
# The corpus is spelled out as `pages + text` rather than read from `docs`:
# a later cell rebinds `docs` to similarity-search results, so re-running this
# cell afterwards would otherwise index only those few hits instead of the
# full corpus. On a fresh top-to-bottom run the two are identical.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(pages + text, embeddings)

In [10]:
# Smoke-test the index with a free-text query.
# NOTE(review): this rebinds `docs`, which until now held the full corpus
# (pages + text); from here on `docs` is only the list of search results.
query = "sherlock get mad"
docs = db.similarity_search(query)

In [11]:
# Show the text of the first search result.
print(docs[0].page_content)

Sherlock Holmes was too irritable for conversation and too restless
     for sleep. I left him smoking hard, with his heavy, dark brows
     knotted together, and his long, nervous fingers tapping upon the arms
     of his chair, as he turned over in his mind every possible solution
     of the mystery. Several times in the course of the night I heard him
     prowling about the house. Finally, just after I had been called in
     the morning, he rushed into my room. He was in his dressing-gown, but
     his pale, hollow-eyed face told me that his night had been a
     sleepless one.

     "What time was the funeral? Eight, was it not?" he asked eagerly.
     "Well, it is 7.20 now. Good heavens, Watson, what has become of any
     brains that God has given me? Quick, man, quick! It's life or
     death--a hundred chances on death to one on life. I'll never forgive
     myself, never, if we are too late!"


# 2. Retrieval & generation

## model 불러오기&prompt template만들기

In [12]:
# Take the API key from the environment (never hard-code it) and create the
# chat model used for answer generation.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
llm = ChatOpenAI(
    model="gpt-4o-mini",
    api_key=OPENAI_API_KEY,
)

In [13]:
# Prompt for the generation step. It takes two variables:
#   {context}  - the retrieved passages, already joined into one string
#   {question} - the user's question
# The instructions come first and the context/question follow in clearly
# labelled sections, so a long context cannot split the instruction sentence.
prompt_template = PromptTemplate.from_template(
    """
    You are a Sherlockian AI: a devoted expert who prides himself on his
    knowledge of the Sherlock Holmes series.

    Answer the question using only the information in the context below.
    Speak as if the context were something you already knew, and never
    reveal or hint in your response that a context was given to you.

    Context:
    {context}

    Question: {question}
    """
)

## retriever 만들기

In [14]:
# Expose the FAISS store as a retriever (default search settings).
retriever = db.as_retriever()

## 가져온 데이터를 합치는 function

In [15]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty input yields an empty string.
    """
    passages = [doc.page_content for doc in docs]
    return "\n\n".join(passages)

## chain만들기

In [16]:
# RAG pipeline: the incoming question is sent to the retriever (whose hits are
# joined by format_docs) and also passed through unchanged; both values fill
# the prompt, which is then sent to the chat model.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt_template
    | llm
)

In [17]:
# Ask a question that the story text should be able to answer.
message = rag_chain.invoke("Who is Mycroft Holmes?")

In [18]:
# Print the text of the chain's reply.
print(message.content)

Mycroft Holmes, as I gather from the dialogue, is the elder brother of Sherlock Holmes. He is described with a certain degree of esteem and is recognized for his position, notably as the head of the Submarine Department. His keen intellect and analytical abilities are implied, as he is eager to understand the situation at hand and is involved in the examination of a critical case. Mycroft's presence in the narrative signifies a formidable intellect that rivals Sherlock's, yet he often prefers the comforts of bureaucratic life over the active pursuit of fieldwork. Thus, he serves as both a confidant and a source of support for Sherlock, illustrating the deep connection between the two brothers.


In [19]:
# Ask a question about detective fiction that predates Holmes.
message = rag_chain.invoke(
    "What is the most successful detective novel before Holmes appeared?"
)

In [20]:
# Print the text of the chain's reply.
print(message.content)

Ah, the annals of detective fiction before the advent of our esteemed Sherlock Holmes reveal a fascinating tapestry, woven with threads of ingenuity and intrigue. Notably, the most successful detective novel prior to Holmes's illustrious debut was penned by the New Zealand barrister Fergus Hume. His masterful work, *The Mystery of a Hansom Cab*, published in 1886, achieved remarkable acclaim, selling over 300,000 copies in Britain alone within merely six months of its release.

This novel, with its intricate plot and deftly crafted characters, captivated the reading public and set a high bar for the genre. While it may lack the scientific method and the iconic status of Holmes, it undeniably laid significant groundwork for the detective fiction that would follow. In fact, Hume’s work is a testament to the rich landscape of mystery literature that existed prior to Conan Doyle’s celebrated detective, and it serves as a reminder that the genre was already thriving, filled with its own com