In [3]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance
from langchain.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_openai import OpenAI

In [7]:
# Read the source PDF and parse it into LangChain Document objects.
# NOTE: despite its name, `pdf_loader` ends up holding the *loaded documents*
# (the result of `.load()`), not the loader object; later cells rely on this name.
pdf_loader = PyPDFLoader(
    "Machine_Translation_Approaches_and_Design_Aspects.pdf"
).load()
pdf_loader

[Document(metadata={'producer': 'Microsoft® Word 2010', 'creator': 'Microsoft® Word 2010', 'creationdate': '2014-01-07T16:00:19+05:30', 'author': 'ad1', 'moddate': '2014-01-07T16:00:19+05:30', 'rgid': 'PB:269750413_AS:510886377988096@1498816212147', 'source': 'Machine_Translation_Approaches_and_Design_Aspects.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1'}, page_content='See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/269750413\nMachine Translation Approaches and Design Aspects\nArticle\xa0\xa0in \xa0\xa0IOSR Journal of Computer Engineering · January 2014\nDOI: 10.9790/0661-16122225\nCITATIONS\n11\nREADS\n5,614\n2 authors:\nRuchika Sinhal\n25 PUBLICATIONS\xa0\xa0\xa082 CITATIONS\xa0\xa0\xa0\nSEE PROFILE\nKapil Gupta\nSt Vincent Pallotti College of Engineering & Technology\n18 PUBLICATIONS\xa0\xa0\xa095 CITATIONS\xa0\xa0\xa0\nSEE PROFILE\nAll content following this page was uploaded by Ruchika Sinhal on 30 June 2017.\nT

In [10]:
# Break the loaded documents into chunks of at most 1000 characters so each
# piece can be embedded and retrieved independently.
# `chunk_overlap` is a character count, so it is given as the integer 0
# (no overlap between consecutive chunks) rather than the boolean False,
# which only worked because False == 0 in Python.
chunk = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
split_doc = chunk.split_documents(pdf_loader)
split_doc

[Document(metadata={'producer': 'Microsoft® Word 2010', 'creator': 'Microsoft® Word 2010', 'creationdate': '2014-01-07T16:00:19+05:30', 'author': 'ad1', 'moddate': '2014-01-07T16:00:19+05:30', 'rgid': 'PB:269750413_AS:510886377988096@1498816212147', 'source': 'Machine_Translation_Approaches_and_Design_Aspects.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1'}, page_content='See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/269750413\nMachine Translation Approaches and Design Aspects\nArticle\xa0\xa0in \xa0\xa0IOSR Journal of Computer Engineering · January 2014\nDOI: 10.9790/0661-16122225\nCITATIONS\n11\nREADS\n5,614\n2 authors:\nRuchika Sinhal\n25 PUBLICATIONS\xa0\xa0\xa082 CITATIONS\xa0\xa0\xa0\nSEE PROFILE\nKapil Gupta\nSt Vincent Pallotti College of Engineering & Technology\n18 PUBLICATIONS\xa0\xa0\xa095 CITATIONS\xa0\xa0\xa0\nSEE PROFILE\nAll content following this page was uploaded by Ruchika Sinhal on 30 June 2017.\nT

In [None]:
# --- Configuration: values a reader may want to tune --------------------------
COLLECTION_NAME = "zoomcamp-project1"
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
# 0.0.0.0 is a wildcard *bind* address and is not a portable destination for a
# client connection (it fails on some platforms), so the loopback address is
# used instead. The port is kept exactly as originally configured.
LLM_BASE_URL = "http://127.0.0.1:1233/v1"
LLM_MODEL_NAME = "llama-3.2-3b-instruct"

# --- Embeddings ----------------------------------------------------------------
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)

# Ask the model for its output dimension instead of hard-coding it, so the
# collection's vector size cannot drift out of sync if the embedding model is
# swapped (all-MiniLM-L6-v2 yields 384, matching the previous literal).
embedding_dim = len(embedding_model.embed_query("dimension probe"))

# --- Vector store ----------------------------------------------------------------
# In-memory Qdrant instance: the index lives only as long as this kernel and is
# rebuilt from scratch every time this cell runs.
client = QdrantClient(":memory:")
client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=VectorParams(size=embedding_dim, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    embedding=embedding_model,
)

# Embed and index the chunks produced by the splitting cell.
vector_store.add_documents(split_doc)

# Return at most one chunk per query, and only if its similarity score is at
# least 0.5.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5, "k": 1},
)

# --- LLM -------------------------------------------------------------------------
# OpenAI-compatible endpoint served locally; "lm-studio" looks like a
# placeholder key rather than a real secret -- TODO confirm the server ignores it.
model = OpenAI(
    base_url=LLM_BASE_URL,
    api_key="lm-studio",
    model=LLM_MODEL_NAME,
)

# --- Prompt ----------------------------------------------------------------------
# {context} is filled with the retrieved chunk(s); {input} with the user question.
system_prompt = (
    "You are a smart assistant. "
    "Provide concise and professional responses from the given context.\n\n{context}"
)
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "{input}"),
])

  embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [12]:
# Combine step: inserts the retrieved documents into the prompt's {context}
# slot and sends the result to the LLM.
stuff_chain = create_stuff_documents_chain(llm=model, prompt=prompt)

# Full RAG pipeline: retrieve documents for the question, then run the
# combine step on them.
chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=stuff_chain,
)

In [16]:
# Run the retrieval chain on a question; the displayed result is a dict that
# echoes the input and includes the retrieved context documents.
user_query = {"input": "what is this article about"}
chain.invoke(user_query)

{'input': 'what is this article about',
 'context': [Document(metadata={'producer': 'Microsoft® Word 2010', 'creator': 'Microsoft® Word 2010', 'creationdate': '2014-01-07T16:00:19+05:30', 'author': 'ad1', 'moddate': '2014-01-07T16:00:19+05:30', 'rgid': 'PB:269750413_AS:510886377988096@1498816212147', 'source': 'Machine_Translation_Approaches_and_Design_Aspects.pdf', 'total_pages': 5, 'page': 0, 'page_label': '1', '_id': '8ae36942cec74d33b7ae4fa7f5581bf8', '_collection_name': 'zoomcamp-project1'}, page_content='See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/269750413\nMachine Translation Approaches and Design Aspects\nArticle\xa0\xa0in \xa0\xa0IOSR Journal of Computer Engineering · January 2014\nDOI: 10.9790/0661-16122225\nCITATIONS\n11\nREADS\n5,614\n2 authors:\nRuchika Sinhal\n25 PUBLICATIONS\xa0\xa0\xa082 CITATIONS\xa0\xa0\xa0\nSEE PROFILE\nKapil Gupta\nSt Vincent Pallotti College of Engineering & Technology\n18 PUBLICATI