In [21]:
!pip install langchain qdrant_client openai tiktoken pypdf


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [22]:
from langchain.vectorstores import Qdrant
from langchain.embeddings.openai import OpenAIEmbeddings
import qdrant_client
import os
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader

In [6]:
# Connect to Qdrant using connection settings taken from the environment

# presumably populates QDRANT_HOST / QDRANT_API from a local .env file — confirm
load_dotenv()

client = qdrant_client.QdrantClient(
    os.environ.get("QDRANT_HOST"),
    api_key=os.environ.get("QDRANT_API"),
)

In [8]:
# (Re)create the collection in the cluster

# The collection name is published through the environment; the calls below
# read it back with the same key.
os.environ["QDRANT_COLLECTION_NAME"] = "vector_collection"

# Cosine-distance vectors of length 1536.
# NOTE(review): 1536 presumably matches the output size of the embedding model
# used to fill this collection — confirm.
vectors_config = qdrant_client.http.models.VectorParams(
    distance=qdrant_client.http.models.Distance.COSINE,
    size=1536,
)

# The call is the last expression of the cell, so its return value is displayed.
client.recreate_collection(
    vectors_config=vectors_config,
    collection_name=os.environ.get("QDRANT_COLLECTION_NAME"),
)
# Check with Thunder Client that collection has been created

True

In [10]:
# Embedding model, constructed with the library's default settings
embeddings = OpenAIEmbeddings()

# Wrap the Qdrant client and collection in a LangChain vector store

vector_store = Qdrant(
    embeddings=embeddings,
    collection_name=os.environ.get("QDRANT_COLLECTION_NAME"),
    client=client,
)

In [47]:
# Load PDF into text
# The PDF location can be overridden with the PDF_PATH environment variable;
# the previously hard-coded path remains the default.
pdf_path = os.getenv("PDF_PATH", "/Users/ivanzhirnov/Downloads/paper.pdf")
loader = PyPDFLoader(pdf_path)
pages = loader.load_and_split()
# Join with a newline so the last word of one chunk is not glued to the first
# word of the next, and so a splitter that breaks on "\n" can still see the
# boundary between them.
text = "\n".join(page.page_content for page in pages)

In [48]:
# Split the text into chunks that will be added to the vector store

from langchain.text_splitter import CharacterTextSplitter


def get_chunks(text, chunk_size=1000, chunk_overlap=200, separator="\n"):
    """Split ``text`` into a list of chunks with ``CharacterTextSplitter``.

    Args:
        text: The string to split.
        chunk_size: Value passed to the splitter as ``chunk_size``; lengths
            are measured with ``len``. Defaults to 1000.
        chunk_overlap: Value passed to the splitter as ``chunk_overlap``.
            Defaults to 200.
        separator: String the splitter breaks the text on. Defaults to a
            newline.

    Returns:
        Whatever ``CharacterTextSplitter.split_text`` returns for ``text``
        (used by the caller as a list of strings).
    """
    text_splitter = CharacterTextSplitter(
        separator=separator,
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return text_splitter.split_text(text)


texts = get_chunks(text)

In [60]:
# Sanity check: show characters 100-199 of the first element of `pages`,
# with newlines replaced by spaces.
pages[0].page_content[100:200].replace("\n", " ")

'huo Fenga,⇑, Zhuoer Chenb,⇑, Benjamin Bircherc,Z eJ ia, Lars Nyborgb, Samuel Bigota aSchool of Engin'

In [62]:
# Add vectors to Qdrant collection
# The call is the last expression of the cell, so its return value
# (a list of hex id strings) is displayed as the cell output.
vector_store.add_texts(texts)

['5de1bba09c63d1e74a422be678d454d1',
 'f87a503ccc709c2048b40d811a931aaf',
 '806cc92753ed639123a3820f4f67039b',
 'e3416a9f859eccb7032ddf488870362a',
 'f81e308d21185dc07e245558849e3185',
 '8967857baa4e3591e34ecc8560e78ba5',
 '3e051127c1959c9055fed0efc2fc82b7',
 '38d2a25f87fad98192775bf1cb14a59c',
 '08a6b0110db0e85863a221b08ed0a1ef',
 '18db9a81847b01ce1f0e68eb74af5701',
 '4f307950b17e2c3500ebd3443b9dc9fc',
 'c1f39ff0365b49b33fc7a04b5fb7e3fb',
 '41152e9e70c1fda83c95c6a69648bd41',
 '9272e14f4419c18eef93cda4e6d824be',
 'b1862a70a07dbaacc1e1fc8291bc2386',
 '11dc9106276edfe69ee9b6f28328ecb3',
 'b85ba11a7d9c7861ef7141bbb99d7395',
 '0f0b75988af2c9dbb021c6befaa0010c',
 '8fe827b4802ebf98e4a00804bf24bf2c',
 'd12d846732b7e2ec625a178628dcc426',
 '990a07d822bcff318949535377cf3ef8',
 '465b4bc0493f4b5d1591831cd8572619',
 '6b1597b4fca3ab3262319f412563cef2',
 '9910c2b9602a6a4df4e98fb310c4f185',
 '7f35059a78f2a4ae6b36d08309c55ec7',
 'bcb615b17102cfe77fe68df6a847ffb7',
 '8d608e9802f7564664cee1db8e685a39',
 

#### Connect vector store with prompt application

In [65]:
# Plug vector store into retrieval chain

from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Question-answering chain: an OpenAI LLM (default settings) answers using
# documents supplied by the vector store's retriever.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

In [71]:
# Question to ask about the indexed paper
query = "What is a main output of this paper?"

# Run the retrieval QA chain on the question
response = qa.run(query)

# Show the answer text
print(response)

 The main output of this paper is an investigation of the influence of processing parameters on the formation of defects in laser powder bed fusion (LPBF) experiments conducted on an EOS M290 machine.
