# Question and Answer

#### Imports

In [25]:
import os
import openai
import sys
from dotenv import load_dotenv, find_dotenv

import numpy as np


#Embedding
from langchain_openai import OpenAIEmbeddings

#Vector Database
from langchain_chroma import Chroma

#Loader
from langchain_community.document_loaders import PyPDFLoader

#Splitter
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFDirectoryLoader

#Self-Query Retriever

from langchain_openai import ChatOpenAI
from langchain_classic.chains.query_constructor.schema import AttributeInfo
from langchain_classic.retrievers.self_query.base import SelfQueryRetriever

#Retrieve Information Compression
from langchain_classic.retrievers import ContextualCompressionRetriever
from langchain_classic.retrievers.document_compressors import LLMChainExtractor

#RetrievalQA chain
from langchain_classic.chains import RetrievalQA

##### OpenAI API KEX

In [17]:
sys.path.append('../..')
_ = load_dotenv(find_dotenv()) 
api_key = os.environ['OPENAI_API_KEY']

#### Embedding and VectorDB

In [19]:

# 1. Load all PDFs from the ./docs directory
loader = PyPDFDirectoryLoader("./docs")
documents = loader.load()

# 2. Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
docs = text_splitter.split_documents(documents)

# 3. Create embeddings
embedding = OpenAIEmbeddings()

# 4. Store chunks in Chroma DB (persist to ./docs)
vectordb = Chroma.from_documents(
    documents=docs,
    embedding=embedding,
    persist_directory="./docs"
)
print("PDFs loaded into vector database successfully.")

PDFs loaded into vector database successfully.


In [20]:
print(vectordb._collection.count())

3040


#### Similarity Search in VectorDB

In [22]:
question = "What is Neural Network Quantum State?"
similar_docs = vectordb.similarity_search(question, k = 3)
len(similar_docs)

3

#### LLM Model

In [23]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

##### RetrievalQA chain

In [26]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever = vectordb.as_retriever()
)

In [27]:
result = qa_chain({"query": question})

  result = qa_chain({"query": question})


APIConnectionError: Connection error.