In [None]:
# !pip install -qU \
#   langchain==0.0.276 \
#   openai==0.27.10 \
#   tiktoken==0.4.0 \
#   sentence-transformers==2.2.2 \
#   spacy==3.6.1 \
#   nltk==3.8.1 \
#   pinecone-client==2.2.2 \
#   pypdf==3.15.4

In [11]:
from langchain.embeddings import OpenAIEmbeddings   
# from langchain.document_loaders import PyPDFLoader
# from langchain.text_splitter import CharacterTextSplitter, NLTKTextSplitter, TokenTextSplitter, SpacyTextSplitter, SentenceTransformersTokenTextSplitter
# from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import Pinecone
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain

import pinecone
import time
# import itertools
# import uuid
# from tqdm.autonotebook import tqdm

from config import OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_ENVIRONMENT, PINECONE_INDEX_NAME, EMBEDDING_MODEL, SPLITTER_CHUNK_SIZE, SPLITTER_CHUNK_OVERLAP

OpenAI Init

In [3]:
# Chat model used by the QA chain; temperature is fixed at 0.
chat_model = ChatOpenAI(
    temperature=0,
    openai_api_key=OPENAI_API_KEY,
)

# Embedding client; the model name is read from config (EMBEDDING_MODEL).
embedding_model = OpenAIEmbeddings(model=EMBEDDING_MODEL, openai_api_key=OPENAI_API_KEY)


Pinecone Init

In [4]:
# Connect the Pinecone client using the credentials imported from config.
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_ENVIRONMENT
)

# Vector size used when the index is created: 1536 is the dimension of
# text-embedding-ada-002.
# NOTE(review): EMBEDDING_MODEL is configurable; confirm it still yields
# 1536-dimensional vectors if that setting is ever changed.
EMBEDDING_DIMENSION = 1536
# Upper bound, in seconds, on how long to wait for a new index to become ready.
INDEX_READY_TIMEOUT_SECONDS = 300

if PINECONE_INDEX_NAME not in pinecone.list_indexes():
    # Create the index on first use.
    pinecone.create_index(
        name=PINECONE_INDEX_NAME,
        metric='cosine',
        dimension=EMBEDDING_DIMENSION
    )
    # A fixed pause cannot confirm that the index is usable, so poll the
    # status reported by Pinecone until it is ready (bounded by a timeout so
    # the cell cannot hang forever).
    ready_deadline = time.monotonic() + INDEX_READY_TIMEOUT_SECONDS
    while not pinecone.describe_index(PINECONE_INDEX_NAME).status['ready']:
        if time.monotonic() >= ready_deadline:
            raise TimeoutError(
                f"Pinecone index {PINECONE_INDEX_NAME!r} was not ready after "
                f"{INDEX_READY_TIMEOUT_SECONDS} seconds"
            )
        time.sleep(1)

pinecone_index = pinecone.Index(PINECONE_INDEX_NAME)
# Last expression of the cell, so the index statistics are shown as its output.
pinecone_index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.02418,
 'namespaces': {'': {'vector_count': 2418}},
 'total_vector_count': 2418}

In [10]:
# Wrap the Pinecone index as a LangChain vector store.
# The second argument is the query-embedding callable: the langchain release
# pinned at the top of this notebook (0.0.276) calls this argument directly on
# the query string, so the bound `embed_query` method is passed rather than
# the OpenAIEmbeddings object itself.
# NOTE(review): newer langchain releases also accept the Embeddings object;
# the callable form keeps the notebook working with the pinned version.
# "text" is passed as the store's text key (third positional argument).
vectorstore = Pinecone(pinecone_index, embedding_model.embed_query, "text")

# Retrieval QA chain: the retriever uses MMR search with k=2, and the chain
# is asked to return the retrieved source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=chat_model,
    retriever=vectorstore.as_retriever(search_type="mmr", search_kwargs={'k': 2}),
    return_source_documents=True
)

# Run a single query and unpack the answer text and its source documents.
query = "Why detecting bolt looseness is important?"
answer = qa_chain({"query": query})
response = answer["result"]
sources = answer["source_documents"]

# Fixed-width rule and left padding used for every printed section banner.
_RULE = "=" * 30
_PAD = " " * 10


def _print_section(title, body):
    """Print `body` under a ruled banner showing `title`, then a closing rule."""
    print(_RULE)
    print(_PAD + title)
    print(_RULE)
    print(body)
    print(_RULE)


# The answer first, then a blank separator line.
_print_section("Response", response)

print()

# Show at most the first three source documents, numbered from 1.
for source_number, source_doc in enumerate(sources[:3], start=1):
    _print_section(f"Source [{source_number}]", source_doc.page_content)




# pinecone.init(api_key="***", environment="...")
# index = pinecone.Index("langchain-demo")
# embeddings = OpenAIEmbeddings()
# vectorstore = Pinecone(index, embeddings.embed_query, "text")

          Response
Detecting bolt looseness is important because it can lead to serious safety hazards and equipment failures. If bolts are not properly tightened, they can gradually loosen over time, which can result in structural instability and potential fractures. This can pose a significant risk to the integrity of the equipment or structure, as well as the safety of personnel working in the vicinity. Therefore, detecting bolt looseness is crucial to ensure the reliability and safety of the equipment or structure.

          Source [1]
Chapter 2
Research Goals and Outcomes
2.1 Research Goals
This project aims to explore the following three potential solutions to detecting bolt looseness
          Source [2]
 bolt looseness and fracture, manual inspection during regular
maintenance is currently the most common method adopted by hydro-power plants. However, this

