In [30]:
import openai
import langchain
import pinecone
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain_openai import OpenAI
from langchain_pinecone import PineconeVectorStore

In [31]:
from dotenv import load_dotenv
load_dotenv()
import os
pinecone_api_key = os.getenv("PINECONE_API_KEY")
openai_api_key=os.getenv("OPENAI_API_KEY")

In [32]:
## lets read the document
def read_doc(directory):
    file_loders=PyPDFDirectoryLoader(directory)
    documents=file_loders.load()
    return documents

In [33]:
doc=read_doc('documents/')
doc

[Document(metadata={'source': 'documents\\AJAI.pdf', 'page': 0}, page_content='AJAI V\n9150160723 | ajaipetro@gmail.com | linkedin.com/in/v-ajai-838345194 | https://github.com/VAJAI\nTECHNICAL SKILLS\nProgramming Languages : Python, C++, C, Basic JAVA\nFrameworks - Libraries: Django, React.js, Node.js\nFront-End Development : HTML, CSS, Bootstrap, JavaScript, jQuery\nBack-End Development : MySQL, Microsoft SQL Server\nDevelopment Tools:Git, Visual Studio Code, Code Blocks\nPROJECTS\nEcommerce Website - Pic Snacks DEC 2023 – APR 2024\nRole: Full Stack Developer HTML, CSS, JavaScript\n• Engineered a fully responsive e-commerce platform for snack products, achieving a 20% boost in mobile engagement and a 15%\nincrease in overall site trafﬁc. .\n• Designed a user-friendly interface that enhanced customer engagement by25% and decreased bounce rates by30%\n• Optimized site navigation for multi-device compatibility, increasing user retention by20%.\nPortfolio Website APR 2024 – AUG 2024\nRole

In [34]:
## divide the doc the chunks
def chunk_data(docs,chunk_size=800,chunk_overlap=50):
    text_spiliters=RecursiveCharacterTextSplitter(chunk_size=chunk_size,chunk_overlap=chunk_overlap)
    doc=text_spiliters.split_documents(docs)
    return doc

In [35]:
document=chunk_data(docs=doc)
document

[Document(metadata={'source': 'documents\\AJAI.pdf', 'page': 0}, page_content='AJAI V\n9150160723 | ajaipetro@gmail.com | linkedin.com/in/v-ajai-838345194 | https://github.com/VAJAI\nTECHNICAL SKILLS\nProgramming Languages : Python, C++, C, Basic JAVA\nFrameworks - Libraries: Django, React.js, Node.js\nFront-End Development : HTML, CSS, Bootstrap, JavaScript, jQuery\nBack-End Development : MySQL, Microsoft SQL Server\nDevelopment Tools:Git, Visual Studio Code, Code Blocks\nPROJECTS\nEcommerce Website - Pic Snacks DEC 2023 – APR 2024\nRole: Full Stack Developer HTML, CSS, JavaScript\n• Engineered a fully responsive e-commerce platform for snack products, achieving a 20% boost in mobile engagement and a 15%\nincrease in overall site trafﬁc. .\n• Designed a user-friendly interface that enhanced customer engagement by25% and decreased bounce rates by30%'),
 Document(metadata={'source': 'documents\\AJAI.pdf', 'page': 0}, page_content='• Optimized site navigation for multi-device compatibili

In [36]:
## embedding techniques of OPENAI
embeddings=OpenAIEmbeddings(api_key=openai_api_key)
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000235161E1480>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000235161F22F0>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key='sk-15YLdtpG_cLJdKEeYvdyfVHmc5ukBDCKauPt7drdAET3BlbkFJNMuuyNs2Cx_HiG5oh3hveNUBYOWPZ9ONnnvTKrP54A', openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None)

In [37]:
vectors=embeddings.embed_query("how are you")
len(vectors)

1536

In [38]:
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
pc=Pinecone(api_key=pinecone_api_key)
index=pc.Index("vectorlangchain")
vector_store = PineconeVectorStore.from_documents(
    documents=document,
    embedding=embeddings,
    index_name="vectorlangchain"
)

In [39]:

query = "What the data about technical skills in this document?"
retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"k":3,'score_threshold': 0.8}
    )

relevent_doc=retriever.invoke(query)

for i,doc in enumerate(relevent_doc,1):
    print(f"Document {i}:\n {doc.page_content}\n")
    print(f"Source: {doc.metadata['source']}\n")


Document 1:
 efﬁciency by 40%.
• Contributed to developing and debugging internal tools, improving operational efﬁciency by 15%.
• Developing full-stack solutions utilizing Python and SQL, leading to a35% boostin coding efﬁciency and a20% reductionin
database query times.
EDUCATION
Rajiv Gandhi College of Engineering Chennai
B.Tech in Petroleum Engineering Aug 2019 – May 2023
Vaani Matric Higher Secondary School Vaniyambadi
HSE- Bio Maths Apr 2018 – May 2019
CERTIFICATIONS
• Python Full Stack Developer Certiﬁcation - Pumo Technovation, Chennai
• TCS iON NQT IT Certiﬁcation - Achieved a score of 62.87%

Source: documents\AJAI.pdf

Document 2:
 AJAI V
9150160723 | ajaipetro@gmail.com | linkedin.com/in/v-ajai-838345194 | https://github.com/VAJAI
TECHNICAL SKILLS
Programming Languages : Python, C++, C, Basic JAVA
Frameworks - Libraries: Django, React.js, Node.js
Front-End Development : HTML, CSS, Bootstrap, JavaScript, jQuery
Back-End Development : MySQL, Microsoft SQL Server
Development T

In [40]:
stats = index.describe_index_stats()
print("Index Stats:", stats)


Index Stats: {'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 3}},
 'total_vector_count': 3}
