In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.vectorstores import Pinecone as PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import os

### Loading of data 
#### we cannot load data direclty as plain text, we have to load data with document_loader as it creates meta data as well

In [2]:
loader= TextLoader("/Users/vikaslakka/Desktop/FSDS/GenAI/Hackathon/Policy_bot/Leave_Policy.txt")

In [3]:
data= loader.load()

### The below data has document of page content, not just text

In [4]:
data

[Document(page_content='Leave Policy:\n\n1. Annual Leave (Paid Time Off)\n\n\t1.1 All full-time employees are entitled to 10 days of paid annual leave per calendar year, to be accrued on a pro-rata basis depending on the length of service.\n\n\t1.2 Annual leave requests must be submitted at least 10 days in advance, except in cases of emergencies or unforeseen circumstances.\n\n\t1.3 Approval of annual leave requests is subject to the operational requirements of the company and the availability of adequate staffing.\n\n\t1.4 Unused annual leave cannot be carried forward to the next calendar year unless otherwise approved by the management.\n\n2. Sick Leave\n\n\t2.1 All employees are entitled to 10 days of paid sick leave per calendar year.\n\n\t2.2 Sick leave requests must be communicated to the immediate supervisor or HR department as soon as possible on the day of absence.\n\n\t2.3 Medical certification may be required for sick leave exceeding 10 consecutive days.\n\n3. Bereavement L

In [5]:
print(data[0].page_content)

Leave Policy:

1. Annual Leave (Paid Time Off)

	1.1 All full-time employees are entitled to 10 days of paid annual leave per calendar year, to be accrued on a pro-rata basis depending on the length of service.

	1.2 Annual leave requests must be submitted at least 10 days in advance, except in cases of emergencies or unforeseen circumstances.

	1.3 Approval of annual leave requests is subject to the operational requirements of the company and the availability of adequate staffing.

	1.4 Unused annual leave cannot be carried forward to the next calendar year unless otherwise approved by the management.

2. Sick Leave

	2.1 All employees are entitled to 10 days of paid sick leave per calendar year.

	2.2 Sick leave requests must be communicated to the immediate supervisor or HR department as soon as possible on the day of absence.

	2.3 Medical certification may be required for sick leave exceeding 10 consecutive days.

3. Bereavement Leave

	3.1 In the unfortunate event of the death of

#### We will split the text into chunks with chunk size of 500

In [6]:
text_splitter= RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)

In [7]:
text_chunks= text_splitter.split_documents(data)

In [8]:
text_chunks

[Document(page_content='Leave Policy:\n\n1. Annual Leave (Paid Time Off)\n\n\t1.1 All full-time employees are entitled to 10 days of paid annual leave per calendar year, to be accrued on a pro-rata basis depending on the length of service.\n\n\t1.2 Annual leave requests must be submitted at least 10 days in advance, except in cases of emergencies or unforeseen circumstances.\n\n\t1.3 Approval of annual leave requests is subject to the operational requirements of the company and the availability of adequate staffing.', metadata={'source': '/Users/vikaslakka/Desktop/FSDS/GenAI/Hackathon/Policy_bot/Leave_Policy.txt'}),
 Document(page_content='1.4 Unused annual leave cannot be carried forward to the next calendar year unless otherwise approved by the management.\n\n2. Sick Leave\n\n\t2.1 All employees are entitled to 10 days of paid sick leave per calendar year.\n\n\t2.2 Sick leave requests must be communicated to the immediate supervisor or HR department as soon as possible on the day of 

In [9]:
len(text_chunks)

6

### we can view chunks of data using list attributes


In [10]:
print(text_chunks[1].page_content)

1.4 Unused annual leave cannot be carried forward to the next calendar year unless otherwise approved by the management.

2. Sick Leave

	2.1 All employees are entitled to 10 days of paid sick leave per calendar year.

	2.2 Sick leave requests must be communicated to the immediate supervisor or HR department as soon as possible on the day of absence.

	2.3 Medical certification may be required for sick leave exceeding 10 consecutive days.

3. Bereavement Leave


## Create embeddings using pine cone

In [11]:
import os

In [12]:
embedding= OpenAIEmbeddings()

  warn_deprecated(


## We willconsider an example of embeddings

In [13]:
#len(embedding.embed_query("I am vikas Lakka"))

In [14]:
## The below commented import doesn't hav efrom_text to convert text to embeddings, hence using langchain library
from pinecone import Pinecone
##from langchain.vectorstores.pinecone import Pinecone

  from tqdm.autonotebook import tqdm


In [15]:
##Get APi key and environment from pinecone
PINECONE_API_KEY= os.environ['PINECONE_API_KEY']
PINECONE_API_ENV=os.environ['PINECONE_API_ENV']

In [16]:
PINECONE_API_KEY

'd53d4456-67e3-4701-baab-3141f5107a93'

### Initiate pinecone

In [17]:
pc= Pinecone(api_key= PINECONE_API_KEY,
              environment= PINECONE_API_ENV
    
)

### After creating index, Its time to define it.

In [18]:
index_name="policybot"

In [19]:
index= pc.Index("policybot")

### Creating embeddings for each of Text chunk

In [20]:
doc_chunks= [t.page_content for t in text_chunks]

In [21]:

docsearch= PineconeVectorStore.from_texts(doc_chunks, embedding, index_name= index_name)

In [22]:
docsearch

<langchain_community.vectorstores.pinecone.Pinecone at 0x134563f10>

In [23]:
query= "How many paternity leaves can i take?"

In [24]:
docs = docsearch.similarity_search(query)

In [25]:
docs

[Document(page_content='3.1 In the unfortunate event of the death of an immediate family member (spouse, child, parent, sibling), employees are entitled to 10 days of paid bereavement leave.\n\n\t3.2 Additional unpaid leave may be granted upon approval by the management.\n\n4. Maternity/Paternity Leave\n\n\t4.1 Female employees are entitled to 10 weeks of paid maternity leave following childbirth.\n\n\t4.2 Male employees are entitled to 10 weeks of paid paternity leave upon the birth or adoption of a child.'),
 Document(page_content='1.4 Unused annual leave cannot be carried forward to the next calendar year unless otherwise approved by the management.\n\n2. Sick Leave\n\n\t2.1 All employees are entitled to 10 days of paid sick leave per calendar year.\n\n\t2.2 Sick leave requests must be communicated to the immediate supervisor or HR department as soon as possible on the day of absence.\n\n\t2.3 Medical certification may be required for sick leave exceeding 10 consecutive days.\n\n3. 

In [26]:
print(docs[2].page_content)

Leave Policy:

1. Annual Leave (Paid Time Off)

	1.1 All full-time employees are entitled to 10 days of paid annual leave per calendar year, to be accrued on a pro-rata basis depending on the length of service.

	1.2 Annual leave requests must be submitted at least 10 days in advance, except in cases of emergencies or unforeseen circumstances.

	1.3 Approval of annual leave requests is subject to the operational requirements of the company and the availability of adequate staffing.


### We could able to generate similarity search
#### Lets create llm to sort this out

In [27]:
llm= OpenAI()

  warn_deprecated(


#### We will call a retrieval method to retrieve answers from questions

In [28]:
qa= RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                retriever= docsearch.as_retriever())

In [29]:
qa.InputType

typing.Dict[str, typing.Any]

In [30]:
qa.run(query)

  warn_deprecated(


' Male employees are entitled to 10 weeks of paid paternity leave upon the birth or adoption of a child. Additional unpaid leave may be granted upon approval by the management.'

In [31]:
import sys
while True:
    user_input= input(f"Input Prompt: ")
    if user_input=="exit":
        print("Exiting")
        sys.exit()
    
    if user_input == '':
        continue
    result = qa({'query': user_input})
    print(f"Answer: {result['result']}") 
    

  warn_deprecated(


Answer:  All employees are entitled to 10 days of paid sick leave per calendar year.
Answer:  There are two leave policies mentioned in the context.
Answer:  Annual leave, sick leave, bereavement leave, leave without pay, special leave
Exiting


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
