In [1]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# Use .get() rather than a subscript: os.environ['GOOGLE_API_KEY'] raises a
# bare KeyError when the variable is missing, which would make the explicit
# error below unreachable. .get() also treats an empty value as "not found".
if os.environ.get('GOOGLE_API_KEY'):
    print("API Key found.")
else:
    raise EnvironmentError("API Key not found in environment variables.")

API Key found.


In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that is later invoked with the question plus the retrieved chunks.
model = ChatGoogleGenerativeAI(
    model="gemini-3-flash-preview",
    temperature=0,  # lowest sampling temperature
    max_tokens=None,  # no explicit output-token limit is passed
    timeout=None,  # no explicit request timeout is passed
    max_retries=2  # presumably the retry count for failed requests — confirm in the library docs
)

# RAG Implementation with our PDF data

## Step 1 - Extracting text from the PDF

In [3]:
from langchain_community.document_loaders.pdf import PyPDFLoader

# Relative path to the policy PDF; resolved against the notebook's working directory.
pdf_path = '../DOCS/Attendance_Policy.pdf'

loader = PyPDFLoader(pdf_path)

# load() returns a list of Document objects. The metadata in the output
# carries 'page' and 'total_pages' fields, which suggests one Document per page.
docs = loader.load()
docs

[Document(metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2022-09-20T21:06:46+05:30', 'author': 'Lee, Yvette', 'moddate': '2023-05-02T17:54:29+05:30', 'source': '../DOCS/Attendance_Policy.pdf', 'total_pages': 4, 'page': 0, 'page_label': '1'}, page_content='Attendance Policy  \n \nObjective \nThe purpose of this policy is to set forth VLCC’s policy and procedures for handling employee absences \nand tardiness to promote the efficient operation of the company and minimize unscheduled absences.  \n \nEffective Date – 21.3.2022 \n \nPolicy \nPunctual and regular attendance is an essential responsibility of each employee at VLCC. Employees also \nare expected to remain at work for their entire work schedule. Late arrival, early departure or other \nabsences from scheduled hours are disruptive and must be avoided.  \n \nAbsence \n“Absence” is defined as the failure of an employee to report for work when he or she is scheduled to \nwork. The t

In [4]:
# Inspect the metadata the loader attached to the first Document.
docs[0].metadata

{'producer': 'Microsoft® Word 2013',
 'creator': 'Microsoft® Word 2013',
 'creationdate': '2022-09-20T21:06:46+05:30',
 'author': 'Lee, Yvette',
 'moddate': '2023-05-02T17:54:29+05:30',
 'source': '../DOCS/Attendance_Policy.pdf',
 'total_pages': 4,
 'page': 0,
 'page_label': '1'}

### Creating our own metadata

In [5]:
for doc in docs:
    # Replace (not merge) the loader-generated metadata with our own fields;
    # a fresh dict is built per Document so no two Documents share one object.
    doc.metadata = {"source": "VLCC", "producer": "HR"}

In [6]:
# Check that the first Document now carries only the custom metadata.
docs[0].metadata

{'source': 'VLCC', 'producer': 'HR'}

## Step 2 - Splitting the documents into chunks

In [7]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configured for chunks of size 1000 with an overlap of 100 between
# neighbouring chunks (sizes are measured by the splitter's length function).
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# Split every loaded Document into chunks.
chunks = splitter.split_documents(docs)

chunks

[Document(metadata={'source': 'VLCC', 'producer': 'HR'}, page_content='Attendance Policy  \n \nObjective \nThe purpose of this policy is to set forth VLCC’s policy and procedures for handling employee absences \nand tardiness to promote the efficient operation of the company and minimize unscheduled absences.  \n \nEffective Date – 21.3.2022 \n \nPolicy \nPunctual and regular attendance is an essential responsibility of each employee at VLCC. Employees also \nare expected to remain at work for their entire work schedule. Late arrival, early departure or other \nabsences from scheduled hours are disruptive and must be avoided.  \n \nAbsence \n“Absence” is defined as the failure of an employee to report for work when he or she is scheduled to \nwork. The two types of absences are defined below: \n\uf0b7 Excused absence occurs when all the following conditions are met:  \n\uf0a7 The employee provides to his or her supervisor sufficient notice at least 48 hours in \nadvance of the absence.

In [8]:
# Check that the first chunk kept the metadata of the Document it came from.
chunks[0].metadata

{'source': 'VLCC', 'producer': 'HR'}

In [9]:
# Number of chunks the splitter produced.
print(len(chunks))

9


## Step 3 - Create embeddings for these chunks

In [10]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model handed to the vector store to turn chunk text into vectors.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

## Step 4 - Create and store embeddings in a vector store

In [11]:
from langchain_community.vectorstores import Chroma

# Embed every chunk with `embeddings` and index the results in a Chroma store.
# NOTE(review): no persist_directory is passed, so the index is presumably
# in-memory only and rebuilt (re-embedding all chunks) on each run — confirm.
vector_store = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings
)

## Step 5 - Semantic search - Querying the vector store

In [12]:
# Retrieve the k=3 chunks most similar to the question.
output1 = vector_store.similarity_search("i am working in corporate office. how many minimum hours should i work in office?", k=3)
output1

[Document(metadata={'producer': 'HR', 'source': 'VLCC'}, page_content='5) If Punched in for less than 4 Hours 1  day will be deducted   \n6) Compensatory Off, if worked on Week Off Should be taken within 40 days \n            \n           \n1) Minimum working hours every day 8 Hours + 30 minutes break \n2) Minimum working hours every week 48 + 30 minutes break every day (as mentioned \nabove) \n3) Week starting from Monday to Saturday*      \n4) If Punched in for more than 4 Hours but less than 8 Hours 1/2  day will be deducted   \n5) If Punched in for less than 4 Hours 1  day will be deducted   \n6) Compensatory Off if worked on Week Off Should be taken within 40 days \n  \n \n* For Corporate – First and Fourth are working Saturdays. Corporate employees are required to work from Office on \nthese days as well. \n* For Skill – All Saturdays are working.'),
 Document(metadata={'producer': 'HR', 'source': 'VLCC'}, page_content='duty certificate from a doctor, prior to returning to work. 

## Step 6 - Ask the LLM, using the retrieved chunks as context

In [13]:
# The user question, kept in its own variable so the prompt below stays readable.
question = "i am working in corporate office. how many minimum hours should i work in office?"

# Send only the text of the retrieved chunks. Interpolating the list itself
# would put Python reprs (metadata dicts, escaped "\n") into the prompt.
context = "\n\n".join(doc.page_content for doc in output1)

ai_msg = model.invoke(
    f"{question} you can use the below context to answer the question: {context}"
)

# `content` can be a plain string or a list of content blocks (strings or
# dicts with a "text" key). Normalise it instead of assuming a list whose
# first element is a text dict, which raises TypeError on a plain string.
content = ai_msg.content
if isinstance(content, str):
    answer = content
else:
    answer = "".join(
        block if isinstance(block, str) else block.get("text", "")
        for block in content
    )
print(answer)

Based on the documents provided, here are the minimum working hour requirements for a corporate office employee:

*   **Daily Minimum:** You are required to work a minimum of **8 hours per day**. 
*   **Break Time:** Depending on the specific section of the policy, there is an additional break of either **30 minutes or 1 hour** every day.
*   **Weekly Minimum:** You are required to work a minimum of **48 hours per week** (plus the daily break time).
*   **Working Days:** The week runs from Monday to Saturday. For Corporate employees, the **first and fourth Saturdays** of the month are mandatory working days.

**Important Attendance Rules:**
The policy specifies deductions if you do not meet the minimum daily hours:
*   **Less than 4 hours:** 1 full day will be deducted.
*   **Between 4 and 8 hours:** 1/2 day will be deducted.

In summary, to avoid any leave or salary deductions, you must punch in for at least **8 hours** of work per day.
