![alt text](image.png)

### TODOS:

1. Add the tools for the datetime (acually no need because we are passing the current date time in the assistant prompt)

# Imports

In [1]:
import sys
sys.path.append('.')


In [2]:
# Let's create an example conversation a user might have with the assistant
tutorial_questions = [
    "Hi there, what time is my flight?",
    "Am i allowed to update my flight to something sooner? I want to leave later today.",
    "Update my flight to sometime next week then",
    "The next available option is great",
    "what about lodging and transportation?",
    "Yeah i think i'd like an affordable hotel for my week-long stay (7 days). And I'll want to rent a car.",
    "OK could you place a reservation for your recommended hotel? It sounds nice.",
    "yes go ahead and book anything that's moderate expense and has availability.",
    "Now for a car, what are my options?",
    "Awesome let's just get the cheapest option. Go ahead and book for 7 days",
    "Cool so now what recommendations do you have on excursions?",
    "Are they available while I'm there?",
    "interesting - i like the museums, what options are there? ",
    "OK great pick one and book it for my second day there.",
]


retrieval_questions = """
What is the company’s leave policy?
Can you explain the reimbursement guidelines?
Where can I find the company travel policy?
What are the rules for remote work eligibility?
Could you help me find the policy on expense approvals?
"""

hiring_questions = """
Can you schedule an interview for a shortlisted candidate?
What’s the current status of the candidate I referred?
How do I post a job opening for my department?
Can you help me shortlist candidates for the "JUNIOR PROMPT ENGINEER" role? 
Generate a hiring report for this month. 
How do I initiate the onboarding process for a new hire? 
Collect feedback from the panel about the last interview. 
I need to reschedule the interview for "SIDDHESH". Can you do that?
Can you generate an offer letter for the selected candidate?
"""

leave_questions = """
How do I apply for leave?
What’s the status of my leave request?
Can you cancel my leave request for [date]?
How many leave days do I have left?
Can you calculate my leave encashment for this year?
I need to modify my leave request to include [new date]. Can you help?
"""

payslip_questions = """
Can you get me my payslip for [month]?
What are the deductions on my last paycheck? 
Can you provide me a detailed breakdown of my salary? 
What are my tax details for this financial year?
"""

reimbursement_questions = """
What’s the status of my reimbursement request?
How do I submit a reimbursement request for my travel expenses?
Can you cancel my reimbursement request for [expense detail]?
"""

travel_questions = """
Can you book a flight for me to [destination] on [date]?
I need to cancel my flight ticket for [destination]. Can you do that?
What’s the status of my flight booking?
Can you find me accommodation in [city] for [dates]? 
What flight options are available for [destination] on [date]? 
Can you book a hotel for me near [location]? 
I need to cancel my hotel reservation. Can you help? 
Can you provide me a summary of my bookings? 
Generate an itinerary for my trip to [destination]. 
What’s the status of my hotel booking? 
Notify me if there are any updates on my travel bookings. 
Can you track my past travel bookings? 
Reschedule my hotel stay to start from [new date]. 
I need to reschedule my flight to [new date]. Can you help?
"""

user_profile_questions = """
Can you search for [Employee Name] in the directory? 
What are the directory stats for our team? 
Can you verify my profile details? 
Show me my profile information. 
Update my profile to include [new information]. 
Send a message to [Employee Name/Team]. 
How do I register a new employee in the system? 
Can you deactivate the profile for [Employee Name] who has left the company? 
Generate a contact list for my department. 
Add [Name] as my emergency contact.
""" 

session_questions = """
Can you provide the details of my current session?
What actions have been taken in this session so far?
Can you summarize today’s chat?
How do I access the details of previous sessions?
"""

In [3]:
import uuid
thread_id = str(uuid.uuid4())

config = {
    "configurable": {
        "user_id": "3442 587242",
        # Checkpoints are accessed by thread_id
        "thread_id": thread_id,
    }
}


# Specialized Workflows

![alt text](image-3.png)

In [4]:
from database.utils import db, update_dates
from graph.main import GRAPH
from utils.helper import _print_event
from langchain_core.messages import ToolMessage

ModuleNotFoundError: No module named 'rag'

In [None]:

# Update with the backup file so we can restart from the original place in each section
# db = update_dates(db)


_printed = set()
# We can reuse the tutorial questions from part 1 to see how it does.
for question in tutorial_questions:
    events = GRAPH.stream(
        {"messages": ("user", question)}, config, stream_mode="values"
    )
    for event in events:
        _print_event(event, _printed)
    snapshot = GRAPH.get_state(config)
    while snapshot.next:
        # We have an interrupt! The agent is trying to use a tool, and the user can approve or deny it
        # Note: This code is all outside of your graph. Typically, you would stream the output to a UI.
        # Then, you would have the frontend trigger a new run via an API call when the user has provided input.
        try:
            user_input = input(
                "Do you approve of the above actions? Type 'y' to continue;"
                " otherwise, explain your requested changed.\n\n"
            )
        except:
            user_input = "y"
        if user_input.strip() == "y":
            # Just continue
            result = GRAPH.invoke(
                None,
                config,
            )
        else:
            # Satisfy the tool invocation by
            # providing instructions on the requested changes / change of mind
            result = GRAPH.invoke(
                {
                    "messages": [
                        ToolMessage(
                            tool_call_id=event["messages"][-1].tool_calls[0]["id"],
                            content=f"API call denied by user. Reasoning: '{user_input}'. Continue assisting, accounting for the user's input.",
                        )
                    ]
                },
                config,
            )
        snapshot = GRAPH.get_state(config)


Hi there, what time is my flight?

Your flight, LX0112, is scheduled to depart from Charles de Gaulle Airport (CDG) at 10:42 AM and arrive at EuroAirport Basel-Mulhouse-Freiburg (BSL) at 12:12 PM on January 15, 2025. 

Please note that the current time is 10:38 AM, which means your flight is departing soon. Make sure to head to the airport if you haven't already!

Am i allowed to update my flight to something sooner? I want to leave later today.
Tool Calls:
  lookup_policy (call_P4GG0k7zNl6pJGBbv7wiwSXa)
 Call ID: call_P4GG0k7zNl6pJGBbv7wiwSXa
  Args:
    query: flight change policy
Name: lookup_policy


## Booking and Cancellation

1. How can I change my booking?
	* The ticket number must start with 724 (SWISS ticket no./plate).
	* The ticket was not paid for by barter or voucher (there are exceptions to voucher payments; if the ticket was paid for in full by voucher, then it may be possible to rebook online under certain circumstances. If it is not possible to rebook online because 

In [11]:
url = 'https://hrbotapi.chapterapps.ai/api/v1/auth/token'
user_id = 18
import requests
response = requests.post(url, params={"user_id": user_id})
response = response.json()
access_token = response['access_token']
token_type = response['token_type']

token_type, access_token

('bearer',
 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxOCIsImV4cCI6MTc2ODA0NTAxNX0.QM1gLU3nYiL1OL2niwEEUEPR1ijxtE9YiBQ3dFei8oU')

In [12]:
URL = ' https://hrbotapi.chapterapps.ai/api/v1/paysheet/paysheet'
# call the api to get the paysheet details with ?start_date=12-2024&end_date=12-2024
start_date = '08-2024'
end_date = '08-2024'    
# {'detail': 'Not authenticated'}
response = requests.get(URL, params={'start_date': start_date, 'end_date': end_date}, headers={'Authorization': f'{token_type} {access_token}'})
print(response.status_code == 200)
response = response.json()
response


True


{'start_date': '08-2024',
 'end_date': '08-2024',
 'total_records': 1,
 'paysheets': [{'id': 18,
   'ecode': '227',
   'date': '08-2024',
   'department': 'Sales',
   'designation': 'Area Sales Executive',
   'bank_details': {'bank_name': 'ICICI Bank',
    'account_no': None,
    'ifsc_code': 'ICIC0000'},
   'attendance': {'worked_days': 31, 'lop_days': True, 'rlop': True},
   'earnings': {'basic_salary': '28,667',
    'hra': '12,834',
    'conveyance': '3,333',
    'special_allowance': '20,533',
    'field_expenses': '-',
    'total': '65,367'},
   'deductions': {'income_tax': '-',
    'pf': '1,800',
    'other': '12,000',
    'total': '13,800'},
   'net_pay': '51,567',
   'remarks': '618,804'}]}

In [4]:
from utils.db_utils import execute_query

# get the user_id from paysheet table
query = "SELECT * FROM paysheet;"
execute_query(query, {})


Connected to PostgreSQL database successfully.


[{'Sl No.': 1.0,
  'Code': '210',
  'Name': 'Aman Kumar',
  'DOJ': '2017-01-04 00:00:00',
  'Relieved Date': None,
  'Department': 'Finance',
  'Designation': 'Manager',
  'Bank name': 'ICICI Bank',
  'Bank Ac No': None,
  'IFSC Code': 'ICIC0000',
  'Resigned Date': None,
  'Emp Worked days': 31.0,
  'LOP Days': 0.0,
  'RLOP': 0.0,
  'BASIC': '20800',
  'House Rent Allowance': '10400',
  'Conveyance Allowance': 3333,
  'Special Allowance': '10000',
  'Field Expenses': 0,
  'Total Earnings': '44533',
  'Income Tax': '0',
  'Other Deductions': None,
  'Provident Fund': 2496,
  'Total Deductions': '2496',
  'NETPAY': '42037',
  'Remarks': '5,04,444'},
 {'Sl No.': 2.0,
  'Code': '211',
  'Name': 'Shiddhart Singh',
  'DOJ': '21/02/2016',
  'Relieved Date': None,
  'Department': 'Finance',
  'Designation': 'Senior Manager',
  'Bank name': 'ICICI Bank',
  'Bank Ac No': None,
  'IFSC Code': 'ICIC0000',
  'Resigned Date': None,
  'Emp Worked days': 31.0,
  'LOP Days': 0.0,
  'RLOP': 0.0,
  'BAS

In [3]:
from tools.salary.salary_breakdown import get_salary_breakdown

# Create an instance of the schema with the required data
salary_breakdown_input = {"user_id": 1}

# Call the function with the correct input
get_salary_breakdown(salary_breakdown_input)

SELECT * FROM master_reco WHERE id = 1
Connected to PostgreSQL database successfully.


{'id': '1',
 'Code': '210',
 'Emp_Name': 'Aman Kumar',
 'BASIC': 20800.0,
 'House Rent Allowance': 10400.0,
 'Conveyance Allowance': 3333.0,
 'Special Allowance': 10000.0,
 'PF Employer': 2496.0,
 'CTC PM': 42037.0,
 'FIX CTC': 0.0,
 'VP': 0.0,
 'CTC PA': 0.0}

In [4]:
from utils.db_utils import execute_query
query = "SELECT name FROM sqlite_master WHERE type='table';"
execute_query(query, {})


Connected to PostgreSQL database successfully.


[{'name': 'paysheet'},
 {'name': 'it_reco'},
 {'name': 'ctc_reco'},
 {'name': 'pf_reco'},
 {'name': 'master_reco'},
 {'name': 'other_deduction'},
 {'name': 'reimbursements'},
 {'name': 'july_reimbursements'},
 {'name': 'aug_reimbursements'}]

In [5]:
query = "Pragma table_info(other_deduction);"
execute_query(query, {})

Connected to PostgreSQL database successfully.


[{'cid': 0,
  'name': 'id',
  'type': 'TEXT',
  'notnull': 0,
  'dflt_value': None,
  'pk': 0},
 {'cid': 1,
  'name': 'Code',
  'type': 'INTEGER',
  'notnull': 0,
  'dflt_value': None,
  'pk': 0},
 {'cid': 2,
  'name': 'Name',
  'type': 'TEXT',
  'notnull': 0,
  'dflt_value': None,
  'pk': 0},
 {'cid': 3,
  'name': 'Deductions',
  'type': 'INTEGER',
  'notnull': 0,
  'dflt_value': None,
  'pk': 0},
 {'cid': 4,
  'name': 'Reason',
  'type': 'TEXT',
  'notnull': 0,
  'dflt_value': None,
  'pk': 0}]

In [2]:
from tools.salary.deduction import get_deduction_info

input_data = {"user_id": 1, "amount": 6000}
get_deduction_info.invoke(input_data)

SELECT * FROM other_deduction WHERE id = 1 and Deductions = 6000.0
Connected to PostgreSQL database successfully.


{'Name': 'Amit ', 'Deductions': 6000, 'Reason': 'Screen Damage'}

In [9]:

markdown_file_path = 'dump\\markdown_data'
# from langchain_text_splitters import MarkdownHeaderTextSplitter
# markdown_document = "# Foo\n\n    ## Bar\n\nHi this is Jim\n\nHi this is Joe\n\n ### Boo \n\n Hi this is Lance \n\n ## Baz\n\n Hi this is Molly"

# headers_to_split_on = [
#     ("#", "Header 1"),
#     ("##", "Header 2"),
#     ("###", "Header 3"),
# ]

# markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
# md_header_splits = markdown_splitter.split_text(markdown_document)
# md_header_splits

import os 
# import nltk
# nltk.download('punkt_tab')
# nltk.download('averaged_perceptron_tagger_eng')
docs = []
from langchain_community.document_loaders import UnstructuredMarkdownLoader
for file in os.listdir(markdown_file_path):
    loader = UnstructuredMarkdownLoader(f"{markdown_file_path}\\{file}" , mode="elements",  strategy="fast")
    
    data = loader.load_and_split()
    
    docs += data


In [10]:
docs

[Document(metadata={'source': 'dump\\markdown_data\\Bonus and Increment Policy.md', 'emphasized_text_contents': ['Bonus and Increment Policy'], 'emphasized_text_tags': ['b'], 'languages': ['eng'], 'file_directory': 'dump\\markdown_data', 'filename': 'Bonus and Increment Policy.md', 'filetype': 'text/markdown', 'last_modified': '2025-01-03T17:01:48', 'category': 'UncategorizedText', 'element_id': '8e08f3fbc97fc0a9901453d9f0292366'}, page_content='\ufeffBonus and Increment Policy'),
 Document(metadata={'source': 'dump\\markdown_data\\Bonus and Increment Policy.md', 'category_depth': 1, 'emphasized_text_contents': ['Purpose:'], 'emphasized_text_tags': ['b'], 'languages': ['eng'], 'file_directory': 'dump\\markdown_data', 'filename': 'Bonus and Increment Policy.md', 'filetype': 'text/markdown', 'last_modified': '2025-01-03T17:01:48', 'category': 'ListItem', 'element_id': '9c5d794a8b593b1e663167d0b851dddd'}, page_content='Purpose: The purpose of this policy is to establish a framework for aw

In [2]:
state = {
    'a': 1,
    'b': 2,
    'messages': ['a', 'b', 'c']
}

messages = ['d', 'e', 'f']
state = {**state, "messages": messages}
state

{'a': 1, 'b': 2, 'messages': ['d', 'e', 'f']}

In [9]:
K = "s"
namestr(K)

NameError: name 'namestr' is not defined

# Retrieval NOT WORKING

In [1]:
from langchain_core.tools import tool
import os 
# import nltk
# nltk.download('punkt_tab')
# nltk.download('averaged_perceptron_tagger_eng')

from dotenv import load_dotenv
load_dotenv(override=True)
from typing import List
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
from langchain_community.document_loaders import UnstructuredMarkdownLoader
from langchain_text_splitters import MarkdownHeaderTextSplitter



class MarkdownVectorDB:
    def __init__(
        self, persist_directory: str=".\\dump\\vector_store_data", markdown_folder: str="markdown_data"
    ):
        self.persist_directory = persist_directory
        self.markdown_folder = markdown_folder
        self.embeddings = OpenAIEmbeddings(model="text-embedding-3-large")
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=700, chunk_overlap=150
        )
        # self.markdown_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)


        self.vector_store = None

    def _initialize_vector_store(self):

        return Chroma(
            embedding_function=self.embeddings, persist_directory=self.persist_directory
        )

    def process_markdown_files(self):
        documents = []

        for filename in os.listdir(self.markdown_folder):
            if filename.endswith(".md"):
                filepath = os.path.join(self.markdown_folder, filename)

                # loader = UnstructuredMarkdownLoader(filepath , mode="elements",  strategy="fast")
                # data = loader.load_and_split(text_splitter=self.text_splitter)
                
                # for chunk in data:
                #     print(chunk.page_content)
                #     input()
                #     keywords = self.extract_keywords(chunk.page_content)
                #     chunk.metadata["keywords"] = ",".join(keywords)
                #     documents.append(chunk)
                with open(filepath, "r", encoding="utf-8") as file:
                    content = file.read()
                    chunks = self.text_splitter.split_text(content)
                    # chunks = self.markdown_splitter.split_text(content)
                   
                    for chunk in chunks:
                        keywords = self.extract_keywords(chunk)
                        doc = Document(page_content=chunk, metadata={
                            "filename": filename,
                            "keywords": ",".join(keywords)
                        })
                        
                        documents.append(doc)

        return documents

    def generate_vector_database(self):
        documents = self.process_markdown_files()
        self.vector_store = self._initialize_vector_store()
        self.vector_store.add_documents(documents)
        # self.vector_store.persist()
        print(f"Processed and added {len(documents)} chunks to the vector database.")

    def load_vector_database(self):
        print("Loading existing vector database...")
        self.vector_store = self._initialize_vector_store()

    def retrieve_documents(self, query: str, top_k: int = 5) -> List[Document]:
        if self.vector_store is None:
            raise ValueError(
                "Vector store is not initialized. Load or generate the vector store first."
            )
        print('&&&&&&&&&&&&&&&&&&&&&&')
        print("retrive documents invoked")
        results = self.vector_store.similarity_search(query, k=top_k)
        print("results", results)
        return results

    @staticmethod
    def extract_keywords(chunk: str, top_n: int = 10) -> List[str]:
        documents = [chunk]
        tfidf_vectorizer = TfidfVectorizer(stop_words="english", max_features=top_n)
        tfidf_matrix = tfidf_vectorizer.fit_transform(documents)
        feature_array = np.array(tfidf_vectorizer.get_feature_names_out())
        tfidf_sorting = np.argsort(tfidf_matrix.toarray()).flatten()[::-1]
        top_keywords = feature_array[tfidf_sorting][:top_n]
        return top_keywords.tolist()

    def recreate_or_load_vector_db(self, recreate:bool=False):
        if not os.path.exists(self.persist_directory) or recreate:
            os.makedirs(self.persist_directory, exist_ok=True)
            print("Recreating vector database...")
            self.generate_vector_database()
        else:
            print("Loading vector database...")
            self.load_vector_database()


# markdown_file_path = 'dump\\markdown_data'


# vector_db = MarkdownVectorDB(markdown_folder=markdown_file_path)
# vector_db.recreate_or_load_vector_db(recreate=False)



vector_store = Chroma(embedding_function=OpenAIEmbeddings(model="text-embedding-3-large"), 
                      persist_directory=".\\dump\\vector_store_data")

print(vector_store)

retriever = vector_store.as_retriever()
print(retriever)

print(vector_store.similarity_search("leave policy", k=5))



<langchain_chroma.vectorstores.Chroma object at 0x000002A01DCA8910>
tags=['Chroma', 'OpenAIEmbeddings'] vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000002A01DCA8910> search_kwargs={}
[Document(metadata={'filename': 'HR MANUAL VER 15 - 2024 - Draft.md', 'keywords': 'leave,casual,privilege,treated,subsequent,single,sick,sanctioned,salary,1000'}, page_content='v. Leave for 1-2 days is treated as Casual Leave and NOT privilege leave. However, if there are no casual leaves remaining, then privilege leave may be sanctioned. But this would require an approval from the Department Head.\nvi. If an employee does not take a single Sick Leave/Casual Leave in a Calendar Year, Rs 1000/- reward is given to him / her, which is credited to his/her bank account along with the subsequent salary month.'), Document(metadata={'filename': 'HR MANUAL VER 15 - 2024 - Draft.md', 'keywords': 'leave,overtime,hours,shortfall,performance,ii,day,considered,aye,aggregated'}, page_content="ii. If any

In [1]:
l = [1,2,3]
l2 = [4,5,6]

l3 = [*l, *l2]
l3

[1, 2, 3, 4, 5, 6]

In [7]:
from typing import Optional, Dict, Any, List
from datetime import date, datetime
from langchain_core.tools import tool

from dotenv import load_dotenv
load_dotenv(override=True)
import json
import re
import os


from openai import OpenAI, AsyncOpenAI
from pathlib import Path
client = OpenAI()
link = 'https://arxiv.org/html/2501.13021v1'

response: Any = client.chat.completions.create(
    model= "gpt-4o-mini",
    messages=[
        # {"role": "system", "content": self.system_prompts[use]},
        {"role": "system", "content": f"give abstract for the paper. Use open_url() to read and summarize this page"},
        {"role": "user", "content": link},
    ],
    temperature=0.0,
)
message = response.choices[0].message.content
message


"I'm unable to access external websites directly, including the URL you provided. However, if you can share the main points or sections of the paper, I would be happy to help you create an abstract based on that information!"

In [8]:
lst = [1,2,3,4,5,6]
lst

[1, 2, 3, 4, 5, 6]

In [11]:
lst[:-3]

[1, 2, 3]

In [21]:
from pydantic import BaseModel, model_validator

class SchemaMessage(BaseModel):
    MESSAGE_THRESHOLD: int
    LAST_K_CONTINUATION_MESSAGES: int 

    # Corrected validator
    @model_validator(mode='before')
    @classmethod
    def validate_threshold(cls, values):
        if values["MESSAGE_THRESHOLD"] < values["LAST_K_CONTINUATION_MESSAGES"]:
            raise ValueError("MESSAGE_THRESHOLD must be greater than LAST_K_CONTINUATION_MESSAGES")
        return values

# Test case
message = SchemaMessage(MESSAGE_THRESHOLD=5, LAST_K_CONTINUATION_MESSAGES=3)  # ✅ Valid
# message = SchemaMessage(MESSAGE_THRESHOLD=2, LAST_K_CONTINUATION_MESSAGES=3)  # ❌ Raises ValueError
