#### Pre-Processing

In [1]:
# Parse a PDF as well as an email invoice: pre-process everything in the AOR directory.
from src.utils import preprocess_aor

AOR_DIR = "database/aor"
pdf_path = "database/aor/AOR-sample1.pdf"  # sample document located inside AOR_DIR
preprocess_aor(aor_dir=AOR_DIR)

AOR pdf paths  ['database/aor/AOR-sample1.pdf']


#### Search & Query

In [2]:
# Load the preprocessed AORs and look up the one(s) to chat with (RAG-based chat follows).
from src.aor import load_aors
from src.search import search_aor_with_item, search_aor_with_no


aor_list = load_aors()

# Quick routing: item-based or number-based search.

# Item-based alternative (swap in for the number-based lookup below):
# item_query = "Hardware"
# matching_aors = search_aor_with_item(item_query, aor_list)

no_query = "DH928"
matching_aors = search_aor_with_no(no_query, aor_list)


# The header names the search that actually ran (number-based, not item-based).
print("Number-based AOR searching:")
for aor in matching_aors:
    print(f"AOR No: {aor.no}")
    print(f"Description: {aor.description}")
    print(f"Items: {aor.items}")
    print(f"Budgets: {aor.budgets}")
    print(f"Expiry Date: {aor.expiry_date}")
    print("---")


Item-based AOR searching:
AOR No: DH928/6/4
Description: Approval of requirement specifications for the implementation of Crew Mobile Digital Services Lite (CMDS-Lite) for First Flotilla, specifically for a Proof of Concept (PoC) trial on two Frigates (FFSes).
Items: ['Hardware for 2 FFSes', 'Software and Licenses for 2 FFSes', 'IT man-effort and Professional Services']
Budgets: [36000.0, 10000.0, 78000.0]
Expiry Date: 2024-05-16
---


#### RAG-based chat 

In [8]:
# Example questions:
#   How much budget is left in AOR xxx?
#   What kinds of items are covered by AOR xxx?

from typing import List
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Example user question; it is passed to query_detail() in a later cell.
query = "How much is left in the budget of AOR DH928/6/4?"

# Loaded models keyed by name, so repeated queries do not re-load the model.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name: str):
    """Return the SentenceTransformer for ``model_name``, loading it on first use."""
    if model_name not in _EMBEDDING_MODELS:
        _EMBEDDING_MODELS[model_name] = SentenceTransformer(model_name)
    return _EMBEDDING_MODELS[model_name]


def _split_sentences(text) -> List[str]:
    """Split ``text`` on '.' and drop fragments that are empty once stripped."""
    if not text:
        return []
    # NOTE: a plain '.' split also cuts inside decimals/abbreviations; kept for
    # compatibility with the original retrieval granularity.
    stripped = (fragment.strip() for fragment in text.split('.'))
    return [fragment for fragment in stripped if fragment]


def query_detail(aor, query, top_k: int = 3, model_name: str = 'all-MiniLM-L6-v2') -> str:
    """Retrieve the fragments of an AOR's PDF text most similar to ``query``.

    The text in ``aor.pdf_text`` is split on '.', the non-empty fragments and
    the query are embedded with a SentenceTransformer model, and the fragments
    are ranked by cosine similarity to the query.

    Args:
        aor: Object exposing the document text as ``pdf_text``.
        query: The question to match against the document.
        top_k: Number of fragments to return (default 3, as before).
        model_name: SentenceTransformer model to embed with.

    Returns:
        The ``top_k`` most similar fragments, most similar first, joined by
        newlines. An empty string if the text is missing or has no non-empty
        fragments.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        # A slice like [-0:] would silently return *everything*; fail loudly instead.
        raise ValueError("top_k must be a positive integer")

    sentences = _split_sentences(aor.pdf_text)
    if not sentences:
        # Nothing to rank; skip loading the model entirely.
        return ""

    model = _get_embedding_model(model_name)

    # Encode the query and the candidate fragments
    query_embedding = model.encode([query])
    sentence_embeddings = model.encode(sentences)

    # Similarity of the single query row against every fragment
    similarities = cosine_similarity(query_embedding, sentence_embeddings)[0]

    # Indices sorted by descending similarity, truncated to the top_k best
    top_indices = np.argsort(similarities)[::-1][:top_k]

    return "\n".join(sentences[i] for i in top_indices)

In [10]:
# Select the AOR explicitly instead of relying on the `aor` loop variable left
# over from the search cell; the last match is the object that variable held.
if not matching_aors:
    raise ValueError("No matching AOR found; run the search cell with a valid query first.")
selected_aor = matching_aors[-1]
response = query_detail(selected_aor, query)



In [11]:
# print() rather than a bare expression so the newline-joined sentences render on separate lines.
print(response)

Table 1: Cost Breakdown  
S/N Description  Estimated Cost  
1 Hardware for 2 FFSes  $36,000 
2 Software and Licenses for 2 FFSes  $10,000 
3 IT man -effort and Professional Services  $78,000 
Total  $124,000  
 
5
The estimated contract value of this trial will not exceed  
$124,000  (see the breakdown in Table 1)
All 
hardware/infrastructure/software preparation/services costs shall be borne 
by the potential supplier
