Imports

In [27]:
import json
import os
import re
from typing import List, Dict, Any

import pandas as pd
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.vectorstores import FAISS

Dataset Preprocessing

In [28]:
# Load the dimensioning-flavor table from CSV.
dimensioning_df = pd.read_csv("dimension_flavor_25A_25B_26A.csv")

# CSV column -> label used when a row is rendered as text. Labels equal the
# column names except 'Dimensioning Flavor', which is rendered with the
# spelling 'Dimensioning Flavour'.
DF_COLUMN_LABELS = {
    'Operator': 'Operator',
    'Network Function': 'Network Function',
    'Dimensioning Flavor': 'Dimensioning Flavour',
    'Package': 'Package',
    'DPP': 'DPP',
    'DIP': 'DIP',
    'DMP': 'DMP',
    'CMP': 'CMP',
    'PMP': 'PMP',
    'RMP': 'RMP',
    'IPP': 'IPP',
}

df_docStrList = []  # one "label: value" text block per CSV row (input for embedding)
df_map_list = []    # one {column: value} dict per CSV row (input for lexical search)

for _, row in dimensioning_df.iterrows():
    # Named `record`, not `map`: a module-level `map` would shadow the built-in
    # map() for every cell executed afterwards.
    record = {column: row[column] for column in DF_COLUMN_LABELS}
    df_map_list.append(record)

    # Missing cells are kept as-is, so they appear as "nan" in the text form.
    content = "\n".join(
        f"{label}: {record[column]}" for column, label in DF_COLUMN_LABELS.items()
    )
    df_docStrList.append(content.strip())

df_map_list
# df_docStrList

[{'Operator': 'Verizon',
  'Network Function': 'ADPF',
  'Dimensioning Flavor': 'medium-tdd-regular-spr',
  'Package': '25A',
  'DPP': 'medium-tdd-regular-spr',
  'DIP': 'medium-1.8m-spr',
  'DMP': 'medium-uni',
  'CMP': 'medium-uni',
  'PMP': 'medium-uni',
  'RMP': 'medium-uni',
  'IPP': nan},
 {'Operator': 'VOS',
  'Network Function': 'uADPF',
  'Dimensioning Flavor': 'medium-regular-spr-t21',
  'Package': '25A',
  'DPP': 'fdd-270m-18c-gsm-6trx-spr',
  'DIP': 'medium-uni',
  'DMP': 'medium-uni',
  'CMP': 'medium-uni',
  'PMP': 'medium-uni',
  'RMP': 'medium-uni',
  'IPP': 'medium'},
 {'Operator': 'VOS',
  'Network Function': 'uADPF',
  'Dimensioning Flavor': 'small-tdd-6fh-3fhm',
  'Package': '25B',
  'DPP': 'small-tdd',
  'DIP': 'tiny',
  'DMP': 'medium',
  'CMP': 'medium',
  'PMP': 'medium',
  'RMP': 'medium',
  'IPP': nan},
 {'Operator': 'VOS',
  'Network Function': 'uADPF',
  'Dimensioning Flavor': 'medium-regular-spr-t20',
  'Package': '25B',
  'DPP': 'medium-slim-gsm-spr',
  'D

In [29]:
# Load the pod-flavor table from CSV.
pod_flavor_df = pd.read_csv("pod_flavors_25A_25B_EU_US.csv")

# Columns copied from every row; the same names are used as text labels.
PF_COLUMNS = [
    'Pod type',
    'Pod flavor',
    'vCPU Request (vCore)',
    'vCPU Limit (vCore)',
    'vMemory (GB)',
    'Hugepage (GB)',
    'Persistent Volume (GB)',
]

pf_docStrList = []  # one "label: value" text block per CSV row (input for embedding)
pf_map_list = []    # one {column: value} dict per CSV row (input for lexical search)

for _, row in pod_flavor_df.iterrows():
    # Named `record`, not `map`: a module-level `map` would shadow the built-in
    # map() for every cell executed afterwards.
    record = {column: row[column] for column in PF_COLUMNS}
    pf_map_list.append(record)

    content = "\n".join(f"{column}: {record[column]}" for column in PF_COLUMNS)
    pf_docStrList.append(content.strip())

# pf_docStrList

In [30]:
# Wrap every row text block in a Document so it can be handed to the vector store.
dfDocList = [Document(page_content=row_text) for row_text in df_docStrList]
pfDocList = [Document(page_content=row_text) for row_text in pf_docStrList]

In [31]:
# Path of the pre-trained embedding model.
modelPath = "all-MiniLM-L12-v2/"

# Model options: run on CPU.
model_kwargs = dict(device='cpu')

# Encoding options: embeddings are NOT normalized.
encode_kwargs = dict(normalize_embeddings=False)

# Embedding function shared by both vector stores.
embeddings = HuggingFaceEmbeddings(
    model_name=modelPath,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [32]:
# Build one FAISS vector store per table, both using the same embedding function:
# dfdb holds the dimensioning rows, pfdb the pod-flavor rows.
dfdb = FAISS.from_documents(dfDocList, embeddings)
pfdb = FAISS.from_documents(pfDocList, embeddings)

In [33]:
# Persist both vector stores under the given local paths.
# NOTE(review): per the LangChain API the argument is presumably a folder path,
# so "*.bin" would name a directory rather than a single file - confirm.
dfdb.save_local("dfvectordb.bin")
pfdb.save_local("pfvectordb.bin")

In [34]:
# Reload the two stores from the paths written by the save step; these reloaded
# objects (dfdbIndex / pfdbIndex) are what the search and routing helpers use.
# NOTE(review): allow_dangerous_deserialization=True is an explicit opt-in;
# presumably loading unpickles data, so only point this at files this notebook
# wrote itself - confirm against the LangChain FAISS documentation.
dfdbIndex = FAISS.load_local("dfvectordb.bin", embeddings, allow_dangerous_deserialization=True)
pfdbIndex = FAISS.load_local("pfvectordb.bin", embeddings, allow_dangerous_deserialization=True)

In [35]:
# query = "fdd-120m-12c-gsm-8trx-spr"
# testdoc = dfdbIndex.similarity_search(query, k = 4)
# # print(testdoc[0].page_content)
# for i in testdoc:
#     print(i.page_content)
#     print()

In [36]:
# query1 = "fdd-270m-18c-gsm-6trx-spr"
# query2 = "medium-uni"
# testdoc = pfdbIndex.similarity_search(query2, k = 3)
# for i in testdoc:
#     print(i.page_content)
#     print()

Lexical and Semantic Search Functions

In [37]:
# Function to preprocess the LLM output to extract relevant information in a structured format. This function assumes that the LLM output contains specific phrases indicating the dimensioning flavor, network function, and pod details. It extracts these details and formats them into a dictionary. The function uses regular expressions to identify and extract the required information from the LLM output. The extracted information includes the dimensioning flavor, network function, and a list of pods along with their respective flavors. The function returns a dictionary containing these details.
import re
def preprocess_df_data(llm_output: str):
    """Extract dimensioning flavor, network function and pods from LLM text.

    The text is scanned line by line. Each line is handled by the first rule
    whose label it mentions:

    * "Dimensioning Flavor" / "Dimensioning Flavour" (case-insensitive): the
      text after a ':' or '-' separator becomes the dimensioning flavor.
    * "Network Function" (case-insensitive): same, for the network function.
    * otherwise a bullet shaped like "- NAME: value", where NAME is 2-4 letters
      containing a 'p', is recorded as a pod.

    A line that mentions a label but has no separator/value changes nothing.

    Args:
        llm_output: Raw text produced by the LLM.

    Returns:
        dict with keys "dimensioning_flavor" and "network_function" (both
        default to "Not Available") and "pods", a list of
        {"pod_name": ..., "pod_flavor": ...} dicts in order of appearance.
    """
    parsed = {
        "dimensioning_flavor": "Not Available",
        "network_function": "Not Available",
        "pods": [],
    }

    # (result key, pattern that detects the label, pattern that captures the value)
    labelled_fields = (
        ("dimensioning_flavor",
         r'Dimensioning Flavo[u]?r',
         r'Dimensioning Flavo[u]?r\s*[:\-]\s*(.+)'),
        ("network_function",
         r'Network Function',
         r'Network Function\s*[:\-]\s*(.+)'),
    )

    for raw_line in llm_output.strip().splitlines():
        for result_key, label_pattern, value_pattern in labelled_fields:
            if re.search(label_pattern, raw_line, re.IGNORECASE):
                captured = re.search(value_pattern, raw_line, re.IGNORECASE)
                if captured:
                    parsed[result_key] = captured.group(1).strip()
                # A labelled line is never also treated as a pod bullet.
                break
        else:
            bullet = re.match(r'\s*-\s*([A-Za-z]{2,4}):\s*(.+)', raw_line.strip())
            if bullet is None:
                continue

            pod_name = bullet.group(1).strip()
            pod_flavor = bullet.group(2).strip()
            lowered_name = pod_name.lower()

            # Keep only names that look like pods: contain 'p' and are not "Package".
            if 'p' not in lowered_name or lowered_name == 'package':
                continue
            parsed["pods"].append({'pod_name': pod_name, 'pod_flavor': pod_flavor})

    return parsed

In [38]:
# Function to extract dimensioning flavor information from a query using Semantic search. This function takes a query as input and performs a similarity search using the FAISS index to find the most relevant document. It then returns the retrieved document containing the dimensioning flavor information.
def extract_dimension_flavor_info2(query: str):
    """Semantic lookup of dimensioning data.

    Runs a top-1 similarity search for ``query`` on the module-level
    ``dfdbIndex`` vector store and returns the search result unchanged.
    """
    return dfdbIndex.similarity_search(query, k=1)
    

In [39]:
#Function to convert a list of dictionaries to a context string suitable for LLM consumption. This function takes a list of dictionaries and a title as input. It iterates over the dictionaries, formats the key-value pairs, and constructs a context string with the formatted information. The context string includes a title and each dictionary's key-value pairs in a structured format. The function returns the constructed context string.
def dict_to_context(data_dict_list, title="Context Information"):
    """Render a list of dicts as a bullet-point context block for an LLM prompt.

    Output layout: a "## <title>" heading followed by a blank line, then for
    each dict a "### Item <n>" heading (n starts at 1), one "- Key: value"
    bullet per entry and a trailing blank line. Keys have underscores replaced
    by spaces and are title-cased.

    Args:
        data_dict_list: Iterable of dicts with string keys.
        title: Heading text for the whole block.

    Returns:
        The rendered context as a single newline-joined string.
    """
    rendered = [f"## {title}\n"]

    for position, entry in enumerate(data_dict_list, start=1):
        rendered.append(f"### Item {position}")
        rendered.extend(
            f"- {name.replace('_', ' ').title()}: {content}"
            for name, content in entry.items()
        )
        rendered.append("")  # blank separator after each item

    return "\n".join(rendered)

In [None]:
"""
FINAL PRODUCTION FIELD PARSER
============================

✅ Handles spelling mistakes (operatr -> operator, flavour -> flavor)
✅ Multiple values per field
✅ Context separation (ignores explanatory text)
✅ Complex query structures
✅ Fuzzy field matching with edit distance
✅ False positive filtering

This is your ready-to-use production function.
"""

import re
# `Optional` is needed by find_best_field_match_fuzzy's return annotation.
from typing import Dict, Set, List, Optional

def levenshtein_distance(s1: str, s2: str) -> int:
    """Return the Levenshtein edit distance between ``s1`` and ``s2``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions that turn one string into the other. Only two
    rows of the dynamic-programming table are kept at a time.
    """
    # Iterate over the longer string so the rows are as short as possible.
    longer, shorter = (s1, s2) if len(s1) >= len(s2) else (s2, s1)
    if not shorter:
        return len(longer)

    prev_row = list(range(len(shorter) + 1))
    for row_no, ch_long in enumerate(longer, start=1):
        curr_row = [row_no]
        for col, ch_short in enumerate(shorter):
            mismatch = 0 if ch_long == ch_short else 1
            curr_row.append(min(
                prev_row[col + 1] + 1,     # insertion
                curr_row[col] + 1,         # deletion
                prev_row[col] + mismatch,  # substitution (free on a match)
            ))
        prev_row = curr_row

    return prev_row[-1]

def fuzzy_match_score(candidate: str, target: str) -> float:
    """Score how well ``candidate`` matches the field name ``target``.

    Both names are lower-cased and stripped first. Scoring rules, in order:

    * identical names                                  -> 1.0
    * identical after flavour->flavor / colour->color  -> 0.95
    * otherwise ``1 - edit_distance / longest_length``; when both names contain
      at least one word this is blended as 0.6 * character similarity +
      0.4 * (shared words / all distinct words).

    Returns:
        A float similarity score; higher means a closer match.
    """
    cand = candidate.lower().strip()
    goal = target.lower().strip()

    if cand == goal:
        return 1.0

    def _americanize(text):
        # Fold British spellings into their American form.
        return text.replace('flavour', 'flavor').replace('colour', 'color')

    if _americanize(cand) == _americanize(goal):
        return 0.95

    # Character-level similarity from the edit distance of the un-normalized names.
    edit_distance = levenshtein_distance(cand, goal)
    longest = max(len(cand), len(goal))
    if longest == 0:
        return 0.0
    score = 1.0 - (edit_distance / longest)

    # Word-level overlap matters for compound field names.
    cand_words = set(cand.split())
    goal_words = set(goal.split())
    if cand_words and goal_words:
        shared = len(cand_words & goal_words)
        combined = len(cand_words | goal_words)
        word_score = shared / combined if combined > 0 else 0
        score = 0.6 * score + 0.4 * word_score

    return score

def clean_query(query: str) -> str:
    """Normalize a raw query before field parsing.

    Applied in order: runs of two or more double quotes collapse to one,
    round/curly/square brackets are removed, every whitespace run becomes a
    single space, and the result is stripped.
    """
    substitutions = (
        (r'"{2,}', '"'),        # collapse repeated double quotes
        (r'[(){}\[\]]', ''),    # drop brackets
        (r'\s+', ' '),          # normalize whitespace
    )
    for pattern, replacement in substitutions:
        query = re.sub(pattern, replacement, query)
    return query.strip()

def separate_context_from_query(query: str) -> str:
    """Cut explanatory context off the end of a query.

    Looks (case-insensitively) for the phrases "just for the context",
    "these all are", "also called as" and "strings like". When at least one is
    present, everything from the earliest occurrence onwards is dropped and the
    remainder is stripped. When none is present the query is returned
    unchanged (not stripped).
    """
    context_markers = (
        r'just\s+for\s+the\s+context',
        r'these\s+all\s+are',
        r'also\s+called\s+as',
        r'strings\s+like',
    )

    found = (re.search(marker, query, re.IGNORECASE) for marker in context_markers)
    marker_positions = [hit.start() for hit in found if hit]

    if not marker_positions:
        return query
    return query[:min(marker_positions)].strip()

def extract_field_value_assignments(query: str) -> List[tuple]:
    """Extract ``(field, value)`` pairs from ``field=value`` / ``field: "value"`` text.

    The query is scanned character by character. At every '=' or ':' the field
    name is read backwards (letters, digits, space, '_' and '-') and the value
    forwards: a quoted value runs to the matching quote, an unquoted value runs
    to the next space, comma, tab or line break.

    The backward scan never crosses text already consumed as an earlier value,
    so in ``a=VOS and b=x`` the second field is read as "and b" (then cleaned
    to "b") instead of "VOS and b".

    After scanning, each field has filler words (for, the, and, or, extract,
    information, following) removed, and a pair is kept only when the cleaned
    field has 1-3 words and the value has at least two characters, does not
    start with '<' and is not a list number such as "1.".

    Args:
        query: Query text, typically already cleaned.

    Returns:
        List of ``(field, value)`` string tuples in order of appearance.
    """
    pairs = []
    length = len(query)
    i = 0
    # Index just past the most recently consumed value; lower bound for the
    # backward field-name scan.
    consumed_upto = 0

    while i < length:
        if query[i] in '=:':
            # Step left over whitespace to the last character of the field name.
            field_start = i - 1
            while field_start >= consumed_upto and query[field_start].isspace():
                field_start -= 1

            if field_start >= consumed_upto:
                # Walk left to the start of the field name.
                field_end = field_start + 1
                while (field_start >= consumed_upto and
                       (query[field_start].isalnum() or query[field_start] in ' _-')):
                    field_start -= 1
                field_start += 1

                field_candidate = query[field_start:field_end].strip()

                # Step right over whitespace to the first character of the value.
                value_start = i + 1
                while value_start < length and query[value_start].isspace():
                    value_start += 1

                if value_start < length:
                    if query[value_start] in '"\'':
                        # Quoted value: accepted only when the closing quote exists;
                        # otherwise fall through and keep scanning one char at a time.
                        quote_char = query[value_start]
                        value_end = query.find(quote_char, value_start + 1)
                        if value_end != -1:
                            pairs.append((field_candidate, query[value_start + 1:value_end]))
                            i = value_end + 1
                            consumed_upto = i
                            continue
                    else:
                        # Unquoted value: runs up to the next delimiter.
                        value_end = value_start
                        while value_end < length and query[value_end] not in ' ,\n\r\t':
                            value_end += 1

                        value = query[value_start:value_end].strip()
                        if (len(value) >= 2 and
                                value.lower() not in {'and', 'or', 'the', 'for', 'with'}):
                            pairs.append((field_candidate, value))

                        i = value_end
                        consumed_upto = i
                        continue
        i += 1

    # Clean and validate field candidates.
    noise_words = {'for', 'the', 'and', 'or', 'extract', 'information', 'following'}
    cleaned_pairs = []
    for field_candidate, value in pairs:
        clean_field = ' '.join(
            word for word in field_candidate.split() if word.lower() not in noise_words
        )
        if not clean_field:
            continue

        if (len(clean_field.split()) <= 3 and           # field names have at most 3 words
                len(value) >= 2 and
                not value.startswith('<') and           # template placeholder
                not re.match(r'^[0-9]+\.$', value)):    # numbered-list marker
            cleaned_pairs.append((clean_field, value))

    return cleaned_pairs

def find_best_field_match_fuzzy(candidate_field: str, available_fields: Set[str]) -> Optional[str]:
    """Pick the available field name that best matches ``candidate_field``.

    Every available field is scored with ``fuzzy_match_score``; fields scoring
    below 0.5 are discarded. Among the rest the highest score wins, and on a
    tie the field encountered first is kept.

    Returns:
        The best-matching field name, or None when nothing reaches the threshold.
    """
    min_score = 0.5  # acceptance threshold

    scored = [
        (fuzzy_match_score(candidate_field, field_name), field_name)
        for field_name in available_fields
    ]
    acceptable = [pair for pair in scored if pair[0] >= min_score]
    if not acceptable:
        return None

    # max() returns the first maximal element, matching a strict ">" update loop.
    return max(acceptable, key=lambda pair: pair[0])[1]

def parse_query_for_fields(query: str, available_fields: Set[str]) -> Dict[str, List[str]]:
    """Turn a natural-language query into ``{field: [values]}`` search criteria.

    Pipeline:
        1. ``clean_query`` normalizes quotes, brackets and whitespace.
        2. ``separate_context_from_query`` drops trailing explanatory text.
        3. ``extract_field_value_assignments`` finds ``field=value`` pairs.
        4. ``find_best_field_match_fuzzy`` maps each (possibly misspelled)
           field onto one of ``available_fields``; unmatched pairs are dropped.

    A field may collect several values; duplicates are stored once, in order
    of first appearance.

    Args:
        query (str): Natural language query (may contain spelling mistakes)
        available_fields (Set[str]): Exact field names that may be matched

    Returns:
        Dict[str, List[str]]: Matched field name -> list of requested values

    Examples:
        >>> fields = {'Operator', 'Dimensioning Flavor'}
        >>> parse_query_for_fields("operatr=VOS and dimensioning flavour=large", fields)
        {'Operator': ['VOS'], 'Dimensioning Flavor': ['large']}
    """
    main_query = separate_context_from_query(clean_query(query))

    field_criteria = {}
    for field_candidate, value in extract_field_value_assignments(main_query):
        matched_field = find_best_field_match_fuzzy(field_candidate, available_fields)
        if not matched_field:
            continue

        values = field_criteria.setdefault(matched_field, [])
        if value not in values:
            values.append(value)

    return field_criteria

Extract dimension flavor info from query using Lexical search

In [None]:
# Functions to extract dimension flavor and pod flavor info from a query using lexical search


def extract_documents_from_query(documents: List[Dict], query: str) -> List[Any]:
    """Lexical search: return the row dicts that satisfy the criteria in ``query``.

    The searchable field names are the keys of the first document. The query
    is parsed into field/value criteria with ``parse_query_for_fields`` and the
    matching rows are selected with ``find_matching_documents``.

    Args:
        documents: Row dicts to search.
        query: Natural-language query containing ``field=value`` style pairs.

    Returns:
        List of matching row dicts. When no usable field/value pair is found,
        a message is printed and an empty list is returned.
    """
    if documents:
        available_fields = documents[0].keys()
    else:
        available_fields = set()

    field_criteria = parse_query_for_fields(query, available_fields)

    if field_criteria:
        return find_matching_documents(documents, field_criteria)

    print("No valid field-value pairs found in the query.")
    return []

def get_all_fields_from_documents(document: Any) -> set:
    """Collect the field names found in one document's text.

    Args:
        document: Object with a ``page_content`` string made of
            "name: value" lines.

    Returns:
        set: For every line containing ':', the stripped text before the
        first ':'. Lines without ':' are ignored.
    """
    content_lines = document.page_content.strip().split('\n')
    return {
        entry.split(':', 1)[0].strip()
        for entry in content_lines
        if ':' in entry
    }




# def find_closest_field_match(query_field: str, available_fields: set) -> str:
#     """
#     Find the closest matching field name from available fields.
    
#     Args:
#         query_field (str): Field name from query
#         available_fields (set): Available field names
    
#     Returns:
#         str: Closest matching field name or None
#     """
#     query_field_lower = query_field.lower().strip()
    
#     # Exact match
#     for field in available_fields:
#         if field.lower() == query_field_lower:
#             return field
    
#     # Partial match
#     for field in available_fields:
#         if query_field_lower in field.lower() or field.lower() in query_field_lower:
#             return field
    
#     # Keywords mapping for common variations
#     field_keywords = {
#         'dimension': 'Dimensioning Flavour',
#         'dimensioning': 'Dimensioning Flavour',
#         'flavour': 'Dimensioning Flavour',
#         'flavor': 'Dimensioning Flavour',
#         'operator': 'Operator',
#         'network': 'Network Function',
#         'function': 'Network Function',
#         'package': 'Package',
#     }
    
#     for keyword, field_name in field_keywords.items():
#         if keyword in query_field_lower and field_name in available_fields:
#             return field_name
    
#     return None

def find_matching_documents(documents: List[Dict], field_criteria: Dict[str, List[str]]) -> List[Any]:
    """Select the row dicts that satisfy every field criterion.

    A row matches when, for each field in ``field_criteria``, the row has that
    field and its value equals at least one of the accepted values. Values are
    compared as lower-cased strings. Empty criteria match every row.

    Args:
        documents: Row dicts to filter.
        field_criteria: Field name -> accepted values. A single bare value
            (e.g. a plain string) is also accepted and treated as a one-element
            list; without this a string would be compared character by
            character and never match.

    Returns:
        List of the matching row dicts, in their original order.
    """
    def _accepted_values(raw):
        # Wrap scalars so they are compared whole rather than iterated.
        if isinstance(raw, str) or not hasattr(raw, '__iter__'):
            raw = [raw]
        return {str(value).lower() for value in raw}

    # Normalize once instead of once per document.
    wanted_by_field = {
        field_name: _accepted_values(field_values)
        for field_name, field_values in field_criteria.items()
    }

    return [
        doc for doc in documents
        if all(
            field_name in doc and str(doc[field_name]).lower() in wanted
            for field_name, wanted in wanted_by_field.items()
        )
    ]

In [15]:
#Function to extract pod flavor info from query using lexical search
def extract_pod_flavor_info(df_result: dict):
    """Look up the pod-flavor rows for every pod in a parsed dimensioning result.

    For each pod a "Pod type=<name>,Pod flavor=<flavor>" query is run against
    ``pf_map_list`` via ``extract_documents_from_query``; all hits are rendered
    with ``dict_to_context``. The rendered text is printed and returned.

    Args:
        df_result: Dict with a 'pods' list of
            {'pod_name': ..., 'pod_flavor': ...} entries. ``None`` or a dict
            without 'pods' is treated as "no pods" - the chat loop starts with
            ``df_result = None`` and may route here before any dimensioning
            answer has been parsed.

    Returns:
        str: The rendered context followed by a newline.
    """
    pods = (df_result or {}).get('pods') or []

    res = []
    for pod in pods:
        query = 'Pod type' + "=" + pod['pod_name'] + ',' + "Pod flavor" + "=" + pod['pod_flavor']
        res += extract_documents_from_query(pf_map_list, query)

    relData = dict_to_context(res) + "\n"

    print(relData)
    return relData

In [16]:
# df_result2= {'dimensioning_flavor': 'medium-regular-sn-t20', 'network_function': 'uADPF', 'pods': [{'pod_name': 'DIP', 'pod_flavor': 'medium-2m'}, {'pod_name': 'DMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'CMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'PMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'RMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'IPP', 'pod_flavor': 'none.'}, {'pod_name': 'DPP', 'pod_flavor': 'medium-regular-sn-t20-xaca'}]}
# extract_pod_flavor_info(df_result2)

In [17]:
def route_query(query: str):
    """Choose the vector store a query should be answered from.

    Keyword rules on the lower-cased query:

    * contains "dimensioning"            -> ``dfdbIndex`` (checked first)
    * otherwise contains "resources"     -> ``pfdbIndex``
    * otherwise (fallback)               -> ``dfdbIndex``

    Returns:
        The selected module-level index object.
    """
    dfdb_keywords = ["dimensioning"]
    pfdb_keywords = ["resources"]

    lowered = query.lower()

    mentions_dimensioning = any(keyword in lowered for keyword in dfdb_keywords)
    if not mentions_dimensioning and any(keyword in lowered for keyword in pfdb_keywords):
        return pfdbIndex

    # Both the dimensioning match and the fallback use the dimensioning store.
    return dfdbIndex

Count tokens in prompt

In [18]:
from transformers import AutoTokenizer

# Tokenizer location. Set the QWEN_TOKENIZER_PATH environment variable to use
# another checkout; when unset, the original machine-specific path is used.
TOKENIZER_PATH = os.environ.get(
    "QWEN_TOKENIZER_PATH", r"C:\Users\aakash.a1\Documents\VRN\Qwen3-32B"
)
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)

def num_tokens(text):
    """Return the number of tokens ``tokenizer.encode`` produces for ``text``."""
    return len(tokenizer.encode(text))

Build a trimmed prompt

In [19]:
from transformers import AutoTokenizer

# NOTE(review): this repeats the tokenizer load done in the token-counting cell.
# The location can be overridden with the QWEN_TOKENIZER_PATH environment
# variable; when unset, the original machine-specific path is used.
tokenizer = AutoTokenizer.from_pretrained(
    os.environ.get("QWEN_TOKENIZER_PATH", r"C:\Users\aakash.a1\Documents\VRN\Qwen3-32B")
)

MAX_CONTEXT_TOKENS = 32000         # Update to match your LLM
RESERVED_FOR_RESPONSE = 1024       # Tokens reserved for model's generated answer

def num_token(text):
    """Return the number of tokens ``tokenizer.encode`` produces for ``text``."""
    return len(tokenizer.encode(text))

def trim_history(qa_history, max_tokens=MAX_CONTEXT_TOKENS-RESERVED_FOR_RESPONSE):
    """Keep the most recent Q&A turns whose combined size fits in ``max_tokens``.

    Turns are considered newest first; each is measured with ``num_tokens`` on
    its "Q: ...\\nA: ..." rendering. Collection stops at the first turn that
    would exceed the budget, so the kept turns are always a contiguous tail of
    the history.

    Args:
        qa_history: Sequence of (question, answer) pairs, oldest first.
        max_tokens: Token budget for the kept turns.

    Returns:
        List of (question, answer) tuples in chronological order.
    """
    kept_newest_first = []
    used = 0

    for question, answer in reversed(qa_history):
        cost = num_tokens(f"Q: {question}\nA: {answer}")
        if used + cost > max_tokens:
            break
        kept_newest_first.append((question, answer))
        used += cost

    # Collected newest-first; flip back to chronological order.
    kept_newest_first.reverse()
    return kept_newest_first

def summarize_if_needed(answer, max_tokens=512):
    """Return ``answer`` unchanged, or cut to its first ``max_tokens`` tokens.

    This is plain truncation (encode, slice, decode) rather than a real
    summary; it could be replaced by an LLM-driven summary.
    """
    token_ids = tokenizer.encode(answer)
    if len(token_ids) <= max_tokens:
        return answer
    return tokenizer.decode(token_ids[:max_tokens])

In [20]:
MAX_TOKENS = 32000  # use correct value for your model
RESERVED_TOKENS = 1024  # for the model's output

def build_prompt(system_prompt, history, rag_context, user_question):
    """Assemble the prompt text, adding as much recent history as the budget allows.

    The base prompt is "<system_prompt>\\nContext:\\n<rag_context>\\nUser:
    <user_question>\\n". Past turns are then prepended newest first, each as
    "User: ...\\nAssistant: ...\\n", until the next turn would push the token
    count (measured with ``num_tokens``) above MAX_TOKENS - RESERVED_TOKENS.

    NOTE(review): because turns are prepended to the whole prompt, the history
    ends up in front of the system prompt - confirm this ordering is intended.

    Args:
        system_prompt: Instruction text.
        history: List of (question, answer) pairs, oldest first.
        rag_context: Retrieved context text.
        user_question: The new question.

    Returns:
        str: The assembled prompt.
    """
    budget = MAX_TOKENS - RESERVED_TOKENS

    prompt = f"{system_prompt}\nContext:\n{rag_context}\nUser: {user_question}\n"
    used = num_tokens(prompt)

    for past_question, past_answer in reversed(history):
        turn = f"User: {past_question}\nAssistant: {past_answer}\n"
        turn_cost = num_tokens(turn)
        if used + turn_cost > budget:
            break  # this turn (and everything older) does not fit
        prompt = turn + prompt  # prepend so kept turns stay chronological
        used += turn_cost

    return prompt

In [21]:
!pip install langchain ollama colorama 



GAP class

In [22]:
from langchain_core.language_models.llms import LLM
from typing import Optional, List
import requests
import sys
import time

class GapLLM(LLM):
    """LangChain ``LLM`` wrapper that sends prompts to the GAP text endpoint over HTTP.

    Each call POSTs a JSON body carrying the prompt and the sampling settings
    below to ``api_url`` and returns the first entry of the ``model_response``
    list found in the JSON reply.
    """

    model_name: str = "Qwen/Qwen3-32B"
    temperature: float = 0.1
    max_tokens: int = 16384
    top_p: float = 0.8
    top_k: int = 40
    repetition_penalty: float = 1.18
    # SECURITY: supply the key through the GAP_API_KEY environment variable.
    # The literal fallback only keeps existing `GapLLM()` calls working; it is
    # a credential committed to the notebook and should be rotated and removed.
    key: str = os.environ.get("GAP_API_KEY", "50n6kyhbspf7nrywpmjbif5bnb74xy9e")
    api_url: str = "https://api.gap-srib.com/gap/v2/txt/txt"
    app_id: str = "srOZp3TxPOn="
    # SECURITY: False (the previous hard-coded behaviour) disables TLS
    # certificate verification; set to True or a CA-bundle path where possible.
    verify_ssl: bool = False
    # Seconds to wait for the endpoint; None (previous behaviour) waits indefinitely.
    request_timeout: Optional[float] = None

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Any = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the endpoint and return the generated text.

        Args:
            prompt: Fully rendered prompt text.
            stop: Optional stop sequences; the returned text is cut at the
                earliest occurrence of any of them.
            run_manager: Accepted for compatibility with LangChain's calling
                convention; not used.
            **kwargs: Accepted for compatibility; not used.

        Returns:
            The stripped first entry of ``model_response``, or the string
            "[Error] No valid response received from model." when the reply
            has no non-empty ``model_response`` list.

        Raises:
            requests.HTTPError: When the endpoint answers with an error status.
        """
        # Millisecond timestamp used as the task id of this request.
        current_time = str(int(time.time() * 1000))
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.key}"
        }
        body = {
            "app_id": self.app_id,
            "mode": "chat",
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "top_k": self.top_k,
            "typical_p": 0.6,
            "repetition_penalty": self.repetition_penalty,
            "task_id": current_time,
            # "stream":True,
            "prompt": prompt,
            "preferred_model": self.model_name
        }

        res = requests.post(
            self.api_url,
            headers=headers,
            json=body,
            verify=self.verify_ssl,
            timeout=self.request_timeout,
        )
        res.raise_for_status()

        data = res.json()

        # Extract from `model_response` list safely
        responses = data.get("model_response") if isinstance(data, dict) else None
        if not (isinstance(responses, list) and responses):
            return "[Error] No valid response received from model."

        text = responses[0].strip()

        # Honour stop sequences: keep only the text before the earliest one.
        if stop:
            cut_points = [
                position
                for position in (text.find(token) for token in stop if token)
                if position != -1
            ]
            if cut_points:
                text = text[:min(cut_points)]

        return text

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM implementation."""
        return "gap-custom-llm"

PROMPTS

In [23]:
# Candidate prompt templates. Templates that take input use the placeholders
# {retrieved_context}, {chat_history} and {question}. The chat cell builds its
# chain from prompt8.
# Mis-encoded characters in prompt6-8 (apostrophe in "user’s", ellipsis in
# "<Think>…</Think>") are restored so the model does not receive garbled text.
prompt1="You are an assistant. Based on the following context:\n{retrieved_context}\n\nChat History:\n{chat_history}\n\nAnswer this:\n{question}"
prompt2="You are a concise assistant. If the user greets you, respond politely in one sentence.\nUser: Hi\nAssistant:"
prompt3= "You are an assistant. Based on the following context:\n{retrieved_context}\n\nChat History:\n{chat_history}\n\nIMPORTANT: Only answer the specific question asked. Do not invent conversations or add questions that weren't asked\n\nDo not invent questions\n\nAnswer this question only:\n{question}"
prompt4= "You are a helpful assistant, who can generate the answer for the given Question.\nIf you don't know the answer, say I don't know.\nDon't try to make up an answer.\nMake sure to answer in crisp manner.\nDo Not hallucinate.\nAnswer in English language only.\nDo not repeat the question. \nIf there are multiple answers, give all of them.\nIf you need more information from user, ask them to be more specific.\nDo not give confusing answers.\nIf you're not sure, say I don't know.\nAlways assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity."
prompt5="<s>\n[INST]\nYou are a helpful assistant, who can generate the answer for the given Question.\nIf you don't know the answer, say I don't know.\nDon't try to make up an answer.\nMake sure to answer in crisp manner.\nDo Not hallucinate.\nAnswer in English language only.\nDo not repeat the question. \nIf there are multiple answers, give all of them.\nIf you need more information from user, ask them to be more specific.\nDo not give confusing answers.\nIf you're not sure, say I don't know.\nAlways assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.\n[/INST]\n</s>\n[INST]\n{retrieved_context}\n[/INST]\n\n[INST]\n{chat_history}\n[/INST]\n[INST]\nQuestion: {question}\nAnswer: \n[/INST]\n[INST]\nGive short and crisp answer.\nDon't give additional information.\n[/INST]"
prompt6= "<s>\n[INST]\nYou are a helpful assistant, who can generate the answer for the given Question.\nIf you don't know the answer, say I don't know.\nDon't try to make up an answer.\nMake sure to answer in crisp manner.\nPlease keep going until the user’s query is completely resolved, before ending your turn and yielding back to the user. Only terminate your turn when you are sure that the problem is solved.\n\nDo Not hallucinate.\nAnswer in English language only.\nDo not repeat the question. \nIf there are multiple answers, give all of them.\nTake your time and go through each statement line by line before and after each action you decide to take.\n\nYou already have everything you need even without internet connection.\n\nIf you need more information from user, ask them to be more specific.\nDo not give confusing answers.\nIf you're not sure, say I don't know.\nAlways assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.\n[/INST]\n</s>\n[INST]\n{retrieved_context}\n[/INST]\n\n[INST]\n{chat_history}\n[/INST]\n[INST]\nQuestion: {question}\nAnswer: \n[/INST]\n[INST]\nGive short and crisp answer.\nDon't give additional information.\n[/INST]"
prompt7= "<s>\n[INST]\nYou are a Network pod placement assistant, who can generate the answer for the given Question.\nIf you don't know the answer, say I don't know.\nDon't try to make up an answer.\nMake sure to answer in crisp manner.\nPlease keep going until the user’s query is completely resolved, before ending your turn and yielding back to the user..\n\nDo Not hallucinate.\nAnswer in English language only.\nDo not repeat the question. \nIf there are multiple answers, give all of them.\nTake your time and go through each statement line by line before and after each action you decide to take.\n\nYou already have everything you need even without internet connection.\n\nIf you need more information from user, ask them to be more specific.\nDo not give confusing answers.\nDo not include any <Think></Think> tags or similar blocks in your response.Provide only the final answer, and do not show any <Think>…</Think> or reasoning blocks.\nIf you're not sure, say I don't know.\nAlways assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.\n[/INST]\n</s>\n[INST]\n{retrieved_context}\n[/INST]\n\n[INST]\n{chat_history}\n[/INST]\n[INST]\nQuestion: {question}\nAnswer: \n[/INST]\n[INST]\nGive short and crisp answer.\nDon't give additional information.\n[/INST]"
prompt8= "<s>\n[INST]\nYou are a Network pod placement assistant, who can generate the answer for the given Question.\nIf you don't know the answer, say I don't know.\nDon't try to make up an answer.\nMake sure to answer in crisp manner.\nPlease keep going until the user’s query is completely resolved, before ending your turn and yielding back to the user.\n\nDo Not hallucinate.\nAnswer in English language only.\nDo not repeat the question. \nIf there are multiple answers, give all of them.\nTake your time and go through each statement line by line.\n\nYou already have everything you need even without internet connection.\n\nIf you need more information from user, ask them to be more specific.\nDo not give confusing answers.\nIf you're not sure, say I don't know.\nAlways assist with care, respect, and truth. Respond with utmost utility yet securely. Avoid harmful, unethical, prejudiced, or negative content. Ensure replies promote fairness and positivity.\n[/INST]\n</s>\n[INST]\n{retrieved_context}\n[/INST]\n\n[INST]\n{chat_history}\n[/INST]\n[INST]\nQuestion: {question}\nAnswer: \n[/INST]\n[INST]\nGive short and crisp answer.\nDon't give additional information.\n[/INST]"

GAP Qwen/Qwen3-32B WITH  most similar context using FAISS (assuming db_index is set up) LOGIC OPTIMIZED

In [49]:
from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOllama
from langchain.schema.output_parser import StrOutputParser
from colorama import Fore, Style, init  
import re

init()  # Initialize colorama so the colour codes used below are handled

# --- Initialize LLM and Prompt ---
# prompt8 is filled with {retrieved_context}, {chat_history} and {question};
# StrOutputParser turns the model reply into a plain string.
llm = GapLLM()
prompt_template = ChatPromptTemplate.from_template(prompt8)
chain = prompt_template | llm | StrOutputParser()

# --- In-memory storage for Q&A ---
qa_history = []  # (question, answer) tuples, oldest first

# Result of preprocess_df_data() for the last dimensioning lookup; None until
# the first dimensioning question has been answered.
df_result = None
# Per-turn flags: True when the answer comes straight from a database lookup
# and the LLM call is skipped.
df_agent = False
pf_agent = False

# For cumulative tracking
token_counts_per_turn = []
total_tokens = 0

# --- Chat loop ---
print(f"{Fore.YELLOW}üöÄ AI Assistant is ready! Type 'exit' to stop.{Style.RESET_ALL}")
while True:
    question = input("Q: ")
    # Strip so that "exit " or " quit" also terminate the loop.
    if question.strip().lower() in ['exit', 'quit', 'stop']:
        print(f"{Fore.YELLOW}üëã Exiting... Have a great day!{Style.RESET_ALL}")
        break

    # Reset all per-turn state up front so nothing leaks from a previous turn.
    retrieved_context = ""
    preprocess_data = False
    df_agent = False
    pf_agent = False

    # Step 1: Route the question to a database and build the context from it.
    chosendb = route_query(question)
    if df_result is None and chosendb == dfdbIndex:
        df_agent = True
        print(f"{Fore.CYAN}üìö Context Source: Dimensioning Database{Style.RESET_ALL}")
        dimension_flavor_context = extract_documents_from_query(df_map_list, question)
        print(dimension_flavor_context)
        retrieved_context = dict_to_context(dimension_flavor_context) if dimension_flavor_context else ""
        # The lookup text is parsed into df_result after the answer is chosen.
        preprocess_data = True
        print(retrieved_context)
    elif chosendb == pfdbIndex:
        pf_agent = True
        print(f"{Fore.CYAN}üìö Context Source: Pod Flavors Database{Style.RESET_ALL}")
        # NOTE(review): df_result may still be None here if no dimensioning
        # question was asked first -- confirm extract_pod_flavor_info copes.
        retrieved_context = extract_pod_flavor_info(df_result)

    # **Check DR Rules**
    dr_keyword = "pod placement"
    # Case-insensitive match so "Pod placement" / "POD PLACEMENT" also trigger.
    if dr_keyword in question.lower():
        print(f"{Fore.RED}‚ö†Ô∏è DR Rules Triggered: '{dr_keyword}' detected!{Style.RESET_ALL}")
        with open("dr_rules_revamped.txt", "r", encoding="utf-8") as f:
            dr_rules = f.read()
        retrieved_context = dr_rules
        # The rules replace the database context, so the database shortcuts
        # must not apply: otherwise the raw rules file would be echoed back as
        # the answer (and parsed into df_result) instead of going to the LLM.
        df_agent = False
        pf_agent = False
        preprocess_data = False

    # Step 2: Build the chat history (all Q&A pairs so far)
    chat_history = "\n".join([f"Q: {q}\nA: {a}" for q, a in qa_history])

    # Approximate the prompt seen by the LLM for token accounting; this omits
    # the fixed instruction text of the template, so it undercounts slightly.
    prompt_for_token_count = (
        f"{retrieved_context}\n"
        f"{chat_history}\n"
        f"Q: {question}\nA:"
    )
    token_count = num_tokens(prompt_for_token_count)
    token_counts_per_turn.append(token_count)
    total_tokens += token_count
    print(f"[Debug] Input Tokens This Turn: {token_count}")
    print(f"[Debug] Total Input Tokens This Session: {total_tokens}")
    print(f"[Debug] Token usage per question: {token_counts_per_turn}")

    # Step 3: Answer directly from the database lookup, or ask the LLM.
    if df_agent or pf_agent:
        response = retrieved_context
    else:
        response = chain.invoke({
            "retrieved_context": retrieved_context,
            "chat_history": chat_history,
            "question": question
        })
        # Drop everything up to and including the first "</think>". If the
        # model emitted no such tag, keep the whole reply instead of raising
        # IndexError as a bare split(...)[1] would.
        _, closing_tag, after_think = response.partition("</think>")
        response = (after_think if closing_tag else response).strip()

    if preprocess_data:
        df_result = preprocess_df_data(response)
        preprocess_data = False
        print("df-results", df_result)

    # Step 4: Store the current Q&A pair in history
    qa_history.append((question, response))
    df_agent = False
    pf_agent = False

    # **Display Answer**
    print(f"{Fore.YELLOW}\nüí¨ Answer:{Style.RESET_ALL}\n{response}\n{Fore.MAGENTA}{'-'*50}{Style.RESET_ALL}")

    # **Display POD Info**
    if df_result is not None:
        print(f"\n{Fore.BLUE}üîç Dimensioning Flavor Result: {df_result}{Style.RESET_ALL}\n")

üöÄ AI Assistant is ready! Type 'exit' to stop.


üìö Context Source: Dimensioning Database
[{'Operator': 'VOS', 'Network Function': 'uADPF', 'Dimensioning Flavor': 'medium-regular-sn-t20', 'Package': '25B', 'DPP': 'medium-regular-sn-t20-xaca', 'DIP': 'medium-2m', 'DMP': 'medium-uni', 'CMP': 'medium-uni', 'PMP': 'medium-uni', 'RMP': 'medium-uni', 'IPP': nan}, {'Operator': 'VOS', 'Network Function': 'uADPF', 'Dimensioning Flavor': 'medium-regular-gnr-t20', 'Package': '25B', 'DPP': 'medium-regular-gsm-gnr-t20', 'DIP': 'medium-2m', 'DMP': 'medium-uni', 'CMP': 'medium-uni', 'PMP': 'medium-uni', 'RMP': 'medium-uni', 'IPP': 'small'}]
## Context Information

### Item 1
- Operator: VOS
- Network Function: uADPF
- Dimensioning Flavor: medium-regular-sn-t20
- Package: 25B
- Dpp: medium-regular-sn-t20-xaca
- Dip: medium-2m
- Dmp: medium-uni
- Cmp: medium-uni
- Pmp: medium-uni
- Rmp: medium-uni
- Ipp: nan

### Item 2
- Operator: VOS
- Network Function: uADPF
- Dimensioning Flavor: medium-regular-gnr-t20
- Package: 25B
- Dpp: medium-regular-gsm-


OUTPUTS

üöÄ AI Assistant is ready! Type 'exit' to stop.
üìö Context Source: Dimensioning Database
[Debug] Input Tokens This Turn: 254
[Debug] Total Input Tokens This Session: 254
[Debug] Token usage per question: [254]
c:\Users\aakash.a1\AppData\Local\anaconda3\envs\rag-env\lib\site-packages\urllib3\connectionpool.py:1097: InsecureRequestWarning: Unverified HTTPS request is being made to host 'api.gap-srib.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
  warnings.warn(

üí¨ Answer:
Assistant:
</think>

---  
Dimensioning Flavor: medium-regular-spr-t21  
Network Function: uADPF  
Pods and Pod Flavors:  
1. DPP: fdd-270m-18c-gsm-6trx-spr  
2. DIP: medium-uni  
3. DMP: medium-uni  
4. CMP: medium-uni  
5. PMP: medium-uni  
6. RMP: medium-uni  
7. IPP: medium  
---
--------------------------------------------------

üîç Dimensioning Flavor Result: {'dimensioning_flavor': 'medium-regular-spr-t21', 'network_function': 'uADPF', 'pods': [{'pod_name': 'DPP', 'pod_flavor': 'fdd-270m-18c-gsm-6trx-spr'}, {'pod_name': 'DIP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'DMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'CMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'PMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'RMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'IPP', 'pod_flavor': 'medium'}]}

üìö Context Source: Pod Flavors Database
[Debug] Input Tokens This Turn: 883
[Debug] Total Input Tokens This Session: 1137
[Debug] Token usage per question: [254, 883]
c:\Users\aakash.a1\AppData\Local\anaconda3\envs\rag-env\lib\site-packages\urllib3\connectionpool.py:1097: InsecureRequestWarning: Unverified HTTPS request is being made to host 'api.gap-srib.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
  warnings.warn(

üí¨ Answer:
**Resources for Each Pod Flavor:**  

1. **DPP**: `fdd-270m-18c-gsm-6trx-spr`  
   - vCPU Request: 52.0 | vCPU Limit: 52 | vMemory: 22.04 GB | Hugepage: 42.7 GB | Persistent Volume: 2(shared-pvc), 40(shared-log-pvc) | Package: 25A  

2. **DIP**: `medium-uni`  
   - vCPU Request: 2.0 | vCPU Limit: 3 | vMemory: 12.0 GB | Hugepage: Not Available | Persistent Volume: 2(shared-pvc), 40(shared-log-pvc) | Package: 25A  

3. **DMP**: `medium-uni`  
   - vCPU Request: 0.2 | vCPU Limit: 2 | vMemory: 2.0 GB | Hugepage: Not Available | Persistent Volume: 2(db-pvc), 4(shared-pvc), 40(shared-log-pvc) | Package: 25A  

4. **CMP**: `medium-uni`  
   - vCPU Request: 0.2 | vCPU Limit: 2 | vMemory: 2.0 GB | Hugepage: Not Available | Persistent Volume: 4(shared-pvc), 40(shared-log-pvc) | Package: 25A  

5. **PMP**: `medium-uni`  
   - vCPU Request: 0.1 | vCPU Limit: 2 | vMemory: 2.0 GB | Hugepage: Not Available | Persistent Volume: 4(shared-pvc), 40(shared-log-pvc) | Package: 25A  

6. **RMP**: `medium-uni`  
   - vCPU Request: 0.5 | vCPU Limit: 2 | vMemory: 2.0 GB | Hugepage: Not Available | Persistent Volume: 4(shared-pvc), 40(shared-log-pvc) | Package: 25A  

7. **IPP**: `medium`  
   - vCPU Request: 4.0 | vCPU Limit: 4 | vMemory: 2.0 GB | Hugepage: 2.0 GB | Persistent Volume: 2(shared-pvc), 40(shared-log-pvc) | Package: 25A
--------------------------------------------------

üîç Dimensioning Flavor Result: {'dimensioning_flavor': 'medium-regular-spr-t21', 'network_function': 'uADPF', 'pods': [{'pod_name': 'DPP', 'pod_flavor': 'fdd-270m-18c-gsm-6trx-spr'}, {'pod_name': 'DIP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'DMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'CMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'PMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'RMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'IPP', 'pod_flavor': 'medium'}]}

üìö Context Source: Dimensioning Database
‚ö†Ô∏è DR Rules Triggered: 'pod placement' detected!
[Debug] Input Tokens This Turn: 2396
[Debug] Total Input Tokens This Session: 3533
[Debug] Token usage per question: [254, 883, 2396]
c:\Users\aakash.a1\AppData\Local\anaconda3\envs\rag-env\lib\site-packages\urllib3\connectionpool.py:1097: InsecureRequestWarning: Unverified HTTPS request is being made to host 'api.gap-srib.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
  warnings.warn(

üí¨ Answer:

--------------------------------------------------

üîç Dimensioning Flavor Result: {'dimensioning_flavor': 'medium-regular-spr-t21', 'network_function': 'uADPF', 'pods': [{'pod_name': 'DPP', 'pod_flavor': 'fdd-270m-18c-gsm-6trx-spr'}, {'pod_name': 'DIP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'DMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'CMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'PMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'RMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'IPP', 'pod_flavor': 'medium'}]}

üìö Context Source: Dimensioning Database
‚ö†Ô∏è DR Rules Triggered: 'pod placement' detected!
[Debug] Input Tokens This Turn: 2792
[Debug] Total Input Tokens This Session: 6325
[Debug] Token usage per question: [254, 883, 2396, 2792]
c:\Users\aakash.a1\AppData\Local\anaconda3\envs\rag-env\lib\site-packages\urllib3\connectionpool.py:1097: InsecureRequestWarning: Unverified HTTPS request is being made to host 'api.gap-srib.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/latest/advanced-usage.html#tls-warnings
  warnings.warn(

üí¨ Answer:
#Calculation Total vCore Usage = Sum(Pod vCores) + CaaS(4) + Shared(2)
**DPP**: 52 vcore ([Resource]) ‚Üí Exceeds server capacity (48 vcore).
Placement Failure: Insufficient server core number (required ‚â•58 vcore vs available 48 vcore).
--------------------------------------------------

üîç Dimensioning Flavor Result: {'dimensioning_flavor': 'medium-regular-spr-t21', 'network_function': 'uADPF', 'pods': [{'pod_name': 'DPP', 'pod_flavor': 'fdd-270m-18c-gsm-6trx-spr'}, {'pod_name': 'DIP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'DMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'CMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'PMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'RMP', 'pod_flavor': 'medium-uni'}, {'pod_name': 'IPP', 'pod_flavor': 'medium'}]}

üëã Exiting... Have a great day!