In [1]:
# Enable autoreload (mode 2) so edits to imported project modules are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import os.path as osp
import sys
# Parent of the notebook's working directory; added to sys.path so the
# `lib` package imported further down can be resolved.
ROOT_DIR = osp.dirname(os.getcwd())
# Guard the append so re-running this cell does not stack duplicate entries.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# Set up models, vectorstore and retriever

In [3]:
from langchain_chroma import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.retrievers import ContextualCompressionRetriever
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.retrievers import EnsembleRetriever
from tqdm import tqdm
from lib.normic_wrapper import NomicEmbedding
from lib.config import LLM_MODEL_NAME, EMBEDDING_MODEL_NAME, VECTOR_STORE_NAME,COMPRESSION_RETRIEVER_TOP_N,VECTOR_RETRIEVER_K,RERANKER_MODEL_NAME
from lib.custom_retriever import CustomRetriever

In [4]:
from transformers import AutoTokenizer,AutoModelForCausalLM
from peft import PeftModel
import transformers
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Relative locations of the source documents and of the persisted vector stores.
DOCUMENT_PATH='../data/rel18/'
VECTOR_STORE_PATH = '../data/vectorstore/'
# `allow_download` is a boolean flag. The previous string value 'True' only
# worked because any non-empty string is truthy ('False' would have enabled
# downloads as well), so a real bool is used.
EMBEDDING_KWARGS = {'allow_download': True}#,'dimensionality':512, 'prefix':'search_query'

In [6]:
# Embedding function handed to the vector store: a GPT4All embedder for the
# configured model name, with the extra GPT4All kwargs and the 'gpu' device.
embeddings = GPT4AllEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    gpt4all_kwargs =EMBEDDING_KWARGS,
    device='gpu',
)

# Alternative embedding backends (currently disabled):
# embeddings=HuggingFaceBgeEmbeddings(model_name=EMBEDDING_MODEL_NAME, model_kwargs={"device": "cuda"}, encode_kwargs={"normalize_embeddings": True})
# embeddings = NomicEmbedding(model_name=EMBEDDING_MODEL_NAME,dimensionality=512,device='gpu')

In [7]:
# Open the persisted Chroma store selected by the config, using the
# embedding function defined earlier in the notebook.
vectorstore = Chroma(
    embedding_function=embeddings,
    persist_directory=f"{VECTOR_STORE_PATH}{VECTOR_STORE_NAME}",
)

In [8]:
# Display which vector store name the config currently selects.
VECTOR_STORE_NAME

'chromadb_512_32'

In [9]:
# Fetch a single stored entry with its embedding and show the vector's length.
len(vectorstore.get(limit=1,include=['embeddings'])['embeddings'][0])

384

from langchain_community.llms.llamafile import Llamafile
from langchain.retrievers.multi_query import MultiQueryRetriever, LineListOutputParser

# Set logging for the queries
import logging

# Raise the multi-query retriever's logger to INFO, then install the default
# root logging configuration.
logger = logging.getLogger("langchain.retrievers.multi_query")
logger.setLevel(logging.INFO)
logging.basicConfig()

from typing import List

from langchain_community.llms.llamafile import Llamafile
from langchain.chains import LLMChain
from langchain_core.output_parsers import  BaseOutputParser
# from langchain.retrievers.multi_query import LineListOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field


# Output parser will split the LLM result into a list of queries
# class LineList(BaseModel):
#     # "lines" is the key (attribute name) of the parsed output
#     lines: List[str] = Field(description="Lines of text")
    
#     def append(self, item:str):
#         self.lines.append(item)

class LineListOutputParser(BaseOutputParser[List[str]]):
    """Parse raw LLM text into a list of its non-empty, whitespace-trimmed lines."""

    def parse(self, text: str) -> List[str]:
        """Split ``text`` on newlines, trim every piece and drop the empty ones."""
        trimmed = (piece.strip() for piece in text.split("\n"))
        return [piece for piece in trimmed if piece != '']


output_parser = LineListOutputParser()

# Prompt asking the LLM for two alternative phrasings of the user question,
# separated by newlines; the single template variable is `question`.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate two different versions of the given user question to retrieve relevant documents from a vector database. By generating multiple perspectives on the user question, your goal is to help the user overcome some of the limitations of the distance-based similarity search. Provide these alternative questions separated by newlines.
    {question}
    """)

# Llamafile client pointed at http://127.0.0.1:8080 with a fixed seed
# (presumably a llamafile server running on this machine -- confirm it is up
# before executing the cells that call the LLM).
llm =  Llamafile(base_url='http://127.0.0.1:8080',seed=123)
# Chain wiring: QUERY_PROMPT -> llm -> output_parser (one query per line).
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

In [10]:
# VECTOR_RETRIEVER_K = 50
# COMPRESSION_RETRIEVER_TOP_N=2
# RERANKER_MODEL_NAME='BAAI/bge-reranker-large'

In [11]:
from lib.deduplicate_retriever import DeduplicateRetriever


# Similarity search over the vector store returning VECTOR_RETRIEVER_K hits,
# wrapped in the project's DeduplicateRetriever.
vstore_retriever = DeduplicateRetriever(
    base_retriever=vectorstore.as_retriever(
        search_kwargs={'k': VECTOR_RETRIEVER_K},
        search_type="similarity",
    )
)

In [12]:
# pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0", torch_dtype=torch.bfloat16, device_map="auto")
# tiny_token = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
# messages = [
#     {
#         "role": "system",
#         "content": "You are a friendly chatbot who always responds in the style of a pirate",
#     },
#     {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
# ]
# prompt = tiny_token.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# print(prompt)

from langchain.retrievers.self_query.chroma import ChromaTranslator
#self 
# Metadata attributes exposed to the self-query retriever, written as
# (name, description, type) rows.
metadata_field_info = [
    AttributeInfo(name=field_name, description=field_description, type=field_type)
    for field_name, field_description, field_type in (
        ("source", "The name of the document where the content was taken from", "string"),
        ("year", "The year the standard was released", "integer"),
        ("tag", "The 3GPP release information", "string"),  # 3GPP Release
    )
]
# self_llm = CustomTransformersLLM(model_name='microsoft/phi-2', max_length=128) 
document_content_description = 'contains technical details about telecommunications standards'
# Self-query retriever built from the LLM and the vector store, with
# ChromaTranslator as the structured-query translator.
self_retriever = SelfQueryRetriever.from_llm(
    llm,
    vectorstore,
    document_content_description,
    metadata_field_info,
    structured_query_translator=ChromaTranslator(),
)





# custom_retriever = CustomRetriever(compression_retriever,answer_generator,32)
# multiquery
# llm = Llamafile()
# retriever_from_llm = MultiQueryRetriever.from_llm(llm=llm,
#     retriever=vstore_retriever
# )
# Cross-encoder reranking ("compression"): rerank the vector-store hits and
# keep the top COMPRESSION_RETRIEVER_TOP_N of them.
# This is built *before* the multi-query retriever because that retriever
# takes compression_retriever as its base; in the previous cell order the name
# was referenced before assignment, so a fresh-kernel Run All raised NameError.
rerank_model = HuggingFaceCrossEncoder(model_name=RERANKER_MODEL_NAME, model_kwargs = {'device': 'cuda'})
compressor = CrossEncoderReranker(model=rerank_model, top_n=COMPRESSION_RETRIEVER_TOP_N)
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=vstore_retriever
)

# Multi-query retrieval on top of the reranking retriever: llm_chain produces
# the alternative queries and include_original=True keeps the user's own
# question among them.
retriever_from_llm = MultiQueryRetriever(
    retriever=compression_retriever, llm_chain=llm_chain, include_original=True
)



In [14]:
# Sample question used to spot-check the retriever in the following cells.
question = "How does a supporting UE attach to the same core network operator from which it detached in a shared network? [3GPP Release 17]"
print(question)

How does a supporting UE attach to the same core network operator from which it detached in a shared network? [3GPP Release 17]


In [16]:
# docs = compression_retriever.invoke(question)
# for d in docs:
#     print(d.page_content)
#     print('----')

In [16]:
# Run the sample question through the reranking retriever and print every
# returned document followed by a separator line.
docs = compression_retriever.invoke(question)
for d in docs:
    print(d, '----', sep='\n')

page_content='If the UE can proceed to attach, it initiates the Attach procedure by the transmission, to the eNodeB, of an Attach Request (IMSI or old GUTI, Old GUTI type, last visited TAI (if available), UE Core Network Capability, UE Specific DRX parameters, extended idle mode DRX parameters, UE paging probability information, Attach Type, ESM message container (Request Type, PDN Type, Protocol Configuration Options, Ciphered Options Transfer Flag, Header Compression Configuration), KSIASME, NAS sequence number,' metadata={'source': '23401-i40.docx'}
----
page_content='with another UE is performed via the network.' metadata={'source': '23304-i40.docx'}
----
page_content='means that the communication with another UE is performed via the network.' metadata={'source': '23700-33-i00.docx'}
----
page_content='NOTE 1:\tIn shared networks, when the message is sent from the VPLMN to the HPLMN, the PLMN ID that is communicated in this IE shall be that of the selected Core Network Operator for

In [None]:
# Echo the retrieval settings in effect, one value per line.
print(VECTOR_STORE_NAME, VECTOR_RETRIEVER_K, COMPRESSION_RETRIEVER_TOP_N, sep='\n')

# Inference

In [None]:
from lib.prompt import get_mcq_inference_prompt
import json
import pandas as pd

In [None]:
import random
# NOTE(review): leftover scratch expression -- it evaluates to 1 and its value
# is not assigned to anything; consider deleting it.
len({'j':6})

In [None]:
#Uncomment
# The tokenizer is loaded by the configured model name, while the model
# weights come from the local '../bin/pretrained_512_32/' directory.
answer_model_name = LLM_MODEL_NAME
tokenizer = AutoTokenizer.from_pretrained(answer_model_name)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token 
# Disabled alternative: base model plus a PEFT adapter.
# base_model = AutoModelForCausalLM.from_pretrained(answer_model_name,device_map="auto",)
# answer_model = PeftModel.from_pretrained(base_model, '../bin/pretrained_256_64/', device_map="auto")
# pretrained_512_32
answer_model = AutoModelForCausalLM.from_pretrained('../bin/pretrained_512_32/',device_map="auto",)
# from huggingface
# answer_model = AutoModelForCausalLM.from_pretrained(answer_model_name,device_map="auto")

# Text-generation pipeline wrapping the model and tokenizer loaded above.
# NOTE(review): torch_dtype and device_map are passed here together with an
# already-instantiated model -- confirm they take effect; if bfloat16 is
# intended it may need to be requested when the model itself is loaded.
answer_generator = transformers.pipeline(
    "text-generation",
    model=answer_model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto"
)

# question_answering = transformers.pipeline(
#     # "question-answering",
#     model="deepset/roberta-base-squad2",
#     # tokenizer='google-bert/bert-base-cased',
#     # torch_dtype=torch.bfloat16,
#     device_map="cuda:0"
# )

In [None]:
def answer_questions(qst_filename, answer_model, retriever, max_new_tokens=4,return_full_text=False, batch_size = 128, sample_size=-1):
    """Produce an answer id for the multiple-choice questions stored in a JSON file.

    For every selected question the retriever supplies context passages, a
    prompt is built with ``get_mcq_inference_prompt`` and the prompts are sent
    to ``answer_model`` in slices of ``batch_size``.

    Args:
        qst_filename: Path to a JSON file mapping question keys to question
            dicts. Each dict must contain a ``'question'`` entry. The question
            id is the second space-separated token of the key (assumes keys
            look like ``"question 42"`` -- TODO confirm against the data file).
        answer_model: Callable invoked as ``answer_model(prompts,
            max_new_tokens=..., return_full_text=...)``. For each prompt it
            must return a sequence whose first item is a dict with a
            ``'generated_text'`` entry.
        retriever: Object whose ``invoke(text)`` returns documents exposing a
            ``page_content`` attribute.
        max_new_tokens: Forwarded unchanged to ``answer_model``.
        return_full_text: Forwarded unchanged to ``answer_model``.
        batch_size: Number of prompts passed to ``answer_model`` per call;
            must be at least 1.
        sample_size: Negative means "use every question". Otherwise a random
            sample of that many questions is drawn, capped at the number of
            questions available.

    Returns:
        A list of ``(question_id, answer_id)`` tuples, where ``answer_id`` is
        the last character found before the first ``':'`` of the generated
        text. The list is empty when no question is selected.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # range() rejects a zero step and silently yields nothing for a negative
    # one, so fail loudly before doing any expensive work.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(qst_filename, encoding='utf-8') as file:
        questions = json.load(file)

    all_questions = list(questions.items())
    if sample_size < 0:
        sampled_questions = all_questions
    else:
        # random.sample raises when asked for more items than exist; cap it.
        sampled_questions = random.sample(all_questions, min(sample_size, len(all_questions)))

    # Nothing selected: return early instead of failing on prompts[0] below.
    if not sampled_questions:
        return []

    def get_question_prompt(qstn_id, qstn_data):
        """Return ``(question_id, prompt)`` for a single question entry."""
        qstn_id = qstn_id.split(' ')[1].strip()
        qstn_text = qstn_data['question']
        # Search the datastore for context, then flatten it onto a single line.
        docs = retriever.invoke(qstn_text)
        context = ' '.join(doc.page_content for doc in docs).replace('\n', '. ')
        infer_data = get_mcq_inference_prompt(qstn_data, context)
        return qstn_id, infer_data['prompt']

    prompts = [get_question_prompt(key, data) for key, data in tqdm(sampled_questions)]
    # Show one fully rendered (id, prompt) pair as a sanity check.
    print(prompts[0])

    solutions = []
    for start in tqdm(range(0, len(prompts), batch_size)):
        batch = prompts[start:start + batch_size]
        current_qstn_ids = [qstn_id for qstn_id, _ in batch]
        current_prompts = [prompt for _, prompt in batch]
        responses = answer_model(current_prompts, max_new_tokens=max_new_tokens, return_full_text=return_full_text)
        # Keep only the character right before the first ':' of each generation.
        current_ans_ids = [r[0]['generated_text'].split(':')[0][-1:].strip() for r in responses]
        solutions.extend(zip(current_qstn_ids, current_ans_ids))

    return solutions

In [None]:
def save_solution(filename,solution, task=''):
    """Write question/answer id pairs to ``filename`` as CSV.

    ``solution`` supplies the rows for the ``Question_ID`` and ``Answer_ID``
    columns; every row additionally gets ``task`` in a trailing ``Task``
    column. The index is not written.
    """
    table = pd.DataFrame(solution, columns=['Question_ID', 'Answer_ID'])
    # Append the constant Task column after the two id columns.
    table.insert(len(table.columns), 'Task', task)
    table.to_csv(filename, index=False)

In [None]:
# Answer every question in the submission file (a negative sample_size
# disables sampling), retrieving context through the reranking retriever.
# NOTE(review): the result is called train_soln although it is produced from
# Question_Submission.txt and later saved as 'testing_result.csv' -- consider
# a name that matches.
train_soln = answer_questions('../data/Question_Submission.txt',answer_generator,compression_retriever,sample_size=-1,batch_size = 128)

In [None]:
# Persist the answers as CSV; the last argument fills the 'Task' column.
save_solution('testing_result.csv',train_soln,'Phi-2')

# Evaluation

In [None]:
# import pandas as pd
# pred = pd.read_csv('training_result.csv')
# act = pd.read_csv('../data/Q_A_ID_training.csv')
# act = act[act['Question_ID'].isin(pred['Question_ID'])]
# len(act)
# import numpy as np
# pred['Answer_ID'][pred['Answer_ID'].isna()]
# pred=pred.sort_values(by="Question_ID").reset_index(drop=True)
# act=act.sort_values(by="Question_ID").reset_index(drop=True)
# pred['Answer_ID']=pred['Answer_ID'].astype(int)
# (pred['Answer_ID'] == act['Answer_ID']).mean()
# |algo|score|
# |--|--|
# |similarity|0.6235455167693361|
# |mmr|xxx|
# VECTOR_STORE_NAME
# pred[pred['Answer_ID'] != act['Answer_ID']].head()
# context from datastore
# question = q
# docs = compression_retriever.invoke(question['question'])
# context =  (' '.join(list(map(lambda d:d.page_content,docs)))).replace('\n', '. ')
# prompt = get_inference_prompt(question, context)
# refined_prompt = prompt['question']
# answer = prompt['answer']
# print(refined_prompt)
# gen_result = answer_generator(refined_prompt,max_new_tokens=128,return_full_text=False,)
# "option 1) Avoid monitoring neighbo"
# print(answer)
# print(gen_result[0]['generated_text'])