In [1]:
# Enable IPython's autoreload extension in mode 2 so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import os.path as osp
import sys
# Put the parent of the current working directory on the import path so that
# project packages (e.g. `lib`, imported further down) can be resolved.
# NOTE(review): this assumes the kernel is started from a direct sub-directory
# of the repository root -- confirm for your setup.
ROOT_DIR = osp.dirname(os.getcwd())
# Guard the append so that re-running this cell does not stack duplicate
# entries onto sys.path.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# Set up models, vectorstore and retriever

In [3]:
from langchain_chroma import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain.retrievers import ContextualCompressionRetriever
from tqdm import tqdm

In [4]:
# Data locations, expressed relative to the notebook's working directory.
DOCUMENT_PATH = "../data/rel18/"  # directory holding the source documents
VECTOR_STORE_PATH = "../data/vectorstore/"  # parent directory of the persisted vector store

In [5]:
# Embedding model used to query the vector store; it runs on the CPU through
# GPT4All.
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
# `allow_download` is a boolean switch. It must be a real bool: a string such
# as 'True' (or 'False') is always truthy and therefore cannot disable
# downloading.
encode_kwargs = {'allow_download': True}
embeddings = GPT4AllEmbeddings(
    model_name=model_name,
    device='cpu',
    gpt4all_kwargs=encode_kwargs
)

In [6]:
# Open the Chroma collection persisted under <VECTOR_STORE_PATH>chromadb,
# using the same embedding function for queries.
chroma_directory = VECTOR_STORE_PATH + "chromadb"
vectorstore = Chroma(
    persist_directory=chroma_directory,
    embedding_function=embeddings,
)

In [7]:
# Sanity check: fetch a single stored record to confirm the store is populated.
# Uses the public `Chroma.get` wrapper rather than the private `_collection`
# attribute.
vectorstore.get(limit=1)

{'ids': ['00000a5c-dc75-47d5-83c5-4763f5cf0ef1'],
 'embeddings': None,
 'metadatas': [{'source': '../data/rel18/28550-i30.docx'}],
 'documents': ['The Performance Data Stream Units are described using ASN.1 as specified in ITU-T Rec. X.680 [15] and X.681 [16]. Transfer syntax for Performance Data Stream Units is derived from their ASN.1 definitions by use of Packed Encoding Rules (PER), aligned as'],
 'uris': None,
 'data': None}

In [8]:
from transformers import AutoTokenizer,AutoModelForCausalLM
from peft import PeftModel
import transformers
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Identifier of the base causal language model checkpoint to load.
model = "microsoft/phi-2"

In [10]:
# Load the tokenizer and base weights for the checkpoint named by `model`,
# then wrap the base network with the adapter stored in ../bin/pretrained/.
tokenizer = AutoTokenizer.from_pretrained(model)
# Bind the loaded network to its own name. Rebinding `model` (a checkpoint
# string) to the model object would make this cell non-idempotent: on a
# re-run, `from_pretrained` would receive a model object instead of a name.
base_model = AutoModelForCausalLM.from_pretrained(model, device_map="auto")
peft_model = PeftModel.from_pretrained(base_model, '../bin/pretrained/', device_map="auto")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.72s/it]


In [11]:
# Build the text-generation pipeline around the adapter-wrapped model.
# NOTE(review): the model is passed in already instantiated; whether
# `torch_dtype` and `device_map` still take effect in that case should be
# confirmed against the installed transformers version.
pipeline_options = dict(
    model=peft_model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
answer_generator = transformers.pipeline("text-generation", **pipeline_options)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalL

In [12]:
# Cross-encoder that the reranking stage uses to score retrieved passages.
reranker_checkpoint = "BAAI/bge-reranker-base"
rerank_model = HuggingFaceCrossEncoder(model_name=reranker_checkpoint)



In [13]:
# Two-stage retrieval: a similarity search proposes candidates, then the
# cross-encoder reranker keeps the best-scoring subset.
candidate_count = 20  # passages requested from the vector store
reranked_count = 10   # passages kept after reranking

retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={'k': candidate_count},
)
compressor = CrossEncoderReranker(model=rerank_model, top_n=reranked_count)
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

# Inference

## Set up question

In [14]:
import json

# Load the TeleQnA training set. Despite the .txt extension the file is parsed
# as JSON. The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
with open('../data/TeleQnA_training.txt', encoding='utf-8') as file:
    questions = json.load(file)

In [15]:
# Select the single record that the rest of the notebook runs inference on.
question_key = 'question 152'
question = questions[question_key]

## Get context

In [16]:
# Retrieve the reranked passages for the question text, then flatten them into
# one context string: passages are joined with spaces and every newline is
# replaced by '. '.
docs = compression_retriever.invoke(question['question'])
passage_texts = [doc.page_content for doc in docs]
context = ' '.join(passage_texts).replace('\n', '. ')

In [17]:
from langchain_core.prompts import PromptTemplate
from lib.prompt import get_inference_prompt


In [18]:
# prompt = qa_prompt.invoke({'question':question['question'],'context':context}).to_string()
# print(prompt)
# gen_result = answer_generator(prompt,max_new_tokens=256,return_full_text=False)
# refined_context = gen_result[0]['generated_text'].replace('\n','')
# refined_context

In [19]:
# question['answer']

In [20]:
# Display the selected record (question text, options, reference answer,
# explanation and category) for inspection.
question

{'question': 'What does the Relaxed monitoring for cell reselection feature in NB-IoT allow the UE to do? [3GPP Release 15]',
 'option 1': 'Avoid monitoring neighbouring cells for 24 hours in stationary or non-changing network topology',
 'option 2': 'Request NPUSCH resources for BSR transmission',
 'option 3': 'Reduce system acquisition time for the UE',
 'option 4': 'Support small cell deployments with lower maximum transmit power',
 'option 5': 'Improve the accuracy of narrowband measurement',
 'answer': 'option 1: Avoid monitoring neighbouring cells for 24 hours in stationary or non-changing network topology',
 'explanation': 'The Relaxed monitoring for cell reselection feature allows the UE to avoid monitoring neighbouring cells for 24 hours in cases where the UE is stationary and/or the network topology is not changing. This helps to extend UE battery life.',
 'category': 'Standards overview'}

In [21]:
# Build the inference prompt from the question record and the retrieved
# context using the project helper from lib.prompt. The result is indexed by
# the keys 'question' and 'answer' in the following cell.
prompt = get_inference_prompt(question, context)

In [22]:
# Split the helper's result into the text fed to the model and the reference
# answer used for comparison.
refined_prompt, answer = prompt['question'], prompt['answer']

In [23]:
# Show the exact prompt text that will be sent to the generator.
print(refined_prompt)

### Instructions: 
Based on only the provided context, select the correct answer from the choices given. Provide your answer in the following format: option Number) Answer. Do not include any additional text or explanation.

Context:
-	Relaxed monitoring for cell reselection: When this feature is enabled and the criteria for relaxed monitoring are fulfilled, the UE can reduce its neighbor cell measurements to as seldom as every 24 hours. This can reduce the power consumption In RRC_IDLE, for NB-IoT UEs, when enabled in the cell and the relaxed monitoring criterion is fulfilled, the UE may perform serving cell measurements on the non-anchor paging carrier as specified in TS 36.133 [21]. For NB-IoT, cell reselection identifies the cell that the UE should camp on. It is based on cell reselection criteria which involve measurements of the serving and neighbour cells as follows: -	Cell reselection identifies the cell that the UE should camp on. It is based on cell reselection criteria which

In [24]:
# Generate the answer: return only the continuation (not the prompt), capped
# at 128 new tokens. `pad_token_id` is passed explicitly so generation does
# not fall back to the EOS token implicitly and emit a warning on every call.
gen_result = answer_generator(
    refined_prompt,
    max_new_tokens=128,
    return_full_text=False,
    pad_token_id=tokenizer.eos_token_id,
)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


In [25]:
# Reference answer taken from the prompt helper's result, for comparison with
# the model output below.
print(answer)

option 1: Avoid monitoring neighbouring cells for 24 hours in stationary or non-changing network topology


In [26]:
# Text produced by the model for the first (and only) returned sequence.
print(gen_result[0]['generated_text'])

option 1) Avoid monitoring neighbouring cells for 24 hours in stationary or non-changing network topology

### Explanation:
The Relaxed monitoring for cell reselection feature in NB-IoT allows the UE to reduce its neighbour cell measurements to as seldom as every 24 hours. This can reduce the power consumption in RRC_IDLE, for NB-IoT UEs, when enabled in the cell and the relaxed monitoring criterion is fulfilled. This feature is only applicable for NB-IoT UEs, BL UEs or UEs in enhanced coverage.

### Follow-up Questions:
1.
