# LLM

In [1]:
from llama_index.core import Settings
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Hugging Face Hub id of the causal language model used for answer generation.
model_name = "DistilGPT2"

# float16 halves memory on a GPU, but CPU builds of PyTorch may lack
# half-precision kernels for some ops, so fall back to float32 when
# device_map="auto" has no CUDA device to place the model on.
model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Load the model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=model_dtype, device_map="auto")

# Create the HuggingFaceLLM instance
llm = HuggingFaceLLM(
    model=model,
    tokenizer=tokenizer,
    # Use the model's own positional limit (1024 for DistilGPT2) instead of a
    # hard-coded 2048: prompts longer than the position table fail at generation.
    context_window=model.config.max_position_embeddings,
    # Upper bound on generated tokens; this budget is reserved out of context_window.
    max_new_tokens=512,
    # Sampling options belong in generate_kwargs (forwarded to model.generate()).
    # model_kwargs are load-time options and are not applied to a model that is
    # passed in already instantiated, so the sampling settings were being ignored.
    generate_kwargs={"temperature": 0.5, "do_sample": True},
)


# Update the global settings so every index / query engine created later uses
# this LLM and a locally run embedding model.
Settings.llm = llm
Settings.embed_model = "local:BAAI/bge-small-en-v1.5"  # Or any other embedding model you prefer

  from .autonotebook import tqdm as notebook_tqdm





To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [2]:
from llama_index.core import SimpleDirectoryReader

# Directory holding the extracted text files to index.
# Raw string literal: keeps the Windows backslashes verbatim. In a normal string
# "\A", "\F" and "\E" are invalid escape sequences (SyntaxWarning on Python 3.12+),
# and a folder starting with e.g. "n" or "t" would silently become a control character.
txt_file = r'D:\All_Coding_stuff\FinancialDashboard\Extracted_text'

# Read every supported file in the directory into llama_index Document objects.
documents = SimpleDirectoryReader(txt_file).load_data()

## Sentence Window RAG 

In [3]:
from llama_index.core.node_parser import SentenceWindowNodeParser

In [4]:
# Sentence-window parser: each node holds a single sentence, while its metadata
# keeps the sentence itself ("original_text") and the sentence plus its
# neighbours ("window", 3 sentences on either side).
node_parser = SentenceWindowNodeParser.from_defaults(
    original_text_metadata_key="original_text",
    window_metadata_key="window",
    window_size=3,
)

In [5]:
# Sanity check: print the summary representation of every loaded document.
for doc in documents:
    print(doc)

Doc ID: 459b94ee-eb30-4cd9-b0df-ed7a413be244
Text: Annual Report 2018-19         Hindustan Unilever
Limited51ReportsFinancial StatementsOverviewIndias (WiMi) in 2016. It
has been a journey strengthening the WiMi thinking across markets,
end-to-end planning and ways of working. This has helped the Company
to move the needle on quality of servicing and in-market execution by
getting closer to the ...
Doc ID: 335f4c2f-6a9b-426f-92be-4cc847e1a684
Text: Accordingly, consent of the members is sought for passing an
Ordinary Resolution as set out at Special Business Item No. 2 of the
Notice for ratification of the remuneration payable to the Cost
Auditors for the financial year ending March 31, 2015.The Board
recommends the proposal at special business Item No. 2 for approval by
the shareholders.N...
Doc ID: be8e557a-0fe5-4df7-91b8-2956ed349656
Text: Annexures (Continued)176 Receipts & Payments Account177
Schedules242 Salient features of the Financial Statement of
Subsidiaries (Form AOC-I)243 

In [6]:
# Split the loaded documents into sentence-level nodes (with window metadata).
nodes = node_parser.get_nodes_from_documents(
    documents,
)

In [7]:
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings

# Baseline for comparison: chunk the same documents with the globally
# configured text splitter instead of the sentence-window parser.
base_nodes = Settings.text_splitter.get_nodes_from_documents(
    documents,
)

# Vector Index

In [8]:
from llama_index.core import VectorStoreIndex

In [None]:
# Embed the sentence-window nodes and build an in-memory vector index over them.
sentence_index = VectorStoreIndex(nodes=nodes)

In [None]:
# Same kind of index over the baseline chunks, for side-by-side comparison.
base_index = VectorStoreIndex(nodes=base_nodes)

# Query
- During the querying stage, we use the MetadataReplacementPostProcessor to replace the sentence in each node with its surrounding context.

In [None]:
from llama_index.core.postprocessor import MetadataReplacementPostProcessor 

# Retrieve the 2 most similar sentence nodes, then let the postprocessor swap
# each node's text for the value stored under its "window" metadata key before
# the answer is synthesized.
query_engine = sentence_index.as_query_engine(
    node_postprocessors=[MetadataReplacementPostProcessor(target_metadata_key="window")],
    similarity_top_k=2,
)

# Chatbot

In [None]:
# Run a sample question through the sentence-window query engine and show the answer text.
response = query_engine.query("Tell me about ICICIPRULI_AR")
print(response)