In [None]:
import os
# Select the attention backend through vLLM's environment variable. This is set
# before the vllm-backed import further down in this cell -- presumably so the
# value is already in place when vLLM chooses its backend; keep this ordering.
os.environ["VLLM_ATTENTION_BACKEND"] = "FLASH_ATTN"
from IPython.display import Markdown, display
from tqdm.notebook import tqdm

from llama_index.core import SimpleDirectoryReader, load_index_from_storage, StorageContext, VectorStoreIndex
from llama_index.readers.file import (
    DocxReader,
    PptxReader,
    XMLReader,
    PyMuPDFReader,
)
from llama_index.llms.vllm import Vllm
# from llama_index.llms.huggingface import HuggingFaceLLM

In [2]:
# Working directories, resolved against the directory the notebook is run from:
#   dataset_dir -- input documents to be parsed
#   storage_dir -- persisted vector index
dataset_dir = os.path.join(os.getcwd(), "datasets")
storage_dir = os.path.join(os.getcwd(), "storage")

# exist_ok=True makes this cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
for directory in (dataset_dir, storage_dir):
    os.makedirs(directory, exist_ok=True)

In [None]:
# Set to False to skip re-parsing the documents and rebuilding the index
# (e.g. when a previously persisted index is going to be loaded instead).
parse = True
if parse:
    # Parse Data: .pdf files in dataset_dir are read with PyMuPDFReader.
    parser = PyMuPDFReader()
    file_extractor = {".pdf": parser}
    documents = SimpleDirectoryReader(
        dataset_dir, file_extractor=file_extractor
    ).load_data()

    # Saving to VectorDB: build the index, tag it with a fixed id so it can be
    # looked up by that id later, and persist it to the notebook's storage
    # directory (storage_dir) rather than a separately hard-coded path.
    index = VectorStoreIndex.from_documents(documents)
    index.set_index_id("vector_index")
    index.storage_context.persist(persist_dir=storage_dir)

[Document(id_='9e600051-5352-447d-8756-1c86579a6e42', embedding=None, metadata={'file_path': '/voyager/projects/jacobyhsi/seaRAG/datasets/2025Q1.pdf', 'file_name': '2025Q1.pdf', 'file_type': 'application/pdf', 'file_size': 217820, 'creation_date': '2025-02-10', 'last_modified_date': '2025-02-10', 'total_pages': 20, 'source': '1'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='Farm Fresh Berhad \nRegistration Number: 201001010221 (894851-U) \n(Incorporated in Malaysia) \n \n \nInterim Financial Report \nFirst quarter ended 30 June 2024 \n \n \n \n', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resou

In [None]:
# Load VectorDB
# Read the persisted index back from the notebook's storage directory
# (storage_dir) instead of a cwd-relative "storage" literal, and select it by
# the id "vector_index". This rebinds `index`.
storage_context = StorageContext.from_defaults(persist_dir=storage_dir)
index = load_index_from_storage(storage_context, index_id="vector_index")

<llama_index.core.indices.vector_store.base.VectorStoreIndex object at 0x7f90f00dbdc0>


In [5]:
# LLM
# The model is loaded locally through vLLM. To try a different checkpoint,
# change MODEL_NAME; alternatives that have been used with this notebook:
#   'meta-llama/Meta-Llama-3-70B-Instruct'
#   'nvidia/Llama-3.1-Nemotron-70B-Instruct-HF'
#   'deepseek-ai/DeepSeek-R1-Distill-Llama-70B'
# Alternative backend (needs `from llama_index.llms.huggingface import HuggingFaceLLM`):
#   llm = HuggingFaceLLM(
#       model_name='nvidia/Llama-3.1-Nemotron-70B-Instruct-HF',  # or another Hugging Face model
#       device_map="auto"    # automatically map model layers to available devices (GPU/CPU)
#   )
MODEL_NAME = 'meta-llama/Meta-Llama-3-8B-Instruct'
# Forwarded as vLLM's tensor_parallel_size -- presumably the number of GPUs the
# model is sharded across; confirm against the available hardware.
TENSOR_PARALLEL_SIZE = 4

llm = Vllm(model=MODEL_NAME, tensor_parallel_size=TENSOR_PARALLEL_SIZE)

2025-02-11 00:50:34,283	INFO worker.py:1841 -- Started a local Ray instance.


INFO 02-11 00:50:40 llm_engine.py:98] Initializing an LLM engine (v0.4.1) with config: model='meta-llama/Meta-Llama-3-8B-Instruct', speculative_config=None, tokenizer='meta-llama/Meta-Llama-3-8B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=8192, download_dir=None, load_format=auto, tensor_parallel_size=4, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), seed=0)
INFO 02-11 00:50:54 utils.py:608] Found nccl from library /h/300/jacobyhsi/.config/vllm/nccl/cu12/libnccl.so.2.18.1
[36m(RayWorkerWrapper pid=422008)[0m INFO 02-11 00:50:54 utils.py:608] Found nccl from library /h/300/jacobyhsi/.config/vllm/nccl/cu12/libnccl.so.2.18.1
INFO 02-11 00:51:02 selector.py:28] Using FlashAttention backend.
[36m(RayWorkerWrapper 

In [7]:
# Build the conversational interface on top of the vector index.
# NOTE(review): the `llm` object created earlier in this notebook is not passed
# to this call, so the engine runs on whatever LLM is configured as the
# default -- confirm that this is intended.
# Debugging variant: add verbose=True to the call below.
chat_engine = index.as_chat_engine(
    chat_mode="best",
)

In [10]:
# Interactive chat loop: read a question, send it to the chat engine, and render
# the exchange as Markdown. Type "exit" or "quit" to stop.
while True:
    try:
        # Strip surrounding whitespace so inputs like " exit " are still recognised.
        user_input = input("User: ").strip()
    except (EOFError, KeyboardInterrupt):
        # No more input / prompt interrupted: leave the loop without a traceback.
        break

    if not user_input:
        continue  # Ignore blank submissions instead of sending them to the engine
    if user_input.lower() in ("exit", "quit"):
        break  # Exit the loop

    streaming_response = chat_engine.stream_chat(user_input)

    # Drain the generator and assemble the reply in one pass.
    markdown_response = (
        f"**User:** {user_input}\n\n**LLM:** "  # Format for Markdown
        + "".join(streaming_response.response_gen)
    )

    display(Markdown(markdown_response))  # Display in Markdown format

**User:** hi

**LLM:** Hello! How can I assist you today?

**User:** What is the Unaudited 30.9.2024 Total assets from Farm Fresh Berhad's Condensed consolidated statement of financial position as at 30 September 2024?

**LLM:** The Unaudited Total assets from Farm Fresh Berhad's Condensed consolidated statement of financial position as at 30 September 2024 is RM 1,357,649,000.

**User:** what does this figure imply? explain to me in detail.

**LLM:** To provide a detailed analysis of the implications of Farm Fresh Berhad's Total assets figure, we need to consider various factors such as the company's financial health, growth prospects, market position, and industry trends. The Total assets figure represents the total value of all assets owned by the company, including both current and non-current assets. Here are some implications of this figure:

1. **Financial Strength**: A higher Total assets figure indicates that Farm Fresh Berhad has significant resources and investments. This suggests that the company has a strong financial position and asset base, which can be used for future growth, expansion, and investment opportunities.

2. **Liquidity**: The composition of assets (current vs. non-current) can provide insights into the company's liquidity. A higher proportion of current assets indicates better short-term liquidity, while a significant portion of non-current assets may signify long-term investments.

3. **Growth Potential**: Increasing Total assets over time may indicate the company's growth potential and ability to generate returns on its investments. It reflects the company's capacity to expand its operations and invest in new projects.

4. **Market Competitiveness**: A substantial Total assets figure can enhance Farm Fresh Berhad's competitiveness in the market. It may allow the company to compete effectively, attract investors, and strengthen its market position relative to competitors.

5. **Investor Perception**: Investors often look at Total assets as a measure of a company's stability and value. A strong asset base can attract investor confidence and positively impact the company's stock performance and market valuation.

6. **Risk Management**: The Total assets figure also plays a role in assessing the company's risk management strategies. A diversified asset base can help mitigate risks and uncertainties in the business environment.

Overall, the Total assets figure of Farm Fresh Berhad provides valuable insights into the company's financial standing, growth potential, and market competitiveness. It is essential to analyze this figure in conjunction with other financial metrics to gain a comprehensive understanding of the company's performance and prospects.