In [None]:
# Uncomment and run once per environment to install the notebook's dependencies.
# %pip install llama-index-llms-huggingface
# %pip install llama-index
# %pip install llama-index-finetuning
# %pip install pydantic==2.0.3
# NOTE(review): this notebook also imports `llama_index.embeddings.huggingface`;
# if that import fails, the package below is presumably missing — confirm.
# %pip install llama-index-embeddings-huggingface

In [4]:
# Authenticate with the Hugging Face Hub. The Llama 2 checkpoints selected
# later in this notebook require an account that has been granted access.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [1]:
import logging
import sys

# Route library logs to stdout so they show up in the cell output.
# basicConfig already attaches one stdout handler to the root logger; stacking
# a second StreamHandler on top of it made every record print twice (once with
# the "LEVEL:name:" prefix and once bare). force=True replaces any handlers
# left over from an earlier run, so re-executing this cell stays idempotent.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)


from IPython.display import Markdown, display

In [2]:
import torch
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core import PromptTemplate

# Model names (make sure you have access on HF)
LLAMA2_7B = "meta-llama/Llama-2-7b-hf"
LLAMA2_7B_CHAT = "meta-llama/Llama-2-7b-chat-hf"
LLAMA2_13B = "meta-llama/Llama-2-13b-hf"
LLAMA2_13B_CHAT = "meta-llama/Llama-2-13b-chat-hf"
LLAMA2_70B = "meta-llama/Llama-2-70b-hf"
LLAMA2_70B_CHAT = "meta-llama/Llama-2-70b-chat-hf"

selected_model = LLAMA2_13B_CHAT

SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source BGP data. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language typically used in business documents in North America.
- Never generate offensive or foul language.
"""

# Llama 2 chat checkpoints expect the system prompt to be delimited by
# <<SYS>> ... <</SYS>> inside the [INST] block. The previous template had bare
# "<>" markers (the tag names were lost), so the system prompt was not marked
# up in the format the chat model was trained on.
query_wrapper_prompt = PromptTemplate(
    "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
)

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=2048,
    # Greedy decoding. `temperature` only applies when sampling is enabled, so
    # passing it together with do_sample=False just triggers a warning.
    generate_kwargs={"do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=selected_model,
    model_name=selected_model,
    device_map="auto",
    # change these settings below depending on your GPU
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": False},
)




Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [3]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Name of the Hugging Face embedding checkpoint, kept in one place so it is
# easy to swap.
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_NAME)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']


In [4]:
from llama_index.core import Settings

# Register the embedding model and the LLM as LlamaIndex's global defaults.
Settings.embed_model = embed_model
Settings.llm = llm

In [5]:
from llama_index.core import SimpleDirectoryReader

# Read every file under the BGP data directory into LlamaIndex documents.
bgp_reader = SimpleDirectoryReader("./data/BGP_data/")
documents = bgp_reader.load_data()

In [6]:
from llama_index.core import VectorStoreIndex

# Build the vector index from the loaded documents.
index = VectorStoreIndex.from_documents(documents=documents)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

# Querying

In [7]:
# Build a query engine over the vector index using the default options.
# Tip: set the logging level to DEBUG for more detailed outputs.
query_engine = index.as_query_engine()

In [8]:
# Ask for the peak unique-prefix count of one AS and render the answer as Markdown.
question = "What is the highest number of num_unique_prefixes_announced for AS23724?"
response = query_engine.query(question)
display(Markdown(str(response)))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]



Based on the additional context provided, the highest number of num_unique_prefixes_announced for AS23724 is 19, which appears in row 14 of the BGP data. This is consistent with the original answer.

In [9]:
# Ask for the peak announcement count of one AS and render the answer as Markdown.
question = "What is the highest number of announcements for AS23724?"
response = query_engine.query(question)
display(Markdown(str(response)))

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Based on the provided BGP data, the highest number of announcements for AS23724 is 19, which occurs in row 14 of the dataset. This is reflected in the following announcements:

* Row 234: 2010-04-12 09:15:00
* Row 235: 2010-04-12 09:20:00
* Row 236: 2010-04-12 09:25:00

All of these announcements have a value of 0 for the AS path length and MED values, indicating that they are all originated from AS23724.

In [10]:
import time

# Rebuild the query engine in streaming mode so the answer is exposed through
# `response.response_gen` and can be printed piece by piece.
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query("Provide the summary of the data.")

# perf_counter is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted.
start_time = time.perf_counter()

# Each item yielded by the generator is counted as one token.
# NOTE(review): streamed chunks may not map 1:1 to model tokens — confirm
# before treating the figure below as an exact tokens/s measurement.
token_count = 0
for token in response.response_gen:
    print(token, end="", flush=True)
    token_count += 1

time_elapsed = time.perf_counter() - start_time
# Guard against an empty or instantaneous stream, which would otherwise raise
# ZeroDivisionError.
tokens_per_second = token_count / time_elapsed if time_elapsed > 0 else 0.0

print(f"\n\nStreamed output at {tokens_per_second} tokens/s")

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Based on the provided context, here is a more detailed summary of the data:

The data consists of 366 rows, each representing a single entry in a BGP table. Each row contains 10 columns, providing information about the IP prefix, next hop, and other attributes of the entry. The columns are as follows:

* IP prefix (in CIDR notation)
* Next hop IP address
* Next hop AS number
* Origin type (e.g., "i" for inet, "g" for gigabit)
* Origin AS number
* AS path length
* MED (Multi-Exit Discriminator)
* Weight
* Confidence

The data appears to be a snapshot of a BGP table at a particular point in time, with each row representing a single entry in the table. The columns provide information about the IP prefix, next hop, and other attributes of the entry.

The data covers a period of 5 minutes, from 20:00:00 to 20:15:00, with each row representing a single entry in the table at a specific point in time. The entries are grouped into three rows for each minute, with each row representing a differe