In [1]:
from torch import cuda, bfloat16
import transformers
from transformers import (
    pipeline,
    logging,
)
import pandas as pd
import os

model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

# bnb_config = transformers.BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type='nf4',
#     bnb_4bit_use_double_quant=True,
#     bnb_4bit_compute_dtype=bfloat16
# )

hf_token = os.environ.get('hf_token')

# Need auth token for these
hf_auth = hf_token
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)
model.eval()
print(f"Model loaded on {device}")

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Model loaded on cuda:0


In [2]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = "right"

In [3]:
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt_input = "Change the AS number to 7812 and total run time to 500. Add a function that will plot a bar graph with the top 5 prefix announcements and withdrawals along with print statements."
prompt_output = """
import pybgpstream
import pandas as pd
import matplotlib.pyplot as plt
import threading
import time

# Initialize BGPStream instance
stream = pybgpstream.BGPStream(
    project="ris-live",
    record_type="updates",
)

# Initialize DataFrames for announcements and withdrawals
announcements = pd.DataFrame(columns=['Time', 'Prefix', 'Count'])
withdrawals = pd.DataFrame(columns=['Time', 'Prefix', 'Count'])

def process_bgp_data(stop_time):
    global announcements, withdrawals
    end_time = time.time() + stop_time  # Calculate end time based on stop_time duration
    last_print_time = time.time()

    while time.time() < end_time:
        for rec in stream.records():
            for elem in rec:
                # Filter records with 24482 in the AS path
                as_path = elem.fields.get('as-path', '').split()
                if '24482' in as_path:
                    time_str = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(rec.time))
                    prefix = elem.fields['prefix']
                    df = announcements if elem.type == 'A' else withdrawals
                    row_index = df[(df['Time'] == time_str) & (df['Prefix'] == prefix)].index
                    if row_index.empty:
                        df.loc[len(df)] = [time_str, prefix, 1]
                    else:
                        df.loc[row_index, 'Count'] += 1

            # Check if it's time to print and reset DataFrames (every 60 seconds)
            if time.time() - last_print_time >= 60:
                # Sort and get the top 5 prefixes by count for announcements
                top_announcements = announcements.sort_values(by='Count', ascending=False).head(5)
                # Sort and get the top 5 prefixes by count for withdrawals
                top_withdrawals = withdrawals.sort_values(by='Count', ascending=False).head(5)
                
                print("Top 5 Announcements:\n", top_announcements)
                print("Top 5 Withdrawals:\n", top_withdrawals)
                
                announcements = pd.DataFrame(columns=['Time', 'Prefix', 'Count'])
                withdrawals = pd.DataFrame(columns=['Time', 'Prefix', 'Count'])
                last_print_time = time.time()  # Update last_print_time

    print("Data collection finished.")

# Define the total running time (180 seconds)
total_running_time = 180

# Start data collection thread
data_collection_thread = threading.Thread(target=, args=(total_running_time,))
data_collection_thread.start()
"""

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=2056)
result = pipe(f"<s>[INST] {prompt_input} {prompt_output} [/INST]")
print(result[0]['generated_text'])

<s>[INST] Change the AS number to 7812 and total run time to 500. Add a function that will plot a bar graph with the top 5 prefix announcements and withdrawals along with print statements. 
import pybgpstream
import pandas as pd
import matplotlib.pyplot as plt
import threading
import time

# Initialize BGPStream instance
stream = pybgpstream.BGPStream(
    project="ris-live",
    record_type="updates",
)

# Initialize DataFrames for announcements and withdrawals
announcements = pd.DataFrame(columns=['Time', 'Prefix', 'Count'])
withdrawals = pd.DataFrame(columns=['Time', 'Prefix', 'Count'])

def process_bgp_data(stop_time):
    global announcements, withdrawals
    end_time = time.time() + stop_time  # Calculate end time based on stop_time duration
    last_print_time = time.time()

    while time.time() < end_time:
        for rec in stream.records():
            for elem in rec:
                # Filter records with 24482 in the AS path
                as_path = elem.fields.get('as-pa

In [None]:
prompt_input = "Identify the peer with the higest number of updates in the data below"
# prompt_input = "Summarize the data"


bgp_data = """
On 2019-06-06 04:35:00, Autonomous System 1136 observed 0 announcements. The total number of active routes was 0. The maximum path length observed was 0 hops, with an average path length of 0.00 hops. The maximum edit distance was 0, with an average of 0.00. The graph average degree was 0.00. The graph betweenness centrality was 0.0000. The graph closeness centrality was 0.0000. The graph eigenvector centrality was 0.0000. The average MED was 0.00. The average local preference was 0.00. There were 0 total communities observed, with 0 unique communities. The number of unique peers was 43. The average prefix length was 0.00, with a maximum of 0 and a minimum of 0. The top peers by updates were: AS20514 with 19014.0 updates, AS57264 with 10395.0 updates, AS29504 with 7461.0 updates, AS202365 with 6755 updates, AS174 with 6448.0 updates.

On 2019-06-06 04:40:00, Autonomous System 1136 observed 0 announcements. The total number of active routes was 0. The maximum path length observed was 0 hops, with an average path length of 0.00 hops. The maximum edit distance was 0, with an average of 0.00. The graph average degree was 0.00. The graph betweenness centrality was 0.0000. The graph closeness centrality was 0.0000. The graph eigenvector centrality was 0.0000. The average MED was 0.00. The average local preference was 0.00. There were 0 total communities observed, with 0 unique communities. The number of unique peers was 42. The average prefix length was 0.00, with a maximum of 0 and a minimum of 0. The top peers by updates were: AS174 with 7787.0 updates, AS202365 with 6610 updates, AS29504 with 5978.0 updates, AS15562 with 4278 updates, AS22652 with 3862 updates.

On 2019-06-06 04:45:00, Autonomous System 1136 observed 0 announcements. The total number of active routes was 0. The maximum path length observed was 0 hops, with an average path length of 0.00 hops. The maximum edit distance was 0, with an average of 0.00. The graph average degree was 0.00. The graph betweenness centrality was 0.0000. The graph closeness centrality was 0.0000. The graph eigenvector centrality was 0.0000. The average MED was 0.00. The average local preference was 0.00. There were 0 total communities observed, with 0 unique communities. The number of unique peers was 43. The average prefix length was 0.00, with a maximum of 0 and a minimum of 0. The top peers by updates were: AS174 with 7705.0 updates, AS29504 with 7083.0 updates, AS202365 with 6665 updates, AS22652 with 5682 updates, AS205593 with 4065 updates.
"""
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=1012)
result = pipe(f"<s>[INST] {prompt_input}: {bgp_data} [/INST]")
print(result[0]['generated_text'])