### python_39_env

In [None]:
import transformers
from transformers import (
    pipeline,
    logging,
)
from torch import cuda
import os

In [None]:
import os
import json
import logging
from threading import Lock
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face Hub identifiers of the checkpoints used in this notebook
LLAMA3_8B_INSTRUCT = "meta-llama/Meta-Llama-3.1-8B-Instruct"
CUSTOM_MODEL = "hyonbokan/BGPStream13-10k-cutoff-1024-max-2048"

# Route INFO-and-above records through the root handler; use a module logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared state filled in by load_model(); the lock serialises the first load
model_lock = Lock()
model = tokenizer = None

# Load the model only once
def load_model():
    """Load the causal LM and its tokenizer once and cache them in module globals.

    The first successful call downloads/loads ``LLAMA3_8B_INSTRUCT`` (using the
    ``HF_TOKEN`` environment variable for authentication, if set), makes sure
    the tokenizer has a padding token, and stores both objects in the
    module-level ``model`` / ``tokenizer`` variables. Later calls return the
    cached objects without reloading.

    Both objects are built in local variables and only published to the
    globals once everything succeeded, so a failure half-way through never
    leaves a partially initialised pair behind.

    Returns:
        tuple: ``(model, tokenizer)``.

    Raises:
        Exception: Any error raised while loading is logged (with traceback)
            and re-raised unchanged.
    """
    global model, tokenizer
    if model is None or tokenizer is None:  # Fast path: skip the lock once loaded
        with model_lock:
            if model is None or tokenizer is None:  # Re-check while holding the lock
                try:
                    model_id = LLAMA3_8B_INSTRUCT
                    hf_auth = os.environ.get('HF_TOKEN')

                    # `token` replaces the deprecated `use_auth_token` argument.
                    model_config = AutoConfig.from_pretrained(
                        model_id,
                        token=hf_auth
                    )
                    loaded_model = AutoModelForCausalLM.from_pretrained(
                        model_id,
                        trust_remote_code=True,
                        config=model_config,
                        device_map='auto',
                        token=hf_auth
                    )
                    loaded_tokenizer = AutoTokenizer.from_pretrained(
                        model_id,
                        token=hf_auth
                    )

                    # Add a padding token if the tokenizer has none. The
                    # embedding matrix only needs resizing when the vocabulary
                    # actually grew, so the resize lives inside this branch.
                    if loaded_tokenizer.pad_token is None:
                        loaded_tokenizer.add_special_tokens({'pad_token': '[PAD]'})
                        loaded_model.resize_token_embeddings(len(loaded_tokenizer))

                    loaded_tokenizer.padding_side = "right"

                    # Publish only fully configured objects.
                    tokenizer = loaded_tokenizer
                    model = loaded_model

                    logger.info("Model loaded successfully")
                except Exception as e:
                    # logger.exception keeps the traceback in the log.
                    logger.exception("Failed to load the model: %s", e)
                    raise
    return model, tokenizer

# Generate response function
def generate_llm_response(query):
    """Generate a completion for ``query`` with the cached model.

    The query is tokenized (truncated to at most 1500 tokens), passed to
    ``model.generate`` with sampling enabled, and the first returned sequence
    is decoded with special tokens stripped. The decoded text is printed and
    also returned so callers can use it programmatically.

    Args:
        query: Prompt text to send to the model.

    Returns:
        The decoded text of the first generated sequence, or ``None`` if
        loading the model or generating failed (the error is logged with its
        traceback and not re-raised).
    """
    logger.info("User query: %s", query)
    try:
        # Ensure model and tokenizer are loaded
        model, tokenizer = load_model()

        # Tokenize the input query
        inputs = tokenizer(
            query,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=1500
        )

        # Move the tensors to the device the model reports as its own
        input_ids = inputs.input_ids.to(model.device)
        attention_mask = inputs.attention_mask.to(model.device)

        # Generation settings
        generation_kwargs = dict(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_new_tokens=712,
            do_sample=True,
            temperature=0.1,
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.1,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

        # Generate output
        generated_ids = model.generate(**generation_kwargs)

        # Decode the first sequence.
        # NOTE(review): for decoder-only models the returned ids presumably
        # start with the prompt tokens, so the text includes the query — confirm.
        generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        print("Generated text:")
        print(generated_text)
        return generated_text

    except Exception as e:
        # Best-effort: keep the notebook running, but record the full traceback.
        logger.exception("Error generating LLM response: %s", e)
        return None

# Load the model once at startup so that later generate_llm_response() calls
# reuse the cached model and tokenizer. load_model() re-raises on failure, so
# a failed load surfaces here rather than at the first query.
load_model()


In [8]:
import pybgpstream
from datetime import datetime
import ipaddress
from collections import defaultdict

# Count announcements and withdrawals of a fixed set of prefixes seen in BGP
# update messages during a one-hour window, and tally the communities
# attached to the matching announcements.

# Define time range
from_time = "2024-03-15 13:00:00"
until_time = "2024-03-15 14:00:00"

# Filters (loop-invariant, so defined once up front)
target_asn = '3356'
target_prefixes = {'192.0.2.0/24', '198.51.100.0/24'}

# Initialize BGPStream
stream = pybgpstream.BGPStream(
    from_time=from_time,
    until_time=until_time,
    record_type="updates",
    collectors=["rrc00", "route-views.amsix"]
)

# Per-prefix counters and the most recently seen AS path per prefix
announcements = defaultdict(int)
withdrawals = defaultdict(int)
prefix_as_paths = {}
community_counts = defaultdict(int)

for rec in stream.records():
    for elem in rec:
        fields = elem.fields
        prefix = fields.get("prefix")

        # Exact-match filter on the target prefixes. This also skips elements
        # without a prefix (None) and any malformed prefix string, since only
        # the well-formed targets above can match.
        if prefix not in target_prefixes:
            continue

        elem_type = elem.type

        # Withdrawals carry no AS path, so they must be counted before the
        # AS-path filter; otherwise they would always be discarded.
        if elem_type == 'W':
            withdrawals[prefix] += 1
            continue

        # Keep only elements whose AS path traverses the target ASN.
        as_path = fields.get('as-path', "").split()
        if target_asn not in as_path:
            continue

        if elem_type == 'A':
            announcements[prefix] += 1

        # Remember the latest AS path observed for this prefix.
        prefix_as_paths[prefix] = as_path

        for community in fields.get('communities', ()):
            # PyBGPStream exposes communities as "asn:value" strings; indexing
            # such a string would yield single characters. Fall back to the
            # pair form for any non-string representation.
            if isinstance(community, str):
                community_str = community
            else:
                community_str = f"{community[0]}:{community[1]}"
            community_counts[community_str] += 1

# Print summary of announcements and withdrawals
print(f"Announcements: {sum(announcements.values())}")
print(f"Withdrawals: {sum(withdrawals.values())}")

Announcements: 0
Withdrawals: 0


### LLaMA2-7B:
`Multiple Choice v2`:
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18

`Multiple Choice v1`:
    Accuracy: 51.43%
    Correct answers: 18
    Incorrect answers: 17

### LLaMA2-13B:
`Multiple Choice v2`:
    Accuracy: 60.00%
    Correct answers: 21
    Incorrect answers: 14

`Multiple Choice v1`:
    Accuracy: 71.43%
    Correct answers: 25
    Incorrect answers: 10

### BGPtest9- cosine - `Best`:
`Multiple Choice v2`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

`Multiple Choice v1`:
    - Accuracy: 82.86%
    - Correct answers: 29
    - Incorrect answers: 6

### BGPtest10- constant:
`Multiple Choice v2`:
    Accuracy: 62.86%
    Correct answers: 22
    Incorrect answers: 13

`Multiple Choice v1`:
    - Accuracy: 71.43%
    - Correct answers: 25
    - Incorrect answers: 10

`Fill the blank v1`:
    - Accuracy: 88.57%
    - Correct answers: 31
    - Incorrect answers: 4

`Fill the blank v2`:
    - Accuracy: 91.42%
    - Correct answers: 33
    - Incorrect answers: 2


### BGP-LLaMA-1-cosine-2k-alpha64:
`Multiple Choice v2`:
    Accuracy: 51.43%
    Correct answers: 18
    Incorrect answers: 17

`Multiple Choice v1`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

### BGP-LLaMA-2-cosine-20k-alpha16:
`Multiple Choice v1`:
    Accuracy: 68.57%
    Correct answers: 24
    Incorrect answers: 11

`Multiple Choice v2`:
    Accuracy: 54.29%
    Correct answers: 19
    Incorrect answers: 16


### BGP-LLaMA-2(combined)-cosine-20k-alpha16:
`Multiple Choice v1`:
    Accuracy: 80.00%
    Correct answers: 28
    Incorrect answers: 7

`Multiple Choice v2`:

    Accuracy: 65.71%
    Correct answers: 23
    Incorrect answers: 12

### BGP-LLaMA-13b-2iter-40k-cutoff-max-2048: ??
`Multiple Choice v1`:
    Accuracy: 77.14%
    Correct answers: 27
    Incorrect answers: 8

    
`Multiple Choice v2`:
    Accuracy: 62.86%
    Correct answers: 22
    Incorrect answers: 13

`PyBGPStream`: did not pass

### BGP-LLaMA-13b-3-30k-cutoff-max-2048:
`Multiple Choice v1`:
    Accuracy: 57.14%
    Correct answers: 20
    Incorrect answers: 15

`Multiple Choice v2`:
    Accuracy: 54%

`PyBGPStream`: pass

### BGP-LLaMA-13b-50k-cutoff-max-2048: 
`Multiple Choice v1`:
    Accuracy: 60.00%
    Correct answers: 21
    Incorrect answers: 14
`PyBGPStream`: pass

### BGP-LLaMA-13b-20k-cutoff-max-2048: 
`Multiple Choice v1`:
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18
`PyBGPStream`: pass


### BGP-LLaMA-13b-20k-cutoff-1024-max-none:
`Multiple Choice v1`:
    Accuracy: 45.71%
    Correct answers: 16
    Incorrect answers: 19
`PyBGPStream`: pass


### BGP-LLaMA-13b-30k-cutoff-1024-max-none:
`Multiple Choice v1`:
Accuracy: 62.86%
Correct answers: 22
Incorrect answers: 13

`PyBGPStream`: pass


### BGP-LLaMA-13b-40k-cutoff-1024-max-none:
`Multiple Choice v1`:
    Accuracy: 48.57%
    Correct answers: 17
    Incorrect answers: 18
`PyBGPStream`: did not pass

### BGP-LLaMA-13b-50k-cutoff-1024-max-none:
`Multiple Choice v1`:
    Accuracy: 40.00%
    Correct answers: 14
    Incorrect answers: 21
`PyBGPStream`: did not pass


### BGP-LLaMA26k-13b-30k-cutoff-1024-max-None:
`Multiple Choice v1`: did not pass

`PyBGPStream`: pass

### BGP-LLaMA26k-13b-20k-cutoff-1024-max-None:
`Multiple Choice v1`:
Incorrect
Accuracy: 37.14%
Correct answers: 13
Incorrect answers: 22
`PyBGPStream`: pass

### BGP-LLaMA26k-13b-10k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 62.86%
Correct answers: 22
Incorrect answers: 13
`PyBGPStream`: pass (better than previous)


### BGP-LLaMA26k-13b-5k-cutoff-1024-max-2048 - `Best Combined`
`Multiple Choice v1`:
Accuracy: 82.86%
Correct answers: 29
Incorrect answers: 6

`Multiple Choice v2`:
Accuracy: 71.43%
Correct answers: 25
Incorrect answers: 10

### BGP-LLaMA-knowledge-20k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 62.86%
Correct answers: 22
Incorrect answers: 13

`Multiple Choice v2`:
Accuracy: 48.57%
Correct answers: 17
Incorrect answers: 18

### BGP-LLaMA-knowledge-5k-cutoff-1024-max-2048 `best knowledge`
`Multiple Choice v1`:
Accuracy: 85.71%
Correct answers: 30
Incorrect answers: 5

`Multiple Choice v2`:
Accuracy: 71.43%
Correct answers: 25
Incorrect answers: 10

### BGP-LLaMA-knowledge-3k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 82.86%
Correct answers: 29
Incorrect answers: 6

`Multiple Choice v2`:
Accuracy: 74.29%
Correct answers: 26
Incorrect answers: 9

### BGP-LLaMA-knowledge-2k-cutoff-1024-max-2048
`Multiple Choice v1`:
Accuracy: 82.86%
Correct answers: 29
Incorrect answers: 6

`Multiple Choice v2`:
Accuracy: 71.43%
Correct answers: 25
Incorrect answers: 10