In [7]:
import transformers
from transformers import (
    pipeline,
    logging,
)
from typing import List
import torch
from torch import cuda, bfloat16
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# import seaborn as sns
from pylab import rcParams
import os

In [8]:
# Hugging Face Hub repository id of the checkpoint loaded by this notebook.
model_id = 'hyonbokan/llama7_5G_analysis_pcap_5kEpoch'

# Device string for the current CUDA device when CUDA is available, otherwise 'cpu'.
# It is NOT passed to from_pretrained below (placement is left to device_map='auto');
# it is kept as a convenience value for code that wants an explicit device string.
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'


# Hub access token, read from the `hf_token` environment variable (None when it is unset).
hf_auth = os.environ.get('hf_token')

# Fetch the model configuration first so it can be handed to the model loader explicitly.
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    # NOTE(review): trust_remote_code=True permits code shipped in the Hub repository to run
    # locally; the config load above does not set it — confirm it is actually required.
    trust_remote_code=True,
    config=model_config,
    # quantization_config=bnb_config,
    device_map='auto',
    use_auth_token=hf_auth
)


# Inference only: put the module in evaluation mode.
model.eval()
# Report the device the loaded model itself reports instead of the `device` guess above,
# which is computed independently of device_map='auto' and could therefore disagree with it.
print(f"Model loaded on {model.device}")

Downloading config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)l-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)l-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Model loaded on cuda:0


In [9]:
# Tokenizer from the same Hub repository as the model, using the same access token.
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, use_auth_token=hf_auth)

# Pad on the right-hand side of each sequence.
tokenizer.padding_side = "right"
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

Downloading tokenizer_config.json:   0%|          | 0.00/1.57k [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

In [10]:
# Only let CRITICAL messages through the transformers logger.
logging.set_verbosity(logging.CRITICAL)

# Task description; it is wrapped in the "<s>[INST] ... [/INST]" template when the pipeline is called.
prompt = (
    "Given a n3.pcap file containing network traffic data, create a Python script to process the PCAP. "
    "The script should extract the source IP, destination IP, source port, destination port, "
    "and the raw payload data for each packet in the PCAP. "
    "Store this data in a pandas DataFrame and display it."
)

# Text-generation pipeline built around the already-loaded model and tokenizer.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=712,
)
result = pipe("<s>[INST] " + prompt + " [/INST]")

# Print the text of the first returned sequence.
print(result[0]['generated_text'])

<s>[INST] Given a n3.pcap file containing network traffic data, create a Python script to process the PCAP. The script should extract the source IP, destination IP, source port, destination port, and the raw payload data for each packet in the PCAP. Store this data in a pandas DataFrame and display it. [/INST]  Sure, here is an example of a Python script that can be used to process a pcap file containing 5G network traffic data and extract the relevant information about each packet, including the source IP, destination IP, source port, destination port, and the raw payload data:

import pandas as pd
from scapy.all import *

# Load the pcap file
packets = rdpcap('/path/to/n3.pcap')

# Create a DataFrame to store the packet information
df = pd.DataFrame(columns=['Source IP', 'Destination IP', 'Source Port', 'Destination Port', 'Raw Payload'])

# Iterate through each packet and extract the relevant information
for packet in packets:
    # Extract the source IP
    src_ip = packet[IP].src


In [11]:
# Only let CRITICAL messages through the transformers logger.
logging.set_verbosity(logging.CRITICAL)

# Task description; it is wrapped in the "<s>[INST] ... [/INST]" template when the pipeline is called.
prompt = (
    "Generate Python code to calculate 5G network performance KPIs: "
    "Total Network Capacity, Capacity per Area, Capacity per Point, "
    "Cost per Capacity, Cost per Area, and Surplus per Area. "
    "Load data from '5G_Infrastructure/demand_driven_postcode_data_results.csv'. "
    "Use keywords: 'capacity', 'cost', 'area', 'numpoints' to identify relevant columns."
)

# Text-generation pipeline built around the already-loaded model and tokenizer.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=1024,
)
result = pipe("<s>[INST] " + prompt + " [/INST]")

# Print the text of the first returned sequence.
print(result[0]['generated_text'])

<s>[INST] Generate Python code to calculate 5G network performance KPIs: Total Network Capacity, Capacity per Area, Capacity per Point, Cost per Capacity, Cost per Area, and Surplus per Area. Load data from '5G_Infrastructure/demand_driven_postcode_data_results.csv'. Use keywords: 'capacity', 'cost', 'area', 'numpoints' to identify relevant columns. [/INST]  Certainly! Here is an example of Python code that can be used to calculate 5G network performance KPIs from the '5G_Infrastructure/demand_driven_postcode_data_results.csv' file:

import pandas as pd
import matplotlib.pyplot as plt

# Read the data
df = pd.read_csv('5G_Infrastructure/demand_driven_postcode_data_results.csv')

# Calculate total network capacity
capacity_per_area = df['capacity_per_area'].mean()
capacity_per_point = df['capacity_per_point'].mean()

# Calculate cost per capacity
cost_per_capacity = df['cost_per_capacity'].mean()

# Calculate capacity per area
capacity_per_area_avg = df['capacity_per_area'].mean()

# Ca