In [2]:
!pip list

Package                            Version
---------------------------------- ------------------
absl-py                            1.4.0
accelerate                         1.3.0
aiohappyeyeballs                   2.4.6
aiohttp                            3.11.13
aiosignal                          1.3.2
alabaster                          1.0.0
albucore                           0.0.23
albumentations                     2.0.4
ale-py                             0.10.2
altair                             5.5.0
annotated-types                    0.7.0
anyio                              3.7.1
argon2-cffi                        23.1.0
argon2-cffi-bindings               21.2.0
array_record                       0.6.0
arviz                              0.20.0
astropy                            7.0.1
astropy-iers-data                  0.2025.2.24.0.34.4
astunparse                         1.6.3
atpublic                           4.1.0
attrs                              25.1.0
audioread            

In [1]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [3]:
!pip install scapy pandas

Collecting scapy
  Downloading scapy-2.6.1-py3-none-any.whl.metadata (5.6 kB)
Downloading scapy-2.6.1-py3-none-any.whl (2.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: scapy
Successfully installed scapy-2.6.1


In [4]:
!pip install torch transformers bitsandbytes openai huggingface

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting huggingface
  Downloading huggingface-0.0.1-py3-none-any.whl.metadata (2.9 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3

In [5]:
import os
import re
import ast
import time
import subprocess
import psutil
import json
from pathlib import Path

import transformers
import torch
from openai import OpenAI
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer


In [6]:
torch.cuda.empty_cache()

In [7]:
from google.colab import userdata
from huggingface_hub import login

os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
login(token = userdata.get("hf_token"))
client = OpenAI()

In [8]:
with open("questions.json", "r") as f:
    questions_data = json.load(f)
questions = questions_data["questions"]

In [9]:
# 모델별 생성된 코드를 저장할 디렉토리
OUTPUT_DIR = Path("generated_codes")
OUTPUT_DIR.mkdir(exist_ok=True)
RESULTS_FILE = "benchmark_results.csv"
PCAP_FILE = "smallFlows.pcap"

# 모델별 코드 저장
model_codes = {
    "GPT-3.5": [],
    "LLaMA3": [],
    "CustomLLM": []
}

# 벤치마킹 결과 저장
results = []

In [10]:
import torch
import transformers

device = f'cuda:{torch.cuda.current_device()}' if torch.cuda.is_available() else 'cpu'

bnb_config = transformers.BitsAndBytesConfig(
    load_in_8bit=True,
)

# Need auth token for these
hf_auth = os.environ.get('hf_token')

## Benchmark

In [11]:
# Python 코드 추출 함수
def extract_python_code(response_text):
    pattern_block = r"```[Pp]?ython?\s*(.*?)\s*```"
    match_block = re.search(pattern_block, response_text, re.DOTALL)

    pattern_raw = r"^(import\s+\w+|from\s+\w+)(.*)$"
    match_raw = re.match(pattern_raw, response_text.strip(), re.DOTALL)

    if match_block:
        code = match_block.group(1).strip()
        return code if code else False
    elif match_raw:
        code = (match_raw.group(1) + match_raw.group(2)).strip()
        return code if code else False
    return False

# 코드 문법 확인 함수
def is_python_code(code):
    try:
        ast.parse(code)
        return True
    except SyntaxError:
        return False

In [12]:
def save_code_to_file(question_idx, model_name, code):
    file_name = f"script_q{question_idx+1}_{model_name.lower()}.py"
    file_path = OUTPUT_DIR / file_name
    with open(file_path, "w") as f:
        f.write(code)
    return file_path

# 코드 실행 및 메트릭 수집 함수
def run_script(script_path, pcap_file=PCAP_FILE) -> dict:
    process = psutil.Process()
    start_time = time.time()
    start_memory = process.memory_info().rss / 1024 / 1024

    output_file = f"output_{script_path.stem}.json"
    cmd = f"python {script_path}"
    result = subprocess.run(cmd, shell=True, capture_output=True, text=True)

    end_time = time.time()
    end_memory = process.memory_info().rss / 1024 / 1024

    execution_time = end_time - start_time
    memory_usage = end_memory - start_memory
    success = result.returncode == 0
    syntax_valid = is_python_code(open(script_path).read()) if os.path.exists(script_path) else False

    return {
        "success": success,
        "execution_time": execution_time,
        "memory_usage": memory_usage,
        "syntax_valid": syntax_valid,
        "output_file": output_file if success else None,
    }


def generate_code(model_name, model, tokenizer, prompt):
    if model_name == "GPT-3.5":
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "user", "content": prompt}
            ],
        )
        response = completion.choices[0].message.content
        with open(f"prompt/gpt_response.txt", "a") as f:
            f.write(response)
            f.write("\n\n\n\n\n")
    elif model_name == "llama3":
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(**inputs, max_length=500)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    else:
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(**inputs, max_length=500)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return extract_python_code(response)


def evaluate_model(model_name, model, tokenizer=None):
    pass_count = 0
    total_tasks = len(questions)

    for idx, question in enumerate(questions):
        print(f"{model_name} - Question {idx + 1}: {question}")
        code = generate_code(model_name, model, tokenizer, question)
        if code is False:
            print(f"model : {model_name} - Q{idx + 1}")
            code = "!!!!! Invalid code generated !!!!!"

        script_path = save_code_to_file(idx, model_name, code)
        result = run_script(script_path)

        passed = result["syntax_valid"]
        if passed:
            pass_count += 1

        results.append({
            "question_idx": idx + 1,
            "model": model_name,
            "question": question,
            "pass@1": passed,
            "execution_time": result["execution_time"],
            "memory_usage": result["memory_usage"],
            "syntax_valid": result["syntax_valid"],
            "output_file": result["output_file"]
        })

    pass_at_1 = pass_count / total_tasks
    print(f"{model_name} Pass@1: {pass_at_1:.2f}")
    return pass_at_1

In [13]:
def benchmark():
    gpt_model = 'GPT-3.5'
    llama_model = 'meta-llama/Meta-Llama-3-8B-Instruct'
    network_llama_model = 'choihyuunmin/mobile-Llama-3-Instruct'

    llama_config = transformers.AutoConfig.from_pretrained(
        llama_model,
        token=hf_auth
    )

    network_llama_config = transformers.AutoConfig.from_pretrained(
        network_llama_model,
        token=hf_auth
    )

    llama_model = AutoModelForCausalLM.from_pretrained(
        llama_model,
        trust_remote_code=True,
        config=llama_config,
        quantization_config=bnb_config,
        token=hf_auth,
        torch_dtype=torch.float16,
        device_map=device
    )

    llama_tokenizer = AutoTokenizer.from_pretrained(llama_model)

    custom_model = AutoModelForCausalLM.from_pretrained(
        network_llama_model,
        trust_remote_code=True,
        config=network_llama_config,
        quantization_config=bnb_config,
        token=hf_auth,
        torch_dtype=torch.float16,
        device_map=device
    )

    network_llama_tokenizer = AutoTokenizer.from_pretrained(network_llama_model)

    llama_model.eval()
    custom_model.eval()
    network_llama_tokenizer.pad_token = network_llama_tokenizer.eos_token
    network_llama_tokenizer.padding_side = "right"

    # 모델 평가
    evaluate_model("GPT-3.5", None)
    evaluate_model("LLaMA3", llama_model, llama_tokenizer)
    evaluate_model("Mobile-Llama3", custom_model, network_llama_tokenizer)

    # 결과 저장
    with open(RESULTS_FILE, "w") as f:
        headers = "question_idx,model,question,pass@1,execution_time,memory_usage,syntax_valid,output_file\n"
        f.write(headers)
        for r in results:
            f.write(f"{r['question_idx']},{r['model']},\"{r['question']}\",{r['pass@1']},{r['execution_time']},{r['memory_usage']},{r['syntax_valid']},{r['output_file']}\n")


In [None]:
benchmark()