In [1]:
import yaml
import json
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import pprint
import google.generativeai as genai

# Log files to analyse (DU and RU).
du_log_file = "/home/aiml/johnson/Scenario/Scenario_3/DU/log/Scenario_3.log"
ru_log_file = "/home/aiml/johnson/Scenario/Scenario_3/RU/log/RU.log"

# Packet capture, debug knowledge base (YAML) and reference text used by later cells.
pcap_path = "/home/aiml/johnson/Scenario/Scenario_3/FH/fh.pcap"
debug_yaml_path = "/home/aiml/johnson/thesis_rag/Integration_dataset/debug.yaml"
reference_context_path = "/home/aiml/johnson/Scenario/Scenario_3/reference_config.txt"

# Current DU configuration and its segmented JSON companion file.
current_config_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3.conf"
current_config_json_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3.conf.segments.json"

# Output paths for the RAG-assisted revision.
rag_after_conf_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3_modification_1.conf"
rag_after_json_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3_modification_1.conf.segments.json"

# Output paths for the non-RAG revision.
# NOTE(review): these two values are identical to the rag_after_* paths above, so a
# non-RAG run writes to the same files as a RAG run — confirm whether distinct
# file names were intended.
none_rag_after_conf_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3_modification_1.conf"
none_rag_after_json_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3_modification_1.conf.segments.json"

In [2]:
# Load the debug knowledge base. UTF-8 is requested explicitly so the result does
# not depend on the platform's default encoding.
with open(debug_yaml_path, "r", encoding="utf-8") as f:
    debug_data = yaml.safe_load(f)

# Build one text document per entry (stage + symptom + log snippet, plus notes
# when present) together with the metadata stored alongside its embedding.
embedding_docs = []
for item in debug_data or []:  # an empty YAML file loads as None
    content = f"Stage: {item['stage']}\nSymptom: {item['symptom']}\nLog: {item['log_snippet']}\n"

    if item.get("notes"):
        content += f"Notes: {item['notes']}\n"

    # `related_config` may be missing or written as a single scalar; joining a bare
    # string would split it into characters, so normalise it to a list first.
    related_config = item.get("related_config") or []
    if isinstance(related_config, str):
        related_config = [related_config]
    # The list is flattened to a comma-separated string for the metadata field.
    related_config_str = ", ".join(str(name) for name in related_config)

    metadata = {
        "stage": item["stage"],
        "related_config": related_config_str
    }
    embedding_docs.append({"content": content, "metadata": metadata})

# pprint.pprint(embedding_docs) #for checking

In [3]:
# Gemini or OpenAI embeddings could be used here instead.
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Embed the texts and store the vectors in a Chroma database.
texts = [d["content"] for d in embedding_docs]
metadatas = [d["metadata"] for d in embedding_docs]

# NOTE(review): the recorded similarity-search output later in this notebook shows
# the same entry for both top-2 hits, which may mean re-running this cell adds the
# same documents to ./error_db again — confirm and de-duplicate if so.
vectordb = Chroma.from_texts(texts, embedding=embedding, metadatas=metadatas, persist_directory="./error_db")
vectordb.persist()

print("✅ Debug embedding 建立完成並已儲存")



# Check the total number of embedded entries.
print("📦 總筆數：", vectordb._collection.count())
# Show the first few embedded entries (original text and metadata).
peek_data = vectordb._collection.get(limit=2)

# Print id, text and metadata for each peeked entry.
for i in range(len(peek_data["documents"])):
    print(f"\n--- Entry {i+1} ---")
    print("Document ID:", peek_data["ids"][i])
    print("Document Text:", peek_data["documents"][i])
    print("Metadata:", peek_data["metadatas"][i])

  embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
2025-04-30 03:03:07.887833: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-30 03:03:07.906439: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-30 03:03:07.906464: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-30 03:03:07.906972: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has alread

✅ Debug embedding 建立完成並已儲存
📦 總筆數： 12

--- Entry 1 ---
Document ID: 2490cd60-a3d3-4485-bbcc-dfb55ee1d0d9
Document Text: Stage: fh_setup
Symptom: PCAP file contains zero packets, likely due to incorrect MAC address configuration
Log: [FH] No packets captured – possible mismatch in MAC address between DU and RU
Metadata: {'related_config': 'ru_addr, du_addr', 'stage': 'fh_setup'}

--- Entry 2 ---
Document ID: 41214951-d646-4f62-bfcd-23f34c016659
Document Text: Stage: fh_setup
Symptom: DU reports continuous 'Received Time doesn't correspond to the time we think it is' and 'Jump in frame counter' errors after XRAN start. Observed mismatch between expected and received frame/slot numbers and frequent double sync detection.
Log: [PHY]   Jump in frame counter last_frame 86 => 167, slot 19; [PHY]   Received Time doesn't correspond to the time we think it is (slot mismatch, received 167.19, expected 86.11); [PHY]   Detected double sync message 371.6 => 371.7
Metadata: {'related_config': 'iq_widt

  vectordb.persist()


## Check RU LOG

In [4]:
import re
import os

def extract_ru_log_info(file_path):
    """Pull the most relevant diagnostic text out of an RU log file.

    ERNO error blocks take priority: every line matching the ERNO pattern starts a
    block, which is extended with the following lines as long as they share the
    first 17 characters of the ERNO line, contain ">>>Oran", or contain " at line ".
    If no ERNO line exists, the span from the first "RX-WINDOW-STATS" line through
    the first subsequent line containing "RX_LATEST_C_DL" (or end of file) is used.

    Args:
        file_path: Path of the RU log to read (decoded as UTF-8).

    Returns:
        The ERNO blocks joined by newlines, otherwise the timing-window text
        (possibly empty), or the string "NO RU log" when the file is missing or
        cannot be read.
    """
    if not os.path.exists(file_path):
        print("NO RU log (file not found)")
        return "NO RU log"

    try:
        with open(file_path, "r", encoding="utf-8") as handle:
            log_lines = list(handle)
    except Exception as exc:
        print(f"NO RU log (error reading file: {exc})")
        return "NO RU log"

    # Step 1: look for ERNO lines and the error description that follows them.
    erno_re = re.compile(r"ERNO 0x[0-9A-Fa-f]+\s+0x[0-9A-Fa-f]+\s+0x[0-9A-Fa-f]+")
    total = len(log_lines)

    def continues_block(candidate, prefix):
        # A follow-up line belongs to the block when it shares the leading
        # characters of the ERNO line or looks like a traceback line.
        return (
            candidate.startswith(prefix)
            or ">>>Oran" in candidate
            or " at line " in candidate
        )

    collected = []
    pos = 0
    while pos < total:
        head = log_lines[pos]
        pos += 1
        if not erno_re.search(head):
            continue
        prefix = head[:17]  # leading timestamp portion of the ERNO line
        end = pos
        while end < total and continues_block(log_lines[end], prefix):
            end += 1
        collected.append(head + "".join(log_lines[pos:end]))
        pos = end

    if collected:
        print("=== ERNO Errors with Traceback ===")
        return "\n".join(collected)

    # Step 2: capture the RX-WINDOW-STATS / RX-WINDOW-TIMING section as one span.
    start = next(
        (idx for idx, text in enumerate(log_lines) if "RX-WINDOW-STATS" in text),
        None,
    )
    window = []
    if start is not None:
        for text in log_lines[start:]:
            window.append(text)
            if "RX_LATEST_C_DL" in text:
                break  # last item of the timing section

    print("=== Timing Window Extracted ===")
    return "".join(window)

# Run the RU log extraction; the text is kept as `log_result` and also becomes
# the initial retrieval query.
query = log_result = extract_ru_log_info(ru_log_file)

=== ERNO Errors with Traceback ===


## Check FH

In [5]:
import subprocess
from scapy.utils import RawPcapReader


# pcap_path = "/home/aiml/johnson/thesis_rag/fh_pcap_sample/normal.pcap"

# Running packet total for the capture; incremented by the RawPcapReader loop below.
packet_count = 0

def count_plane_packets(keyword, pcap_file=None):
    """Count the tshark output lines of a capture that contain ``keyword``.

    tshark is run on the capture and every line it prints is checked for the
    keyword (the notebook uses "C-Plane" and "U-Plane").

    Args:
        keyword: Substring to look for in each line printed by tshark.
        pcap_file: Capture file to read. When omitted, the notebook-level
            ``pcap_path`` is used, which keeps existing one-argument calls working.

    Returns:
        Number of tshark output lines containing ``keyword``; 0 when tshark
        prints nothing on stdout.
    """
    target = pcap_path if pcap_file is None else pcap_file
    result = subprocess.run(
        ["tshark", "-r", target],
        capture_output=True,
        text=True
    )
    return sum(1 for line in result.stdout.splitlines() if keyword in line)

def get_packet_count(pcap_file):
    """Return the frame count tshark reports for ``pcap_file``.

    Runs ``tshark -q -z io,stat,0`` and reads the frame column (third
    ``|``-separated field) of the statistics table.

    Args:
        pcap_file: Path of the capture file to inspect.

    Returns:
        The frame count from the first row that parses as an integer, or 0 when
        no such row is found (including when tshark prints nothing).
    """
    result = subprocess.run(
        ["tshark", "-r", pcap_file, "-q", "-z", "io,stat,0"],
        capture_output=True,
        text=True
    )
    for line in result.stdout.splitlines():
        # Data rows of the io,stat table look like "| 0.0 <> 1.5 |  10 |  1000 |".
        # The previous filter only selected lines containing "Frames", i.e. the
        # header row, whose field is not numeric, so the count was never parsed.
        is_data_row = "<>" in line
        is_legacy_match = "|   " in line and "Frames" in line
        if not (is_data_row or is_legacy_match):
            continue
        fields = line.split("|")
        try:
            return int(fields[2].strip().split()[0])
        except (IndexError, ValueError):
            # Not a numeric frame field (e.g. the header row): keep scanning.
            continue
    return 0


# Add the number of raw packets in the capture to the running total.
packet_count += sum(1 for _ in RawPcapReader(pcap_path))

# Count tshark lines mentioning each plane, then report both totals.
c_plane_count = count_plane_packets("C-Plane")
u_plane_count = count_plane_packets("U-Plane")
for title, total in (
    ("Control_Plane_Packets: ", c_plane_count),
    ("User_Plane_Packets: ", u_plane_count),
):
    print(title, total)


FileNotFoundError: [Errno 2] No such file or directory: '/home/aiml/johnson/Scenario/Scenario_3/FH/fh.pcap'

## Check DU log

In [6]:
import yaml
import re
from pathlib import Path

def clean_text(s):
    """Strip ANSI escape sequences, drop single and double quotes, and trim surrounding whitespace."""
    without_ansi = re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', s)
    # Delete both quote characters in a single pass.
    unquoted = without_ansi.translate({ord("'"): None, ord('"'): None})
    return unquoted.strip()

# Fail early with a clear message when either input file is missing.
if not Path(du_log_file).exists():
    raise FileNotFoundError(f"Log file not found: {du_log_file}")
if not Path(debug_yaml_path).exists():
    raise FileNotFoundError(f"Debug YAML not found: {debug_yaml_path}")

# Load debug.yaml.
with open(debug_yaml_path, 'r', encoding='utf-8') as f:
    debug_data = yaml.safe_load(f)

# Build the (log_snippet, stage) pairs to search for. A snippet may hold several
# ";"-separated fragments; each fragment becomes its own entry.
target_entries = []
for item in debug_data or []:  # an empty YAML file loads as None
    snippet = item.get('log_snippet')
    if not snippet:
        continue  # missing or null snippet: nothing to search for
    stage = item.get('stage', 'unknown')
    for part in snippet.split(";"):
        part = part.strip()
        # Skip empty fragments (trailing or doubled ";"): an empty string is
        # contained in every line and would report a match for the whole log.
        if part:
            target_entries.append((part, stage))

# Clean every snippet once, instead of once per log line, and drop snippets that
# are empty after cleaning for the same reason as above.
cleaned_targets = [
    (cleaned, snippet, stage)
    for cleaned, snippet, stage in (
        (clean_text(snippet), snippet, stage) for snippet, stage in target_entries
    )
    if cleaned
]

# Search the DU log.
found_results = []
with open(du_log_file, 'r', encoding='utf-8', errors='ignore') as f:
    for raw_line in f:
        line = clean_text(raw_line)
        for snippet_cleaned, snippet, stage in cleaned_targets:
            # A substring match on the cleaned text counts as a hit.
            if snippet_cleaned in line:
                found_results.append((stage, snippet))

# Report the results. `query` ends up holding the last matching snippet and
# replaces any earlier value; when nothing matches it is left untouched.
if found_results:
    for stage, snippet in found_results:
        print(f"✅ Found matching log for stage [{stage}]: {snippet}")
        query = snippet
else:
    print("❌ No matching logs found.")


✅ Found matching log for stage [fh_setup]: [PHY]   Jump in frame counter last_frame 86 => 167, slot 19
✅ Found matching log for stage [fh_setup]: [PHY]   Received Time doesn't correspond to the time we think it is (slot mismatch, received 167.19, expected 86.11)
✅ Found matching log for stage [fh_setup]: [PHY]   Detected double sync message 371.6 => 371.7


In [7]:
# Retrieve the two knowledge-base entries closest to the current query.
results = vectordb.similarity_search(query, k=2)

separator = "-----------------------------------------------"
for hit in results:
    print("- Matched:", hit.page_content)
    print("- Related config:", hit.metadata["related_config"])
    print(separator)

# Keep the top hit and its fields for the prompt-building cells.
matched_case = results[0]
matched_symptom = matched_case.page_content
matched_related_config = matched_case.metadata.get("related_config", "")


- Matched: Stage: fh_setup
Symptom: DU reports continuous 'Received Time doesn't correspond to the time we think it is' and 'Jump in frame counter' errors after XRAN start. Observed mismatch between expected and received frame/slot numbers and frequent double sync detection.
Log: [PHY]   Jump in frame counter last_frame 86 => 167, slot 19; [PHY]   Received Time doesn't correspond to the time we think it is (slot mismatch, received 167.19, expected 86.11); [PHY]   Detected double sync message 371.6 => 371.7
- Related config: iq_width_prach, iq_width
-----------------------------------------------
- Matched: Stage: fh_setup
Symptom: DU reports continuous 'Received Time doesn't correspond to the time we think it is' and 'Jump in frame counter' errors after XRAN start. Observed mismatch between expected and received frame/slot numbers and frequent double sync detection.
Log: [PHY]   Jump in frame counter last_frame 86 => 167, slot 19; [PHY]   Received Time doesn't correspond to the time we

In [8]:
# Load the segmented current configuration (JSON) and the reference text.
# UTF-8 is requested explicitly, matching how this notebook opens the same JSON
# file elsewhere, so decoding does not depend on the platform default.
with open(current_config_json_path, "r", encoding="utf-8") as f:
    config_segments_context = json.load(f)
with open(reference_context_path, "r", encoding="utf-8") as f:
    reference_context = f.read()

# RAG prompt_template
# prompt_template = f"""
# You are a 5G network expert. Your job is to revise configuration files based on observed network issues and debug knowledge.

# Issue Description:
# "{query}"

# Matching debug knowledge:
# {matched_case.page_content}
# Relevant parameters: {matched_case.metadata["related_config"]}

# Reference Device Address Table (external reference file):
# {reference_context}

# Current configuration block:
# {config_segments_context}

# Please revise the configuration using correct addresses from the reference. Output only the revised config section.

# Return a list of JSON objects with the following structure:
# [
#   {{
#     "label": "parameter_name",
#     "content": "parameter_name = (...);",
#     "reference_reason": "Short explanation matching the value to the reference device table (e.g., correct MAC, matches expected setting).",
#     "model_reason": "Additional expert analysis in 1-2 sentences explaining why this change is necessary, beneficial, or resolves a network issue."
#   }},
#   ...
# ]

# - Only include parameters listed in 'Relevant parameters'.
# - Do not include any explanation outside of the JSON structure.
# - Keep "reference_reason" based on the reference table.
# - Derive "model_reason" from your own technical reasoning.

# """

# Prompt built from the issue description (`query`) and the current configuration
# segments only; the matched debug entry and the reference table are not included.
# Doubled braces ({{ }}) are literal braces in the f-string.
# NOTE(review): `config_segments_context` is the object returned by json.load, so
# it is interpolated via str() and appears in the prompt as its Python
# representation rather than as JSON text — confirm this is intended.
prompt_template = f"""
You are a 5G network expert. Your job is to revise configuration files based on observed network issues.

Issue Description:
"{query}"

Current configuration block:
{config_segments_context}

Please revise the configuration to resolve the described issue based on your technical expertise.

Return a list of JSON objects with the following structure:
[
  {{
    "label": "parameter_name",
    "content": "parameter_name = (...);",
    "model_reason": "Technical explanation in 1-2 sentences explaining why this change is necessary, beneficial, or resolves the network issue."
  }},
  ...
]

- Only revise parameters that are necessary to resolve the issue.
- If no changes are needed, return an empty list: []
- Strictly output only valid JSON without any additional text or explanation.
"""


## LLM API 設定

In [9]:
import os

# Read the Gemini API key from the environment instead of embedding it in the
# notebook, so the secret is not stored with the file or its history.
# The key that was previously hard-coded here should be treated as exposed and revoked.
_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY environment variable is not set; export it before running this cell."
    )
genai.configure(api_key=_gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")
# for m in genai.list_models():
#     print(m.name)

####################################################################################################################################################################################

# from langchain_nvidia_ai_endpoints import ChatNVIDIA

# client = ChatNVIDIA(
#   model="meta/llama-3.1-70b-instruct",
#   api_key=os.environ["NVIDIA_API_KEY"],  # load the key from the environment; never commit it
#   temperature=0,
#   top_p=0.7,
#   max_tokens=1024,
# )

# for chunk in client.stream([{"role":"user","content":""}]): 
#   print(chunk.content, end="")
# response = client.invoke([{"role": "user", "content": prompt_template}])
# print(response.content)



In [10]:
# Send the prompt to the configured model and keep the response for the parsing cell.
response = model.generate_content(prompt_template)                                # Gemini API
# LLM Suggested Revisions
print("LLM Suggested Revisions：\n")
# Print the raw response text (the recorded output shows it wrapped in a ```json fence).
print(response.text)



# response = client.invoke([{"role": "user", "content": prompt_template}])            # NIV
# # LLM Suggested Revisions
# print("LLM Suggested Revisions：\n")
# print(response.content)

LLM Suggested Revisions：

```json
[
  {
    "label": "prach_msg1_FDM",
    "content": "prach_msg1_FDM                                            = 1;",
    "model_reason": "The double sync message suggests collisions in PRACH. Increasing prach_msg1_FDM to 1 (two occasions) reduces contention and the probability of preamble collisions, which can lead to these duplicate sync messages."
  },
  {
    "label": "ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR",
    "content": "ssb_perRACH_OccasionAndCB_PreamblesPerSSB_PR                = 4;",
    "model_reason": "Increasing the number of RACH occasions per SSB also spreads out the random access attempts, thus reducing the likelihood of preamble collisions and the observed double sync messages."
  }
]
```


In [None]:
import json
import re


def parse_llm_response(text):
    """Convert the LLM's JSON reply into a list of ``{"label", "content"}`` dicts.

    The reply may be bare JSON or JSON wrapped in a Markdown code fence
    (```json ... ``` or ``` ... ```), optionally surrounded by other text; the
    contents of the first fenced block are used when a fence is present.

    Args:
        text: Raw response text from the model.

    Returns:
        A list with one ``{"label": ..., "content": ...}`` dict per well-formed
        suggestion. Entries missing either key are skipped with a warning. An
        empty list is returned when the text is not valid JSON or does not hold
        a list (a single JSON object is treated as a one-element list).
    """
    cleaned = (text or "").strip()

    # Unwrap a Markdown code fence if one is present, with or without a language tag.
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", cleaned, flags=re.DOTALL | re.IGNORECASE)
    if fenced:
        cleaned = fenced.group(1)

    # Try to parse the JSON.
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError as e:
        print("❌ JSON 解析失敗:", e)
        print("🔍 原始內容:\n", cleaned)
        return []

    if isinstance(parsed, dict):
        parsed = [parsed]  # tolerate a single suggestion returned without a list
    if not isinstance(parsed, list):
        print(f"⚠️ Unexpected JSON payload (expected a list): {parsed!r}")
        return []

    llm_suggestions = []
    for item in parsed:
        if isinstance(item, dict) and "label" in item and "content" in item:
            llm_suggestions.append({"label": item["label"], "content": item["content"]})
        else:
            # Previously a missing key raised an uncaught KeyError.
            print(f"⚠️ Skipping malformed suggestion: {item!r}")
    return llm_suggestions

# Usage: parse the model response into label/content pairs and show them.
llm_suggestions = parse_llm_response(response.text)
print(llm_suggestions)


In [None]:
import re

def apply_llm_suggestions(conf_path, output_path, llm_suggestions):
    """Rewrite ``label = value;`` settings in a file using the LLM suggestions.

    For every suggestion, each occurrence of ``<label> = ...;`` (the value may
    span several lines, up to the first ";") is replaced by the suggestion's
    ``content`` text, and the result is written to ``output_path``.

    Args:
        conf_path: File to read (UTF-8).
        output_path: File to write (UTF-8). Written even when nothing changed.
        llm_suggestions: Iterable of dicts with "label" and "content" keys.

    Returns:
        None. A summary of modified and unmatched labels is printed.
    """
    # Read the original file.
    with open(conf_path, "r", encoding="utf-8") as f:
        content = f.read()

    modified_labels = []

    # Apply the replacement for each label in turn.
    for suggestion in llm_suggestions:
        label = suggestion["label"]
        replacement = suggestion["content"]

        # Match the whole setting statement. The label is escaped so it is taken
        # literally, and the lookbehind stops it from matching the tail of a
        # longer name (e.g. "iq_width" inside "prach_iq_width").
        pattern = rf"(?<!\w){re.escape(label)}\s*=\s*[^;]+;"
        # A callable replacement inserts the text verbatim; a plain string would
        # be treated as a template in which backslashes have special meaning.
        content, count = re.subn(pattern, lambda _match, text=replacement: text, content)

        if count > 0:
            modified_labels.append(label)
        else:
            print(f"⚠️ No matching setting found ：{label}")

    # Write the new file.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)

    print(f"✅ Update file ：{output_path}")

    # Print the modification report.
    if modified_labels:
        print("🛠️ Modified parameters：")
        for label in modified_labels:
            print(f" - {label}")
    else:
        print("📭 No parameters were modified")

# Example run: write the revised .conf file.
# NOTE(review): none_rag_after_conf_path has the same value as rag_after_conf_path
# in the configuration cell, so this writes to the same file as a RAG run — confirm.
apply_llm_suggestions(
    conf_path  =current_config_path,
    output_path=none_rag_after_conf_path, 
    llm_suggestions=llm_suggestions
)

# Apply the same text substitution to the segments JSON file.
# NOTE(review): the replacement text is inserted as-is; if a suggestion's content
# contains double quotes the resulting file may no longer be valid JSON — confirm.
apply_llm_suggestions(
    conf_path=current_config_json_path,
    output_path=none_rag_after_json_path,
    llm_suggestions=llm_suggestions
)
