In [15]:
import yaml
import json
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
import pprint
import google.generativeai as genai

# Log files to inspect for this scenario (DU side and RU side)
du_log_file = "/home/aiml/johnson/Scenario/Scenario_3/DU/log/Scenario_3.log"
ru_log_file = "/home/aiml/johnson/Scenario/Scenario_3/RU/log/RU.log"

# Fronthaul packet capture read by the FH check
pcap_path = "/home/aiml/johnson/Scenario/Scenario_3/FH/fh.pcap"

# Reference text that is inserted verbatim into the LLM prompt
reference_context_path = "/home/aiml/johnson/Scenario/Scenario_3/reference_config.txt"

# Current DU configuration and its segmented JSON counterpart
current_config_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3.conf"
current_config_json_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3.conf.segments.json"

# Output locations for the configuration files rewritten with the LLM suggestions
after_conf_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3_modification_1.conf"
after_json_path="/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3_modification_1.conf.segments.json"

In [16]:
yaml_path = "/home/aiml/johnson/thesis_rag/Integration_dataset/debug.yaml"
with open(yaml_path, "r") as yaml_file:
    debug_data = yaml.safe_load(yaml_file)

# One document per debug case: the text to embed is built from the stage,
# symptom and log snippet; the metadata keeps the stage and the related
# configuration keys (list flattened to a comma-separated string).
embedding_docs = [
    {
        "content": (
            f"Stage: {entry['stage']}\n"
            f"Symptom: {entry['symptom']}\n"
            f"Log: {entry['log_snippet']}"
        ),
        "metadata": {
            "stage": entry["stage"],
            "related_config": ", ".join(entry["related_config"]),
        },
    }
    for entry in debug_data
]

# pprint.pprint(embedding_docs) #for checking


In [17]:
# Embedding model (could be swapped for a Gemini or OpenAI embedding)
embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Split embedding_docs into parallel lists of texts and metadata
texts = [d["content"] for d in embedding_docs]
metadatas = [d["metadata"] for d in embedding_docs]

# Open the Chroma vector store (an existing store in this directory is reused)
vectordb = Chroma(persist_directory="./error_db", embedding_function=embedding)

# Step 1: collect the documents already stored, for duplicate detection
existing = vectordb._collection.get()
existing_docs = set(existing["documents"] or []) if "documents" in existing else set()

# Step 2: drop texts that are already stored OR that repeat within this batch.
# Tracking what has been queued in this run prevents two identical entries in
# the YAML from both being inserted on the first ingest.
filtered_texts = []
filtered_metadatas = []
seen_docs = set(existing_docs)

for text, meta in zip(texts, metadatas):
    if text in seen_docs:
        continue
    seen_docs.add(text)
    filtered_texts.append(text)
    filtered_metadatas.append(meta)

# Step 3: store only the non-duplicate entries
if filtered_texts:
    vectordb.add_texts(filtered_texts, metadatas=filtered_metadatas)
    vectordb.persist()
    print(f"✅ 新增 {len(filtered_texts)} 筆非重複嵌入資料")
else:
    print("⚠️ 無新增資料，皆為重複項目")

# Optional: report how many entries the store holds
print("📦 總筆數：", vectordb._collection.count())


# Show the first few stored entries (id, original text and metadata)
peek_data = vectordb._collection.get(limit=6)

for i in range(len(peek_data["documents"])):
    print(f"\n--- Entry {i+1} ---")
    print("Document ID:", peek_data["ids"][i])
    print("Document Text:", peek_data["documents"][i])
    print("Metadata:", peek_data["metadatas"][i])

⚠️ 無新增資料，皆為重複項目
📦 總筆數： 6

--- Entry 1 ---
Document ID: 2490cd60-a3d3-4485-bbcc-dfb55ee1d0d9
Document Text: Stage: fh_setup
Symptom: PCAP file contains zero packets, likely due to incorrect MAC address configuration
Log: [FH] No packets captured – possible mismatch in MAC address between DU and RU
Metadata: {'related_config': 'ru_addr, du_addr', 'stage': 'fh_setup'}

--- Entry 2 ---
Document ID: 41214951-d646-4f62-bfcd-23f34c016659
Document Text: Stage: fh_setup
Symptom: DU reports continuous 'Received Time doesn't correspond to the time we think it is' and 'Jump in frame counter' errors after XRAN start. Observed mismatch between expected and received frame/slot numbers and frequent double sync detection.
Log: [PHY]   Jump in frame counter last_frame 86 => 167, slot 19; [PHY]   Received Time doesn't correspond to the time we think it is (slot mismatch, received 167.19, expected 86.11); [PHY]   Detected double sync message 371.6 => 371.7
Metadata: {'related_config': 'iq_width_prach, iq_

## Check RU LOG

In [18]:
import re

def extract_ru_log_info(file_path):
    """Return the most relevant diagnostic excerpt from an RU log file.

    Two strategies are tried in order:

    1. Every line matching ``ERNO 0x.. 0x.. 0x..`` starts an error block. The
       block is extended with the following lines for as long as they share
       the first 17 characters (the timestamp prefix) of the ERNO line, or
       contain ``>>>Oran`` or `` at line ``. If any block is found, all blocks
       are returned joined by a newline.
    2. Otherwise, everything from the first line containing
       ``RX-WINDOW-STATS`` up to and including the first subsequent line
       containing ``RX_LATEST_C_DL`` is returned.

    A one-line banner naming the strategy used is printed as a side effect.

    Args:
        file_path: Path of the RU log file.

    Returns:
        The extracted text; an empty string when neither strategy finds
        anything.

    Raises:
        OSError: If the file cannot be opened.
    """
    # errors="ignore" drops undecodable bytes so one corrupt byte in the log
    # cannot abort the whole diagnosis with UnicodeDecodeError.
    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        lines = f.readlines()

    # Step 1: prefer ERNO entries together with their follow-up description
    erno_pattern = re.compile(r"ERNO 0x[0-9A-Fa-f]+\s+0x[0-9A-Fa-f]+\s+0x[0-9A-Fa-f]+")
    error_blocks = []
    i = 0

    while i < len(lines):
        if erno_pattern.search(lines[i]):
            block = [lines[i]]
            ts_prefix = lines[i][:17]  # leading timestamp of the ERNO line
            i += 1
            # Absorb continuation lines; the index is advanced here so the
            # outer loop resumes right after the block.
            while i < len(lines):
                if lines[i].startswith(ts_prefix) or ">>>Oran" in lines[i] or " at line " in lines[i]:
                    block.append(lines[i])
                    i += 1
                else:
                    break
            error_blocks.append("".join(block))
        else:
            i += 1

    if error_blocks:
        print("=== ERNO Errors with Traceback ===")
        return "\n".join(error_blocks)

    # Step 2: fall back to the RX-WINDOW-STATS .. RX_LATEST_C_DL section
    stats_started = False
    timing_block = []

    for line in lines:
        if "RX-WINDOW-STATS" in line:
            stats_started = True
        if stats_started:
            timing_block.append(line)
            if "RX_LATEST_C_DL" in line:
                break  # last item of the timing section

    print("=== Timing Window Extracted ===")
    return "".join(timing_block)

# Run the RU-log extraction and print the excerpt it selected.
log_result = extract_ru_log_info(ru_log_file)
print(log_result)

# Seed the retrieval query with the RU excerpt; the DU-log check further down
# reassigns `query` when it finds one of its target messages.
query = log_result


=== ERNO Errors with Traceback ===
29/04/25 13:51:48:073	I: ERNO 0x00000002 0x00000000 0x00000000 Assert Fail: "
29/04/25 13:51:48:073	 >>>FhsDl_0_NR_I0: Received Payloadsize 1337 is higher than Expected Payloadsize 1492 " in ../tm_oran_app/oran_ru/oran_ru_dl_cuplane/code/oran_ru_dl_cuplane.cpp at line 1275

29/04/25 13:51:53:162	I: ERNO 0x00000002 0x00000000 0x00000000 Assert Fail: "
29/04/25 13:51:53:162	 >>>OranRxEth_FE_0_I0: m_RngBuf full." in ../tm_oran_app/oran_ru/oran_ru_fifos/interface/oran_uplane_fifo.hpp at line 238
29/04/25 13:51:53:218	I: ERNO 0x00000002 0x00000000 0x00000000 Assert Fail: "
29/04/25 13:51:53:218	 >>>OranRxEth_FE_0_I0: m_RngBuf full." in ../tm_oran_app/oran_ru/oran_ru_fifos/interface/oran_uplane_fifo.hpp at line 238
29/04/25 13:51:53:258	I: ERNO 0x00000002 0x00000000 0x00000000 Assert Fail: "
29/04/25 13:51:53:258	 >>>OranRxEth_FE_1_I0: m_RngBuf full." in ../tm_oran_app/oran_ru/oran_ru_fifos/interface/oran_uplane_fifo.hpp at line 238
29/04/25 13:51:53:292	I:

## Check FH

In [19]:
import subprocess
from scapy.utils import RawPcapReader


# pcap_path = "/home/aiml/johnson/thesis_rag/fh_pcap_sample/normal.pcap"

packet_count = 0

# Filter the packet content through tshark and search for the keywords "C-Plane" and "U-Plane"
def count_plane_packets(keyword, pcap_file=None):
    """Count the lines of tshark's packet summary that contain ``keyword``.

    Args:
        keyword: Substring to look for in each summary line
            (for example ``"C-Plane"`` or ``"U-Plane"``).
        pcap_file: Capture file to read. When omitted, the notebook-level
            ``pcap_path`` is used, which keeps existing one-argument calls
            working.

    Returns:
        Number of summary lines printed by ``tshark -r <file>`` that contain
        ``keyword``.
    """
    if pcap_file is None:
        pcap_file = pcap_path  # notebook-level default capture

    result = subprocess.run(
        ["tshark", "-r", pcap_file],
        capture_output=True,
        text=True
    )
    return sum(1 for line in result.stdout.splitlines() if keyword in line)

def get_packet_count(pcap_file):
    """Return the total number of frames in a capture, as reported by tshark.

    Runs ``tshark -r <pcap_file> -q -z io,stat,0`` and reads the frame count
    from the statistics table. The count sits in the data row, recognisable
    by the ``<>`` in its interval column (``| 0.0 <> 0.7 | 42 | 5646 |``);
    the header rows that contain the word "Frames" hold no number.

    Args:
        pcap_file: Path of the capture file.

    Returns:
        The frame count, or 0 when no data row can be parsed (for example
        when tshark produced no statistics output).
    """
    result = subprocess.run(
        ["tshark", "-r", pcap_file, "-q", "-z", "io,stat,0"],
        capture_output=True,
        text=True
    )
    for line in result.stdout.splitlines():
        if "<>" not in line:
            continue
        # Row layout: | <interval> | <frames> | <bytes> |
        fields = line.split("|")
        try:
            return int(fields[2].split()[0])
        except (IndexError, ValueError):
            # Not a well-formed data row; keep looking.
            continue
    return 0


# Walk the raw capture once, bumping the running total for every record.
for pkt_data, pkt_metadata in RawPcapReader(pcap_path):
    packet_count = packet_count + 1

# Count control-plane and user-plane packets (one tshark run per keyword,
# control plane first).
c_plane_count, u_plane_count = (
    count_plane_packets(plane_keyword) for plane_keyword in ("C-Plane", "U-Plane")
)
print("Control_Plane_Packets: ", c_plane_count)
print("User_Plane_Packets: ", u_plane_count)


FileNotFoundError: [Errno 2] No such file or directory: '/home/aiml/johnson/Scenario/Scenario_3/FH/fh.pcap'

## Check DU log

In [20]:
from pathlib import Path


# Messages to look for in the DU log (control characters removed; only the
# meaningful part of each message is compared)
target_messages = [
    "[SCTP]   Connect failed: Connection refused",
    "[NGAP]   Received unsuccessful result for SCTP association (3), instance 0, cnx_id 1",
    "[PHY]   Received Time doesn't correspond to the time we think it is (slot mismatch",
    "[PHY]   Received Time doesn't correspond to the time we think it is (frame mismatch"
]

# Fail early when the log file is missing
if Path(du_log_file).exists() is False:
    raise FileNotFoundError(f"Log file not found: {du_log_file}")

# Scan the file once, flagging every target that occurs in some line
found_targets = dict.fromkeys(target_messages, False)

with open(du_log_file, 'r', encoding='utf-8', errors='ignore') as f:
    for line in f:
        found_targets.update(
            {message: True for message in target_messages if message in line}
        )

# Report each target. Every hit overwrites `query`, so the last message found
# is the one used for retrieval.
for target in target_messages:
    if not found_targets[target]:
        print(f"❌ Not found: {target}")
        continue
    print(f"✅ Found: {target}")
    query = f"Stage: {target}"


❌ Not found: [SCTP]   Connect failed: Connection refused
❌ Not found: [NGAP]   Received unsuccessful result for SCTP association (3), instance 0, cnx_id 1
✅ Found: [PHY]   Received Time doesn't correspond to the time we think it is (slot mismatch
✅ Found: [PHY]   Received Time doesn't correspond to the time we think it is (frame mismatch


In [21]:
# Retrieve the two stored debug cases closest to the current query.
results = vectordb.similarity_search(query, k=2)

# Without a match there is nothing to build the prompt from; fail with a
# descriptive message instead of an IndexError on results[0].
if not results:
    raise ValueError("No matching debug case was returned by the vector store for the current query.")

for r in results:
    print("- Matched:", r.page_content)
    # .get keeps the listing consistent with matched_related_config below
    print("- Related config:", r.metadata.get("related_config", ""))
    print("-----------------------------------------------")

# The top-ranked case drives the prompt.
matched_case = results[0]
matched_symptom = matched_case.page_content
matched_related_config = matched_case.metadata.get("related_config", "")


- Matched: Stage: fh_setup
Symptom: DU reports continuous 'Received Time doesn't correspond to the time we think it is' and 'Jump in frame counter' errors after XRAN start. Observed mismatch between expected and received frame/slot numbers and frequent double sync detection.
Log: [PHY]   Jump in frame counter last_frame 86 => 167, slot 19; [PHY]   Received Time doesn't correspond to the time we think it is (slot mismatch, received 167.19, expected 86.11); [PHY]   Detected double sync message 371.6 => 371.7
- Related config: iq_width_prach, iq_width
-----------------------------------------------
- Matched: Stage: fh_setup
Symptom: DU reports continuous 'Received Time doesn't correspond to the time we think it is' and 'Jump in frame counter' errors after XRAN start. Observed mismatch between expected and received frame/slot numbers and frequent double sync detection.
Log: [PHY]   Jump in frame counter last_frame 86 => 167, slot 19; [PHY]   Received Time doesn't correspond to the time we

In [22]:
# Load the segmented configuration and the reference text. The encoding is
# given explicitly so the result does not depend on the platform default.
with open(current_config_json_path, "r", encoding="utf-8") as f:
    config_segments_context = json.load(f)
with open(reference_context_path, "r", encoding="utf-8") as f:
    reference_context = f.read()

# Render the configuration as real JSON. Interpolating the loaded object
# directly would insert its Python repr (single quotes), which is not the
# format the prompt asks the model to work with.
config_segments_json = json.dumps(config_segments_context, indent=2, ensure_ascii=False)

prompt_template = f"""
You are a 5G network expert. Your job is to revise configuration files based on observed network issues and debug knowledge.

Issue Description:
"{query}"

Matching debug knowledge:
{matched_case.page_content}
Relevant parameters: {matched_case.metadata["related_config"]}

Reference Device Address Table (external reference file):
{reference_context}

Current configuration block:
{config_segments_json}

Please revise the configuration using correct addresses from the reference. Output only the revised config section.

Return a list of JSON objects with the following structure:
[
  {{
    "label": "parameter_name",
    "content": "parameter_name = (...);",
    "reference_reason": "Short explanation matching the value to the reference device table (e.g., correct MAC, matches expected setting).",
    "model_reason": "Additional expert analysis in 1-2 sentences explaining why this change is necessary, beneficial, or resolves a network issue."
  }},
  ...
]

- Only include parameters listed in 'Relevant parameters'.
- Do not include any explanation outside of the JSON structure.
- Keep "reference_reason" based on the reference table.
- Derive "model_reason" from your own technical reasoning.

"""



## LLM API 設定

In [23]:
import os

# SECURITY: the API key is read from the environment so it never appears in
# the notebook source or its saved outputs. Any key that was previously
# hard-coded in this cell must be treated as leaked and rotated.
_gemini_api_key = os.environ.get("GOOGLE_API_KEY")
if not _gemini_api_key:
    raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running this cell.")

genai.configure(api_key=_gemini_api_key)
model = genai.GenerativeModel("gemini-2.0-flash")
# for m in genai.list_models():
#     print(m.name)

####################################################################################################################################################################################

# Alternative backend (disabled). Its key is also taken from the environment.
# from langchain_nvidia_ai_endpoints import ChatNVIDIA

# client = ChatNVIDIA(
#   model="meta/llama-3.1-70b-instruct",
#   api_key=os.environ["NVIDIA_API_KEY"],
#   temperature=0,
#   top_p=0.7,
#   max_tokens=1024,
# )

# for chunk in client.stream([{"role":"user","content":""}]): 
#   print(chunk.content, end="")
# response = client.invoke([{"role": "user", "content": prompt_template}])
# print(response.content)



In [24]:
# Send the assembled prompt to Gemini and print the reply text.
response = model.generate_content(prompt_template)                                # Gemini API
# LLM Suggested Revisions
print("LLM Suggested Revisions：\n")
print(response.text)



# Alternative backend (disabled): send the same prompt through the NVIDIA client.
# response = client.invoke([{"role": "user", "content": prompt_template}])            # NVIDIA
# # LLM Suggested Revisions
# print("LLM Suggested Revisions：\n")
# print(response.content)

LLM Suggested Revisions：

```json
[
  {
    "label": "iq_width_prach",
    "content": "iq_width_prach = 9;",
    "reference_reason": "Matches iq_width_prach value from reference_info.",
    "model_reason": "The mismatch in frame timing suggests synchronization issues. Increasing the IQ width may improve timing resolution and reduce frame mismatches by providing more precise synchronization information."
  },
  {
    "label": "iq_width",
    "content": "iq_width = 9;",
    "reference_reason": "Matches iq_width value from reference_info.",
    "model_reason": "In addition to PRACH, adjusting the general iq_width may also improve the accuracy of signal processing and reduce synchronization errors, preventing frame counter jumps and double sync detections."
  }
]
```


In [25]:
import json
import re


def parse_llm_response(text):
    """Extract the ``label``/``content`` pairs from an LLM reply holding JSON.

    The reply may be bare JSON or JSON wrapped in a Markdown code fence (with
    or without a ``json`` tag), optionally surrounded by extra prose. A single
    JSON object is treated as a one-element list.

    Args:
        text: Raw reply text from the model.

    Returns:
        A list of ``{"label": ..., "content": ...}`` dicts, one per
        well-formed suggestion. Entries that are not objects or lack either
        key are skipped with a warning. An empty list is returned when the
        payload is not valid JSON or is neither a list nor an object.
    """
    cleaned = text.strip()

    # Prefer the contents of a complete fenced block anywhere in the reply.
    fence = re.search(r"```(?:json)?\s*(.*?)\s*```", cleaned, flags=re.DOTALL | re.IGNORECASE)
    if fence:
        cleaned = fence.group(1)
    else:
        # No closing fence: still drop a leading fence marker if present.
        cleaned = re.sub(r"^```(?:json)?\s*", "", cleaned, flags=re.IGNORECASE)

    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError as e:
        print("❌ JSON 解析失敗:", e)
        print("🔍 原始內容:\n", cleaned)
        return []

    if isinstance(parsed, dict):
        parsed = [parsed]
    if not isinstance(parsed, list):
        print("⚠️ Unexpected JSON payload (expected a list of objects):", parsed)
        return []

    llm_suggestions = []
    for item in parsed:
        if isinstance(item, dict) and "label" in item and "content" in item:
            llm_suggestions.append({"label": item["label"], "content": item["content"]})
        else:
            print("⚠️ Skipping malformed suggestion:", item)
    return llm_suggestions

# Usage: keep only the label/content pairs from the model reply.
llm_suggestions = parse_llm_response(response.text)
print(llm_suggestions)


[{'label': 'iq_width_prach', 'content': 'iq_width_prach = 9;'}, {'label': 'iq_width', 'content': 'iq_width = 9;'}]


In [26]:
import re

def apply_llm_suggestions(conf_path, output_path, llm_suggestions):
    """Write a copy of a config file with the suggested assignments applied.

    For every suggestion, each ``<label> = <value>;`` assignment in the file
    is replaced by the suggestion's ``content`` text. Both value shapes are
    recognised:

    * parenthesised values, which may span several lines
      (``ru_addr = ("aa:..", "bb:..");``);
    * scalar values on one line (``iq_width = 9;``).

    The label is matched literally and only as a whole name, so ``iq_width``
    does not touch ``my_iq_width`` or ``iq_width_prach``. A warning is printed
    for any label that matches nothing, so a suggestion that had no effect
    does not go unnoticed.

    Args:
        conf_path: Path of the file to read.
        output_path: Path of the file to write.
        llm_suggestions: Iterable of dicts with ``"label"`` and ``"content"``.

    Raises:
        OSError: If either file cannot be opened.
        KeyError: If a suggestion lacks ``"label"`` or ``"content"``.
    """
    # Read the original file
    with open(conf_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Replace the assignment of every suggested label
    for suggestion in llm_suggestions:
        label = suggestion["label"]
        replacement = suggestion["content"]
        # (?<!\w)   : the label must not be the tail of a longer identifier
        # \([^;]*?\): parenthesised value, possibly multi-line, within one statement
        # [^;\n(){}]+ : scalar value on a single line
        pattern = re.compile(
            rf"(?<!\w){re.escape(label)}\s*=\s*(?:\([^;]*?\)\s*;|[^;\n(){{}}]+;)"
        )
        # A callable replacement inserts the text verbatim; a plain string
        # would have its backslashes interpreted as regex escapes.
        content, n_replaced = pattern.subn(lambda _match, new=replacement: new, content)
        if n_replaced == 0:
            print(f"⚠️ No assignment found for '{label}' in {conf_path}; nothing replaced")

    # Write the updated file
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(content)

    print(f"✅ Update file ：{output_path}")

# Example run: write the revised copy of the DU .conf file.
apply_llm_suggestions(
    conf_path  =current_config_path,
    output_path=after_conf_path, 
    llm_suggestions=llm_suggestions
)

# Apply the same text substitution to the segmented JSON version of the config.
# NOTE(review): the replacement is inserted as raw text; if it contains double
# quotes the output may no longer be valid JSON — verify the written file.
apply_llm_suggestions(
    conf_path=current_config_json_path,
    output_path=after_json_path,
    llm_suggestions=llm_suggestions
)


✅ Update file ：/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3_modification_1.conf
✅ Update file ：/home/aiml/johnson/Scenario/Scenario_3/DU/conf/Scenario_3_modification_1.conf.segments.json
