In [2]:
# Prompt template for triaging a bandit JSON report.
# It is rendered through ChatPromptTemplate.from_template(...).format_messages(json_data=...),
# so every literal brace is doubled and {json_data} is the only placeholder.
# The input schema mirrors what the notebook actually sends: a top-level "metrics"
# object holding only "_totals", and a top-level "results" list.
high_risk_prompt = """你精通python代码的静态检查相关的工具和原理，假设我拿到了python工具bandit扫描的结果，结果是json格式的，我需要分析这个结果从而确认是否误报，是否需要高优先级修复。扫描结果格式如下所示：
```json
{{
  "metrics": {{
    "_totals": {{
      "CONFIDENCE.HIGH": <所有文件中扫描出的问题信心指数高的个数>,
      "CONFIDENCE.LOW": <所有文件中扫描出的问题信心指数低的个数>,
      "CONFIDENCE.MEDIUM":<所有文件中扫描出的问题信心指数中的个数>,
      "CONFIDENCE.UNDEFINED": <所有文件中扫描出的问题信心指数不确定的个数>,
      "SEVERITY.HIGH": <所有文件中扫描出的问题严重性高的个数>,
      "SEVERITY.LOW": <所有文件中扫描出的问题严重性低的个数>,
      "SEVERITY.MEDIUM": <所有文件中扫描出的问题严重性中的个数>,
      "SEVERITY.UNDEFINED": <所有文件中扫描出的问题严重性不确定的个数>,
      "loc": <所有文件代码行数>,
      "nosec": <所有文件nosec打标数量>,
      "skipped_tests": <所有文件跳过的测试数量>
    }}
  }},
  "results":[<检查工具扫描出的问题列表>
    <example>
    {{
      "code": "3 import json\n4 import requests\n5 import subprocess\n6 import logging\n7 \n",
      "col_offset": 0,
      "end_col_offset": 17,
      "filename": "./bench/common/system.py",
      "issue_confidence": "HIGH",
      "issue_cwe": {{
        "id": 78,
        "link": "https://cwe.mitre.org/data/definitions/78.html"
      }},
      "issue_severity": "LOW",
      "issue_text": "Consider possible security implications associated with the subprocess module.",
      "line_number": 5,
      "line_range": [
        5
      ],
      "more_info": "https://bandit.readthedocs.io/en/1.8.0/blacklists/blacklist_imports.html#b404-import-subprocess",
      "test_id": "B404",
      "test_name": "blacklist"
    }}
    </example>
  ]
}}
```

你需要做的事情是：
1.请根据给出的json数据，帮我找出总共有多少高危（Severity = HIGH）、中危（Severity = MEDIUM）、低危（Severity = LOW）的漏洞。
2.输出扫描结果中高危的问题列表。
3.根据扫描结果中results记录，帮我找出扫描结果中，存在高危漏洞的文件列表。

要求：
1.根据我的要求帮我完成上述任务，不要做其他事情。
2.不需要输出任何解释，只需要按照我的要求做完事就好。
3.输出必须是json格式，包括扫描结果的总数量和问题列表。
4.输出结果不需要包括```json, ```这样符号。
5.输出格式如下：
```json
{{
  "total_vulnerabilities":{{
    "high_severity": <高危问题数量>,
    "medium_severity": <中危问题数量>,
    "low_severity": <低危问题数量>
  }},
  "high_severity_issues": [
    {{
      "code": <问题代码>,
      "col_offset": <问题起始列偏移>,
      "end_col_offset": <问题结束列偏移>,
      "filename": <问题所在文件路径>,
      "issue_confidence": <问题置信度>,
      "issue_cwe": {{
        "id": <CWE编号>,
        "name": <CWE名称>
      }},
      "issue_severity": <问题严重等级>,
      "issue_text": <问题描述>,
      "line_range": <问题所在行>,
      "more_info": <参考资料链接>,
      "test_id": <问题类型>,
      "test_name": <问题类型名称>
    }},
    ...
  ],
  "files_with_high_severity_issues": [
    <文件路径>,
    ...
  ]
}}
```

json数据：
```json
{json_data}
```
"""

In [4]:
from dotenv import load_dotenv, find_dotenv

_ = load_dotenv(find_dotenv("./env/.env"))

import dashscope
from http import HTTPStatus
from pprint import pprint
import json

from langchain.prompts import ChatPromptTemplate
from langchain_community.chat_models import ChatTongyi

# Name of the Tongyi chat model used for the triage requests.
llm_model = "qwen-max"

# NOTE(review): temperature=1.0 is kept from the original; for a strict
# JSON-extraction task a lower value may give more repeatable output — confirm.
llm = ChatTongyi(temperature=1.0, model=llm_model)

# Load the bandit report. The encoding is given explicitly so decoding does
# not depend on the platform's default text encoding.
with open("../notebook/data/sysom-3.3.0/result.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)

# Keep only the scan-wide "_totals" entry of the report's metrics section.
# The container is created once, before the lookup, so the result does not
# depend on the position of "_totals" among the metrics keys.
json_data_simplified = {"metrics": {}}
if "_totals" in json_data["metrics"]:
    json_data_simplified["metrics"]["_totals"] = json_data["metrics"]["_totals"]

# Slice the list of findings into consecutive groups of at most five entries.
scan_results = json_data["results"]
chunks = [
    scan_results[start : start + 5] for start in range(0, len(scan_results), 5)
]

# Sanity check: the chunk sizes must add up to the number of findings.
count = sum(map(len, chunks))
print(count)

# Build the chat prompt template, then walk the chunks once to print each
# chunk's index and size.
high_risk_analysis_prompt = ChatPromptTemplate.from_template(high_risk_prompt)
high_risks = []
for idx, chunk in enumerate(chunks):
    json_data_simplified.update(results=chunk)
    print(f"chunk {idx}", len(json_data_simplified["results"]), sep="\n")
    # The rendered messages are discarded; the call only exercises the template.
    high_risk_analysis_prompt.format_messages(json_data=json_data_simplified)

def batch_chunks(chunks):
    """Render one chat prompt per chunk of bandit findings.

    Each prompt carries the module-level ``json_data_simplified`` content with
    its ``"results"`` entry replaced by the chunk, serialized as JSON text so
    that it matches the ```json block the prompt template wraps it in.

    The payload is built as a new dict for every chunk, so
    ``json_data_simplified`` itself is not modified.

    Args:
        chunks: Iterable of lists of bandit result entries.

    Returns:
        A list with one ``format_messages`` result per chunk, in input order.
    """
    return [
        high_risk_analysis_prompt.format_messages(
            json_data=json.dumps(
                {**json_data_simplified, "results": chunk},
                # Keep any non-ASCII text readable instead of \uXXXX escapes.
                ensure_ascii=False,
            )
        )
        for chunk in chunks
    ]

# Send the prompts to the model in groups of five chunks per llm.batch call.
for i in range(0, len(chunks), 5):
    # Store the rendered prompts under their own name: assigning them to
    # `batch_chunks` would replace the function with a list and make the next
    # iteration fail with "'list' object is not callable".
    batch_messages = batch_chunks(chunks[i : i + 5])
    response_list = llm.batch(batch_messages)
    for response in response_list:
        print(response)
    # Stop after the first group; remove this break to process every chunk.
    break

53
chunk 0
5
chunk 1
5
chunk 2
5
chunk 3
5
chunk 4
5
chunk 5
5
chunk 6
5
chunk 7
5
chunk 8
5
chunk 9
5
chunk 10
3
content='{\n  "total_vulnerabilities": {\n    "high_severity": 53,\n    "medium_severity": 61,\n    "low_severity": 236\n  },\n  "high_severity_issues": [\n    {\n      "code": "460     return True\\n461 def do_cmd(cmd):\\n462     output = os.popen(cmd)\\n463     ret = output.read().strip()\\n464     output.close()\\n",\n      "col_offset": 13,\n      "end_col_offset": 26,\n      "filename": "./script/server/sysom_vmcore/parse_panic.py",\n      "issue_confidence": "HIGH",\n      "issue_cwe": {\n        "id": 78,\n        "name": "Improper Neutralization of Special Elements used in an OS Command (\'OS Command Injection\')"\n      },\n      "issue_severity": "HIGH",\n      "issue_text": "Starting a process with a shell, possible injection detected, security issue.",\n      "line_range": [462],\n      "more_info": "https://bandit.readthedocs.io/en/1.8.0/plugins/b605_start_proc