In [9]:
# %%
import os
import dotenv
dotenv.load_dotenv()

# %%
from langchain_community.vectorstores import FAISS
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain_openai.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.globals import set_debug, set_verbose

set_verbose(True)
set_debug(True)

# Загрузка эмбеддингов и векторной БД
embedding = OpenAIEmbeddings()
vectorstore = FAISS.load_local("vuln_index", embedding, allow_dangerous_deserialization=True)

# Инициализация модели
model = ChatOpenAI(model_name="gpt-4o-mini")


qa_template = '''
"""
YOU ARE THE WORLD'S LEADING SMART CONTRACT SECURITY ANALYST, RECOGNIZED FOR IDENTIFYING AND MITIGATING CRITICAL VULNERABILITIES IN SOLIDITY CODE AT SCALE.

### OBJECTIVE ###
- THOROUGHLY ANALYZE the provided Solidity code
- CROSS-REFERENCE with the KNOWN VULNERABILITIES CONTEXT supplied
- IDENTIFY potential flaws, risks, and optimization opportunities
- SUGGEST PRECISE FIXES to improve code safety and robustness

### INPUTS ###
- KNOWN VULNERABILITIES CONTEXT:
{context}

- TARGET SOLIDITY CODE TO ANALYZE:
{question}

### OUTPUT FORMAT ###
RETURN a COMPREHENSIVE JSON ARRAY where EACH DETECTED ISSUE follows THIS STRUCTURE:
[
  {{
    "line": "LINE NUMBER WHERE ISSUE OCCURS",
    "token": "EXACT PROBLEMATIC CODE SNIPPET",
    "problem": "CONCISE NAME OF THE ISSUE",
    "severity": "low | medium | high",
    "explanation": "DETAILED REASON WHY THIS IS A PROBLEM INCLUDING POTENTIAL IMPACT",
    "migration": "CLEAR AND ACTIONABLE INSTRUCTION ON HOW TO FIX OR MITIGATE THIS ISSUE"
  }}
]

### CHAIN OF THOUGHTS TO APPLY ###
FOLLOW THIS STEP-BY-STEP LOGIC TO GUIDE YOUR ANALYSIS:

1. UNDERSTAND:
   - READ the known vulnerabilities context carefully
   - COMPREHEND the functionality and purpose of the target Solidity code

2. BASICS:
   - IDENTIFY fundamental security risks related to access control, reentrancy, overflows, gas consumption, etc.

3. BREAK DOWN:
   - DIVIDE the code into logical sections (functions, modifiers, inheritance)

4. ANALYZE:
   - INSPECT each part for known vulnerabilities and undocumented patterns

5. BUILD:
   - ASSEMBLE findings into a CLEAR LIST OF ISSUES using the specified output format

6. EDGE CASES:
   - CONSIDER atypical attacks, compiler version quirks, and gas-related vulnerabilities

7. FINAL ANSWER:
   - PRESENT the fully populated, clear JSON array with no missing fields

### WHAT NOT TO DO ###
- NEVER PROVIDE VAGUE OR INCOMPLETE ISSUE DESCRIPTIONS
- NEVER IGNORE LOW SEVERITY VULNERABILITIES IF THEY EXIST
- NEVER FAIL TO SUGGEST A MIGRATION/FIX EVEN FOR LOW-SEVERITY ISSUES
- NEVER OUTPUT FREE-TEXT PARAGRAPHS OUTSIDE THE JSON STRUCTURE
- NEVER SKIP THE CHAIN OF THOUGHTS LOGIC WHEN ANALYZING

### EXAMPLES OF GOOD DETECTIONS ###
[
  {{
    "line": "42",
    "token": "transfer(_to, amount);",
    "problem": "Unsafe External Call",
    "severity": "high",
    "explanation": "Direct transfer can trigger fallback functions leading to reentrancy attacks.",
    "migration": "Use Checks-Effects-Interactions pattern or reentrancy guards."
  }},
  {{
    "line": "15",
    "token": "uint8 counter;",
    "problem": "Possible Overflow",
    "severity": "medium",
    "explanation": "uint8 can easily overflow; Solidity 0.8+ reverts on overflow but caution still needed for explicit low types.",
    "migration": "Ensure careful increment operations or use SafeMath if below 0.8."
  }}
]

"""

'''
prompt = PromptTemplate(template=qa_template, input_variables=['context', 'question'])


# Создаем chain с retrieval
retriever = vectorstore.as_retriever(search_type="similarity", k=4)

qa_chain = RetrievalQA.from_chain_type(
    llm=model,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={'prompt': prompt}
)

# %%
# Читаем код из snippets/
snippets_path = "snippets"
files = os.listdir(snippets_path)
code_snippets = []
for file in files:
    with open(os.path.join(snippets_path, file), "r") as f:
        code_snippets.append(f.read())

# %%
# Анализ одного сниппета
code = code_snippets[2]

# %%
# Исправленный вызов chain.invoke() без дублирующихся ключевых аргументов
result = qa_chain.invoke({"query": code})

# %%
print(code)
print(result)


[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "// SPDX-License-Identifier: MIT\npragma solidity ^0.8.0;\n\ncontract PiggyBridge is ReentrancyGuard {\n    mapping(address => uint256) public balances;\n    mapping(bytes32 => bool) public executedTxs;\n\n    function deposit() external payable {\n        balances[msg.sender] += msg.value;\n    }\n\n    function withdraw(uint256 amount, uint256 nonce, bytes memory signature) external nonReentrant {\n        require(balances[msg.sender] >= amount, \"Insufficient balance\");\n\n        bytes32 messageHash = keccak256(abi.encodePacked(msg.sender, amount, nonce));\n        require(!executedTxs[messageHash], \"Already executed\");\n\n        address signer = recoverSigner(messageHash, signature);\n        require(signer == msg.sender, \"Invalid signature\");\n\n        executedTxs[messageHash] = true;\n        balances[msg.sender] -= amount;\n        payable(msg.sender).transfer(amount);\

In [13]:
print(    "Human: \n\"\"\"\nYOU ARE THE WORLD'S LEADING SMART CONTRACT SECURITY ANALYST, RECOGNIZED FOR IDENTIFYING AND MITIGATING CRITICAL VULNERABILITIES IN SOLIDITY CODE AT SCALE.\n\n### OBJECTIVE ###\n- THOROUGHLY ANALYZE the provided Solidity code\n- CROSS-REFERENCE with the KNOWN VULNERABILITIES CONTEXT supplied\n- IDENTIFY potential flaws, risks, and optimization opportunities\n- SUGGEST PRECISE FIXES to improve code safety and robustness\n\n### INPUTS ###\n- KNOWN VULNERABILITIES CONTEXT:\n[Reentrancy Vulnerability]\n\nDescription:\nA reentrancy attack happens when a contract makes an external call to another untrusted contract before it resolves its own internal logic. The untrusted contract can call back into the original function and exploit the inconsistent state.\n\nExample:\nfunction withdraw(uint amount) public {\n    require(balances[msg.sender] >= amount);\n    (bool success, ) = msg.sender.call{value: amount}(\"\");\n    require(success);\n    balances[msg.sender] -= amount;\n}\n\nFix:\nUse Checks-Effects-Interactions pattern and `transfer` instead of `call` where possible.\n\nFixed Example:\nfunction withdraw(uint amount) public {\n    require(balances[msg.sender] >= amount);\n    balances[msg.sender] -= amount;\n    payable(msg.sender).transfer(amount);\n}\n\n[Replay Attack]\n\nDescription:\nA replay attack occurs when a valid transaction is maliciously or fraudulently repeated on the same or different blockchain. In Solidity, this often affects multi-chain deployments or poorly secured signing logic in meta-transactions or off-chain signatures.\n\nExample:\ncontract Token {\n    mapping(address => uint256) public balances;\n    mapping(bytes32 => bool) public executed;\n\n    function transferWithSig(address to, uint256 amount, uint8 v, bytes32 r, bytes32 s) public {\n        bytes32 message = keccak256(abi.encodePacked(to, amount));\n        address signer = ecrecover(message, v, r, s);\n\n        require(signer != address(0), \"Invalid signature\");\n\n        balances[signer] -= amount;\n        balances[to] += amount;\n    }\n}\n\nProblem:\nThe same signature can be replayed multiple times, since thereâ€™s no uniqueness (like nonce or timestamp). An attacker can reuse a previously signed message to drain tokens.\n\nFix:\nInclude a unique value (e.g., nonce or timestamp) in the signed message and store it to prevent reuse.\n\nFixed Example:\ncontract Token {\n    mapping(address => uint256) public balances;\n    mapping(bytes32 => bool) public executed;\n\n    function transferWithSig(address to, uint256 amount, uint256 nonce, uint8 v, bytes32 r, bytes32 s) public {\n        bytes32 message = keccak256(abi.encodePacked(to, amount, nonce));\n        require(!executed[message], \"Already executed\");\n\n        address signer = ecrecover(message, v, r, s);\n        require(signer != address(0), \"Invalid signature\");\n\n        executed[message] = true;\n        balances[signer] -= amount;\n        balances[to] += amount;\n    }\n}\n\n\n[tx.origin Authentication Vulnerability]\n\nDescription:\nUsing `tx.origin` for authentication allows phishing attacks, since `tx.origin` is the original sender of the entire transaction and can be tricked.\n\nExample:\naddress owner;\n\nfunction transfer() public {\n    require(tx.origin == owner);\n    msg.sender.transfer(address(this).balance);\n}\n\nFix:\nUse `msg.sender` for authentication instead of `tx.origin`.\n\nFixed Example:\nfunction transfer() public {\n    require(msg.sender == owner);\n    payable(msg.sender).transfer(address(this).balance);\n}\n\n\n[Integer Overflow and Underflow]\n\nDescription:\nIn Solidity versions <0.8.0, arithmetic operations do not check for overflows or underflows, leading to unexpected behavior.\n\nExample:\nfunction add(uint256 a, uint256 b) public pure returns (uint256) {\n    return a + b;\n}\n\nFix:\nUse Solidity >=0.8.0 or use SafeMath library to handle overflow checks.\n\nFixed Example:\nfunction add(uint256 a, uint256 b) public pure returns (uint256) {\n    unchecked {\n        return a + b;\n    }\n}\n\n- TARGET SOLIDITY CODE TO ANALYZE:\n// SPDX-License-Identifier: MIT\npragma solidity ^0.8.0;\n\ncontract PiggyBridge is ReentrancyGuard {\n    mapping(address => uint256) public balances;\n    mapping(bytes32 => bool) public executedTxs;\n\n    function deposit() external payable {\n        balances[msg.sender] += msg.value;\n    }\n\n    function withdraw(uint256 amount, uint256 nonce, bytes memory signature) external nonReentrant {\n        require(balances[msg.sender] >= amount, \"Insufficient balance\");\n\n        bytes32 messageHash = keccak256(abi.encodePacked(msg.sender, amount, nonce));\n        require(!executedTxs[messageHash], \"Already executed\");\n\n        address signer = recoverSigner(messageHash, signature);\n        require(signer == msg.sender, \"Invalid signature\");\n\n        executedTxs[messageHash] = true;\n        balances[msg.sender] -= amount;\n        payable(msg.sender).transfer(amount);\n    }\n\n    function recoverSigner(bytes32 hash, bytes memory signature) public pure returns (address) {\n        bytes32 ethSignedHash = keccak256(abi.encodePacked(\"\\x19Ethereum Signed Message:\\n32\", hash));\n        (bytes32 r, bytes32 s, uint8 v) = splitSignature(signature);\n        return ecrecover(ethSignedHash, v, r, s);\n    }\n\n    function splitSignature(bytes memory sig) internal pure returns (bytes32 r, bytes32 s, uint8 v) {\n        require(sig.length == 65, \"Invalid signature length\");\n        assembly {\n            r := mload(add(sig, 32))\n            s := mload(add(sig, 64))\n            v := byte(0, mload(add(sig, 96)))\n        }\n    }\n}\n\n### OUTPUT FORMAT ###\nRETURN a COMPREHENSIVE JSON ARRAY where EACH DETECTED ISSUE follows THIS STRUCTURE:\n[\n  {\n    \"line\": \"LINE NUMBER WHERE ISSUE OCCURS\",\n    \"token\": \"EXACT PROBLEMATIC CODE SNIPPET\",\n    \"problem\": \"CONCISE NAME OF THE ISSUE\",\n    \"severity\": \"low | medium | high\",\n    \"explanation\": \"DETAILED REASON WHY THIS IS A PROBLEM INCLUDING POTENTIAL IMPACT\",\n    \"migration\": \"CLEAR AND ACTIONABLE INSTRUCTION ON HOW TO FIX OR MITIGATE THIS ISSUE\"\n  }\n]\n\n### CHAIN OF THOUGHTS TO APPLY ###\nFOLLOW THIS STEP-BY-STEP LOGIC TO GUIDE YOUR ANALYSIS:\n\n1. UNDERSTAND:\n   - READ the known vulnerabilities context carefully\n   - COMPREHEND the functionality and purpose of the target Solidity code\n\n2. BASICS:\n   - IDENTIFY fundamental security risks related to access control, reentrancy, overflows, gas consumption, etc.\n\n3. BREAK DOWN:\n   - DIVIDE the code into logical sections (functions, modifiers, inheritance)\n\n4. ANALYZE:\n   - INSPECT each part for known vulnerabilities and undocumented patterns\n\n5. BUILD:\n   - ASSEMBLE findings into a CLEAR LIST OF ISSUES using the specified output format\n\n6. EDGE CASES:\n   - CONSIDER atypical attacks, compiler version quirks, and gas-related vulnerabilities\n\n7. FINAL ANSWER:\n   - PRESENT the fully populated, clear JSON array with no missing fields\n\n### WHAT NOT TO DO ###\n- NEVER PROVIDE VAGUE OR INCOMPLETE ISSUE DESCRIPTIONS\n- NEVER IGNORE LOW SEVERITY VULNERABILITIES IF THEY EXIST\n- NEVER FAIL TO SUGGEST A MIGRATION/FIX EVEN FOR LOW-SEVERITY ISSUES\n- NEVER OUTPUT FREE-TEXT PARAGRAPHS OUTSIDE THE JSON STRUCTURE\n- NEVER SKIP THE CHAIN OF THOUGHTS LOGIC WHEN ANALYZING\n\n### EXAMPLES OF GOOD DETECTIONS ###\n[\n  {\n    \"line\": \"42\",\n    \"token\": \"transfer(_to, amount);\",\n    \"problem\": \"Unsafe External Call\",\n    \"severity\": \"high\",\n    \"explanation\": \"Direct transfer can trigger fallback functions leading to reentrancy attacks.\",\n    \"migration\": \"Use Checks-Effects-Interactions pattern or reentrancy guards.\"\n  },\n  {\n    \"line\": \"15\",\n    \"token\": \"uint8 counter;\",\n    \"problem\": \"Possible Overflow\",\n    \"severity\": \"medium\",\n    \"explanation\": \"uint8 can easily overflow; Solidity 0.8+ reverts on overflow but caution still needed for explicit low types.\",\n    \"migration\": \"Ensure careful increment operations or use SafeMath if below 0.8.\"\n  }\n]\n\n\"\"\"")

Human: 
"""
YOU ARE THE WORLD'S LEADING SMART CONTRACT SECURITY ANALYST, RECOGNIZED FOR IDENTIFYING AND MITIGATING CRITICAL VULNERABILITIES IN SOLIDITY CODE AT SCALE.

### OBJECTIVE ###
- THOROUGHLY ANALYZE the provided Solidity code
- CROSS-REFERENCE with the KNOWN VULNERABILITIES CONTEXT supplied
- IDENTIFY potential flaws, risks, and optimization opportunities
- SUGGEST PRECISE FIXES to improve code safety and robustness

### INPUTS ###
- KNOWN VULNERABILITIES CONTEXT:
[Reentrancy Vulnerability]

Description:
A reentrancy attack happens when a contract makes an external call to another untrusted contract before it resolves its own internal logic. The untrusted contract can call back into the original function and exploit the inconsistent state.

Example:
function withdraw(uint amount) public {
    require(balances[msg.sender] >= amount);
    (bool success, ) = msg.sender.call{value: amount}("");
    require(success);
    balances[msg.sender] -= amount;
}

Fix:
Use Checks-Effects-Int

In [10]:
result['result']

'```json\n[\n  {\n    "line": "30",\n    "token": "require(!executedTxs[messageHash], \\"Already executed\\");",\n    "problem": "Replay Attack",\n    "severity": "high",\n    "explanation": "If nonces are not uniquely tied to users or if a signature is reused by an attacker, they can execute the withdraw function multiple times, draining funds.",\n    "migration": "Ensure the nonce is unique for each user and track used nonces with a separate mapping to prevent replay attacks."\n  },\n  {\n    "line": "25",\n    "token": "payable(msg.sender).transfer(amount);",\n    "problem": "Reentrancy Vulnerability",\n    "severity": "high",\n    "explanation": "Directly transferring funds to an external address can allow the recipient to call back into the contract and manipulate its state before the transfer is completed.",\n    "migration": "Use the Checks-Effects-Interactions pattern by first updating the state (deducting the balance) and then transferring funds."\n  },\n  {\n    "line": "8",\

In [11]:
print(result['result'])

```json
[
  {
    "line": "30",
    "token": "require(!executedTxs[messageHash], \"Already executed\");",
    "problem": "Replay Attack",
    "severity": "high",
    "explanation": "If nonces are not uniquely tied to users or if a signature is reused by an attacker, they can execute the withdraw function multiple times, draining funds.",
    "migration": "Ensure the nonce is unique for each user and track used nonces with a separate mapping to prevent replay attacks."
  },
  {
    "line": "25",
    "token": "payable(msg.sender).transfer(amount);",
    "problem": "Reentrancy Vulnerability",
    "severity": "high",
    "explanation": "Directly transferring funds to an external address can allow the recipient to call back into the contract and manipulate its state before the transfer is completed.",
    "migration": "Use the Checks-Effects-Interactions pattern by first updating the state (deducting the balance) and then transferring funds."
  },
  {
    "line": "8",
    "token": "mapping(

In [12]:
import json
result_text = result['result']
(result_text[result_text.find("```json") + len("```json"):result_text.find("```", result_text.find("```json") + len("```json"))])
json.loads(result_text[result_text.find("```json") + len("```json"):result_text.find("```", result_text.find("```json") + len("```json"))])

[{'line': '30',
  'token': 'require(!executedTxs[messageHash], "Already executed");',
  'problem': 'Replay Attack',
  'severity': 'high',
  'explanation': 'If nonces are not uniquely tied to users or if a signature is reused by an attacker, they can execute the withdraw function multiple times, draining funds.',
  'migration': 'Ensure the nonce is unique for each user and track used nonces with a separate mapping to prevent replay attacks.'},
 {'line': '25',
  'token': 'payable(msg.sender).transfer(amount);',
  'problem': 'Reentrancy Vulnerability',
  'severity': 'high',
  'explanation': 'Directly transferring funds to an external address can allow the recipient to call back into the contract and manipulate its state before the transfer is completed.',
  'migration': 'Use the Checks-Effects-Interactions pattern by first updating the state (deducting the balance) and then transferring funds.'},
 {'line': '8',
  'token': 'mapping(bytes32 => bool) public executedTxs;',
  'problem': 'Poor 