#  **Skip these codes and go down and find FINAL CODE :)**
# **the following code is used for finding the vulnerable entities.**

In [1]:
import json, openai
import os, sys, re
from openai import OpenAI
from tqdm.notebook import tqdm
import time
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
sys.path.append(project_root)
from config.keys import OPENAI_API_KEY

In [2]:
class Generator:
    def __init__(self):
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        self.json_formatter = "Return the response in RFC8259 compliant JSON according to the ResponseFormat schema with no other text."
        self.message = [{
            "role": "system",
            "content": "You are a cyber-security programmer that can detect line numbers from the contract based on the instruction."
        }]
        self.output_formatter = """
This should instruct the model to output exactly the vulnerability lines, ensuring it doesn't output extra lines or large ranges that contain unrelated code.

Response Schema:
 [
    {
      "start_line": <exact_start_line_number>,
      "end_line": <exact_end_line_number>,
      "code": [
        "vulnerable line 1",
        "vulnerable line 2",
        "... (and so on)"
      ]
    }
 ]
 
** Do not use ```json or any other extra texts in the output. Include only the list of detected lines as the schema.
"""
        self.user_prefix = """You are given a record from a dataset containing smart contract vulnerability analyses. Each record contains an "id", "prompt", and "completion". The "prompt" includes both the instructions and the smart contract code snippet, while the "completion" provides the vulnerability analysis.

Your task is to extract the exact lines of code that are vulnerable from the smart contract code. If the contract is vulnerable, output the minimal range of lines (with exact start and end line numbers) and the code lines themselves in the JSON Response Schema format provided below. If the contract is safe, do not output any JSON (i.e. return an empty result).

Instructions:
- Use the provided numbered contract for vulnerability detection.
- If the completion analysis indicates that the contract is vulnerable (i.e. it does not state that the contract is safe), locate the vulnerable lines of code precisely.
- Do not include any extra commentary or unrelated code; only include the vulnerable lines.
"""
    def get_user_message(self, dataset_output, contract):
        self.user_content = f"""
{self.user_prefix}

This is the helping document to find the lines of vulnerable codes.
Guideline:
{dataset_output}

Smart contract code:
{contract}

Additional Note:
- Only output the minimal range of code lines that are directly vulnerable.
- Do not include any commentary or unrelated code.
- Follow the JSON Response Schema exactly:
---
{self.output_formatter}

###
"""
        self.user_message = {"role": "user", "content": self.user_content}
    def create_prompt(self, dataset_output, contract):
        self.get_user_message(dataset_output, contract)
        self.message.append(self.user_message)
        
    def generate(self):
        completion = self.client.chat.completions.create(
          model="gpt-4o-mini",
          messages = self.message,
          temperature=0.1,
          max_tokens=3200,
          top_p=1.,
          frequency_penalty=0,
          presence_penalty=0,
          stop=None
        )
        answer = completion.choices[0].message.content
        return answer, completion


In [3]:
dataset_name = "train_TrustLLM"
output_dir = os.path.join("..", "..", "data", "processed_data", dataset_name)
os.makedirs(output_dir, exist_ok=True)

contracts_dir = os.path.join(output_dir, "contracts")
os.makedirs(contracts_dir, exist_ok=True)

locs_dir = os.path.join(output_dir, "LOCs")
os.makedirs(locs_dir, exist_ok=True)

raw_dir = os.path.join("..", "..", "data", "dataset", "raw")
dataset_path = os.path.join(raw_dir, f"{dataset_name}.json")

if not os.path.exists(dataset_path):
    raise FileNotFoundError(f"Dataset file not found at: {dataset_path}")

In [None]:
with open(dataset_path, "r", encoding="utf-8") as file:
    data = json.load(file)
print("Total records:", len(data))
print(data[0]["prompt"])

In [5]:
ids = [record.get("id") for record in data]
if len(ids) == len(set(ids)):
    print("All record ids are unique.")
else:
    print("There are duplicate record ids.")

There are duplicate record ids.


In [12]:
end_contract = 1
vul_idx = 0  # Counter for vulnerable smart contracts

for idx, record in enumerate(tqdm(data[:end_contract], desc="Processing Records")):
    prompt_text = record.get("prompt", "")
    completion_text = record.get("completion", "")
    
    # Extract smart contract code from the prompt.
    # This regex now matches any code block enclosed in triple backticks regardless of the language tag.
    code_match = re.search(r"```[^\n]*\n(.*?)\n```", prompt_text, re.DOTALL)
    if code_match:
        contract_code = code_match.group(1).strip()
    else:
        print(f"Record {idx}: No smart contract code found in prompt.")
        continue
    ########## Add line numbers to the contract code
    
    lines = contract_code.split("\n")
    numbered_lines = [f"{i+1}: {line}" for i, line in enumerate(lines)]
    numbered_contract = "\n".join(numbered_lines)
    print(contract_code)
    # Save the contract with numbered lines
    sol_filepath = os.path.join(contracts_dir, f"{vul_idx}.sol")
    with open(sol_filepath, 'w', encoding='utf-8') as f:
        f.write(numbered_contract)

    print(f"Extracting vulnerability for Record {idx} (vulnerable contract index: {vul_idx})...")
    
    # Skip safe contracts
    if "appears to be safe" in completion_text.lower():
        print(f"Record {idx} is marked as safe. Skipping vulnerability extraction.")
        continue
    # Otherwise, if it does not mention "the issue", assume there is no clear vulnerability description.
    if "the issue" not in completion_text.lower():
        print(f"Record {idx} does not clearly indicate a vulnerability. Skipping vulnerability extraction.")
        continue
    
    # Save the vulnerable smart contract code to a .sol file using vul_idx for numbering
    sol_filepath = os.path.join(contracts_dir, f"{vul_idx}.sol")
    with open(sol_filepath, 'w', encoding='utf-8') as f:
        f.write(contract_code)
    
    # Add line numbers to the contract code for vulnerability mapping
    lines = contract_code.split("\n")
    numbered_lines = [f"{i+1}: {line}" for i, line in enumerate(lines)]
    numbered_contract = "\n".join(numbered_lines)
    
    print(f"Extracting vulnerability for Record {idx} (vulnerable contract index: {vul_idx})...")
    
    generator = Generator()
    # Use the 'completion' as the guideline for vulnerability extraction
    generator.create_prompt(completion_text, numbered_contract)
    start_time = time.time()
    answer, _ = generator.generate()
    print(f"Vulnerability extraction for Record {idx} completed in {round(time.time() - start_time, 2)} seconds")
    
    # Parse the answer as JSON and save the vulnerability details if extraction was successful.
    try:
        vulnerability_data = json.loads(answer)
    except Exception as e:
        print(f"Error parsing vulnerability JSON for Record {idx}: {e}")
        continue
    
    json_filepath = os.path.join(locs_dir, f"{vul_idx}.json")
    with open(json_filepath, "w", encoding="utf-8") as json_file:
        json.dump(vulnerability_data, json_file, indent=4)
    print(f"Saved vulnerability data for Record {idx} to {json_filepath}")
    
    vul_idx += 1

print("Process completed!")

Processing Records:   0%|          | 0/1 [00:00<?, ?it/s]

function vest( address to, uint256 amount, uint256 vestPeriodInSeconds ) external returns (uint256 vestIdx) { require(vestPeriodInSeconds > 0, "Vesting: vestPeriodInSeconds == 0"); token.safeTransferFrom(msg.sender, address(this), amount); vestIdx = accountVestList[to].length; accountVestList[to].push( Vest({ amount: amount, vestPeriodInSeconds: vestPeriodInSeconds, creationTimestamp: now, withdrawnAmount: 0 }) ); }
Extracting vulnerability for Record 0 (vulnerable contract index: 0)...
Record 0 is marked as safe. Skipping vulnerability extraction.
Process completed!


In [2]:
#print(generator.message[1]['content'])

In [3]:
#print(answer)

In [9]:
text = "Below is an instruction that describes a reasoning task.\nCarefully assess the contributing factors and their interplay. Utilize the label name to form a coherent reasoning.\n### Instruction:\nPlease analyze the code function and its dependencies, including both incoming and outgoing calls. Considering the label name, identify the underlying cause.\n### Input:\nThe method FuseTokenAdapterV1 in the smart contract wrap\n```Solidiy\nfunction wrap( uint256 amount, address recipient ) external onlyAlchemist returns (uint256) { SafeERC20.safeTransferFrom(underlyingToken, msg.sender, address(this), amount); SafeERC20.safeApprove(underlyingToken, token, amount); uint256 startingBalance = IERC20(token).balanceOf(address(this)); uint256 error; if ((error = ICERC20(token).mint(amount)) != NO_ERROR) { revert FuseError(error); } uint256 endingBalance = IERC20(token).balanceOf(address(this)); uint256 mintedAmount = endingBalance - startingBalance; SafeERC20.safeTransfer(token, recipient, mintedAmount); return mintedAmount; }\n```\n## Dependency Overview:\n### As a Caller:\nFunctions called by FuseTokenAdapterV1:\n```\nSafeERC20.safeTransferFrom\nFuseTokenAdapterV1.address\nSafeERC20.safeApprove\nFuseTokenAdapterV1.IERC20\nFuseTokenAdapterV1.ICERC20\nFuseTokenAdapterV1.FuseError\nSafeERC20.safeTransfer\nmodifier onlyAlchemist() { if (msg.sender != alchemist) { revert Unauthorized(\"Not alchemist\"); } _; }\n```\n### Response:\nGiven that the code is labeled vulnerable, let's determine the reason by breaking down the process."
# solidity_code = re.search(r'Solidiy(.*?)', text, re.DOTALL)

import re

# text = """
# ```solidity

# some texts

# ```solidity

# The rest
# """

# Regex pattern to match Solidity code blocks
pattern = r"```Solidiy\n(.*?)\n```"

matches = re.findall(pattern, text, re.DOTALL)

# Print extracted Solidity code blocks
for i, match in enumerate(matches, 1):
    print(match)


function wrap( uint256 amount, address recipient ) external onlyAlchemist returns (uint256) { SafeERC20.safeTransferFrom(underlyingToken, msg.sender, address(this), amount); SafeERC20.safeApprove(underlyingToken, token, amount); uint256 startingBalance = IERC20(token).balanceOf(address(this)); uint256 error; if ((error = ICERC20(token).mint(amount)) != NO_ERROR) { revert FuseError(error); } uint256 endingBalance = IERC20(token).balanceOf(address(this)); uint256 mintedAmount = endingBalance - startingBalance; SafeERC20.safeTransfer(token, recipient, mintedAmount); return mintedAmount; }


In [4]:
#print('''unction wrap( uint256 amount, address recipient ) external onlyAlchemist returns (uint256) { SafeERC20.safeTransferFrom(underlyingToken, msg.sender, address(this), amount); SafeERC20.safeApprove(underlyingToken, token, amount); uint256 startingBalance = IERC20(token).balanceOf(address(this)); uint256 error; if ((error = ICERC20(token).mint(amount)) != NO_ERROR) { revert FuseError(error); } uint256 endingBalance = IERC20(token).balanceOf(address(this)); uint256 mintedAmount = endingBalance - startingBalance; SafeERC20.safeTransfer(token, recipient, mintedAmount); return mintedAmount; }\n```\n## Dependency Overview:\n### As a Caller:\nFunctions called by FuseTokenAdapterV1:\n```\nSafeERC20.safeTransferFrom\nFuseTokenAdapterV1.address\nSafeERC20.safeApprove\nFuseTokenAdapterV1.IERC20\nFuseTokenAdapterV1.ICERC20\nFuseTokenAdapterV1.FuseError\nSafeERC20.safeTransfer\nmodifier onlyAlchemist() { if (msg.sender != alchemist) { revert Unauthorized(\"Not alchemist\"); } _; }\n''')

#                                                       **skip the above code**

###  **The following code is used for seperating timestamp depenedency from the whole dataset: Train_TrustLLM**

In [None]:
import json
import os
import re
from tqdm import tqdm

dataset_name = "train_TrustLLM"

output_dir = os.path.join("..", "..", "data", "processed_data", dataset_name)
os.makedirs(output_dir, exist_ok=True)

contracts_dir = os.path.join(output_dir, "contracts")
os.makedirs(contracts_dir, exist_ok=True)

locs_dir = os.path.join(output_dir, "LOCs")
os.makedirs(locs_dir, exist_ok=True)

# Path for raw dataset
raw_dir = os.path.join("..", "..", "data", "dataset", "raw")
dataset_path = os.path.join(raw_dir, f"{dataset_name}.json")

with open(dataset_path, "r", encoding="utf-8") as file:
    data = json.load(file)

print("Total records:", len(data))
print("Example record:", data[0]["prompt"])

integer_keywords = ["integer overflow", "integer underflow", "addition overflow", "subtraction underflow", "arithmetic overflow"]
timestamp_keywords = ["timestamp", "block.timestamp", "now", "time manipulation"]

integer_vulnerabilities = []
timestamp_vulnerabilities = []
loc_data = []

vul_idx = 0

for idx, record in enumerate(tqdm(data, desc="Processing Records")):
    prompt_text = record.get("prompt", "")
    completion_text = record.get("completion", "")
    
    # Extract smart contract code using regex
    code_match = re.search(r"```(?:solidity|Solidiy)?\n(.*?)\n```", prompt_text, re.DOTALL)
    if code_match:
        contract_code = code_match.group(1).strip()
    else:
        print(f"Record {idx}: No Solidity code found.")
        continue
    
    # Number the contract code
    lines = contract_code.split("\n")
    numbered_lines = [f"{i+1}: {line}" for i, line in enumerate(lines)]
    numbered_contract = "\n".join(numbered_lines)

    # Skip safe contracts
    if "appears to be safe" in completion_text.lower():
        continue

    # Skip unclear vulnerability cases
    if "the issue" not in completion_text.lower():
        continue

    is_integer_vuln = any(keyword in completion_text.lower() for keyword in integer_keywords)
    is_timestamp_vuln = any(keyword in completion_text.lower() for keyword in timestamp_keywords)

    if is_integer_vuln or is_timestamp_vuln:
        contract_filename = f"{vul_idx}.sol"
        contract_filepath = os.path.join(contracts_dir, contract_filename)

        # Save Solidity contract with numbered lines
        # with open(contract_filepath, "w", encoding="utf-8") as f:
        #     f.write(contract_code)

        loc_entry = {
            "id": record["id"],
            "contract_index": vul_idx,
            "lines_of_code": len(lines),
            "contract_file": contract_filepath
        }
        loc_data.append(loc_entry)

        vuln_record = {
            "id": record["id"],
            "prompt": prompt_text,
            "completion": completion_text,
            "contract_file": contract_filepath
        }

        if is_integer_vuln:
            integer_vulnerabilities.append(vuln_record)

        if is_timestamp_vuln:
            timestamp_vulnerabilities.append(vuln_record)

        print(f"\nExtracted Contract {vul_idx} (Record {idx}):\n")
        #print(numbered_contract)

        vul_idx += 1

integer_json_path = os.path.join(output_dir, "integer_vulnerabilities.json")
timestamp_json_path = os.path.join(output_dir, "timestamp_vulnerabilities.json")
loc_json_path = os.path.join(locs_dir, "loc_data.json")

with open(integer_json_path, "w", encoding="utf-8") as f:
    json.dump(integer_vulnerabilities, f, indent=4)

with open(timestamp_json_path, "w", encoding="utf-8") as f:
    json.dump(timestamp_vulnerabilities, f, indent=4)

# with open(loc_json_path, "w", encoding="utf-8") as f:
#     json.dump(loc_data, f, indent=4)

print("\nExtraction complete!")
print(f"Saved {len(integer_vulnerabilities)} integer overflow/underflow cases in {integer_json_path}")
print(f"Saved {len(timestamp_vulnerabilities)} timestamp dependency cases in {timestamp_json_path}")
print(f"Saved {len(loc_data)} contracts' LOC information in {loc_json_path}")


### **We plan to use DS4 only for timestamp dependency as other vulnerabilties are not enough (#IoU = 1)**

#### **working on extracted json file from DS4 for Timestamp dependency**

In [5]:
import json
import os
import re

dataset_name = "timestamp_vulnerabilities"
input_dir = os.path.join("..", "..", "data", "processed_data", "train_TrustLLM")
input_path = os.path.join(input_dir, f"{dataset_name}.json")
output_dir = input_dir
contracts_dir = os.path.join(output_dir, "contracts")
os.makedirs(contracts_dir, exist_ok=True)

with open(input_path, "r", encoding="utf-8") as file:
    data = json.load(file)
    
unique_records = {}

solidity_pattern = re.compile(r"```(?:solidity|Solidiy)?\n(.*?)\n```", re.DOTALL)

for record in data:
    record_id = record["id"]

    if record_id not in unique_records:
        unique_records[record_id] = {
            "id": record_id,
            "prompt": record["prompt"],
            "completion": record["completion"],
            "contract_file": f"{record_id}.sol"
        }

    prompt_text = record.get("prompt", "")
    code_match = solidity_pattern.search(prompt_text)

    if code_match:
        contract_code = code_match.group(1).strip()
        contract_filepath = os.path.join(contracts_dir, f"{record_id}.sol")

        with open(contract_filepath, "w", encoding="utf-8") as sol_file:
            sol_file.write(contract_code + "\n")

unique_json_path = os.path.join(output_dir, "unique_timestamp_vulnerabilities.json")
with open(unique_json_path, "w", encoding="utf-8") as json_file:
    json.dump(list(unique_records.values()), json_file, indent=4)

print(f"Unique records saved in: {unique_json_path}")
print(f"Solidity contracts saved in: {contracts_dir}")


Unique records saved in: ..\..\data\processed_data\train_TrustLLM\unique_timestamp_vulnerabilities.json
Solidity contracts saved in: ..\..\data\processed_data\train_TrustLLM\contracts


In [None]:
############## after saving unique records for timestamp dependency, 
############## and its sol files, we want to restruct the sol files as they are not in code structure
############## But before that we need to generate smart contracts containing vulnerable functions

In [6]:
import json
import os, sys,re
from openai import OpenAI
from tqdm.notebook import tqdm
import time
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
sys.path.append(project_root)
from config.keys import OPENAI_API_KEY

In [9]:
class SmartContractGenerator:
    def __init__(self):
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        self.message_template = [{
            "role": "system",
            "content": "You are a cybersecurity programmer. Your task is to generate a complete smart contract that contains a given vulnerable function while maintaining a meaningful contract structure."
        }]

    def extract_function(self, contract_code):
        """Extracts the vulnerable function from the smart contract."""
        function_pattern = r"function\s+.*?\{.*?\}"
        matches = re.findall(function_pattern, contract_code, re.DOTALL)
        return matches[0] if matches else None

    def generate_contract(self, function_code):
        """Generates a smart contract with the given vulnerable function."""
        prompt = f"""
You are given a Solidity function that contains a vulnerability. Your task is to generate a full smart contract that includes this function while maintaining a meaningful contract structure.

Function:
```solidity
{function_code}
```

** Do not use ```json OR ```solidity OR ``` OR any other extra texts in the output or any other extra texts, before the code and after the code. Ensure the contract compiles successfully and includes necessary imports, state variables, and other relevant details.
** Do not write any explanation (code or vulnerability explanations). Only generate smart contracts with multiple functions AND containing its corresponding function.
"""
        
        messages = self.message_template + [{"role": "user", "content": prompt}]
        
        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.1,
            max_tokens=3200,
            top_p=1.0,
            frequency_penalty=0,
            presence_penalty=0
        )
        return response.choices[0].message.content

    def process_contracts(self, input_folder, output_folder, end_contract=2):
        """Reads all .sol files, extracts functions, generates smart contracts, and saves them."""
        os.makedirs(output_folder, exist_ok=True)
        contract_files = [f for f in os.listdir(input_folder) if f.endswith(".sol")][:end_contract]
        
        for file_name in tqdm(contract_files, desc="Processing contracts"):
            input_path = os.path.join(input_folder, file_name)
            output_path = os.path.join(output_folder, file_name)
            
            with open(input_path, "r", encoding="utf-8") as f:
                contract_code = f.read()
            
            function_code = self.extract_function(contract_code)
            if function_code:
                generated_contract = self.generate_contract(function_code)
                
                with open(output_path, "w", encoding="utf-8") as f:
                    f.write(generated_contract)
                time.sleep(1)  # To avoid rate limiting

# Define input and output directories
input_dir = os.path.join("..", "..", "data", "processed_data", "train_TrustLLM", "contracts")
output_dir = os.path.join(input_dir, "generated_contracts")

# Run the generator
generator = SmartContractGenerator()
generator.process_contracts(input_dir, output_dir)

print(f"Generated contracts are saved in {output_dir}")


Processing contracts:   0%|          | 0/2 [00:00<?, ?it/s]

Generated contracts are saved in ..\..\data\processed_data\train_TrustLLM\contracts\generated_contracts


# **Final Code**

# **FINAL CODE: skip the above code and run:**

In [17]:
import json
import os
import sys
import re
import time
from openai import OpenAI
from tqdm.notebook import tqdm

project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
sys.path.append(project_root)
from config.keys import OPENAI_API_KEY

class SmartContractGenerator:
    def __init__(self):
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        self.message_template = [
            {
                "role": "system",
                "content": "You are a cybersecurity programmer. Your task is to generate a complete smart contract that contains a given vulnerable function while maintaining a meaningful contract structure."
            }
        ]

    def extract_vulnerable_code(self, contract_code):
        """Extracts the first vulnerable function and its line numbers."""
        function_pattern = r"function\s+.*?\{.*?\}"
        match = re.search(function_pattern, contract_code, re.DOTALL)
        
        if match:
            start_line = contract_code[:match.start()].count('\n') + 1
            end_line = start_line + match.group(0).count('\n')
            return match.group(0), start_line, end_line
        
        print("Warning: No valid function found in contract.")
        return None, None, None
    
    def generate_contract(self, function_code):
        """Generates a smart contract with the given vulnerable function."""
        prompt = f"""
You are given a Solidity function that contains a vulnerability. Your task is to generate a full smart contract that includes this function while maintaining a meaningful contract structure.

Function:
{function_code}

** Do not include markdown syntax. Only return raw Solidity code. Ensure the contract compiles successfully and includes necessary imports, state variables, and other relevant details.**
"""
        
        messages = self.message_template + [{"role": "user", "content": prompt}]
        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.1,
            max_tokens=3200,
            top_p=1.0,
            frequency_penalty=0,
            presence_penalty=0
        )
        
        return response.choices[0].message.content.strip()
    
    def analyze_vulnerability(self, contract_code, start_line, end_line):
        """Analyzes the vulnerability in the given function."""
        prompt = f"""
You are a security auditor. Analyze the following Solidity function and provide:
1. A reasoning for why it is vulnerable.
2. A secure fix for the vulnerability.
3. Potential risks an attacker could exploit.

### **Your Task:**
- **Identify all vulnerabilities where the contract depends on block.timestamp or block.number.**
- **List all vulnerable lines where timestamp dependency could be exploited.**
- **Ignore all other vulnerabilities such as reentrancy, access control, etc.**
- **Provide a JSON output containing:**
  - \"vulnerable_lines\": A list of all line numbers that contain timestamp dependency vulnerabilities.
  - \"reasoning\": Explain why these lines are vulnerable.
  - \"fix\": Provide a recommended fix for each occurrence.
  - \"risk\": Explain the potential risks of each vulnerability.

### **Smart Contract:**
```solidity
{contract_code}
```
### **Expected JSON Output Format:**
```
{{
    "vulnerable_lines": [<line_number_1>, <line_number_2>, ...],
    "reasoning": "<Explanation of why timestamp dependency is risky>",
    "fix": "<Recommended fix for all occurrences>",
    "risk": "<Potential attacks due to timestamp dependency>"
}}
```
 **Important Notes:**  
- **If there are multiple vulnerabilities, list all affected line numbers.**  
- **Do NOT include markdown syntax ```solidity in sol file.
- **Do NOT analyze other vulnerabilities like reentrancy, access control, etc.**  
- **Focus only on timestamp dependency.**  
- **Do NOT include markdown syntax like ```json or ```solidity. Only return pure JSON.**  

Provide the output in JSON format with the following fields: start_line, end_line, vulnerable_code, reasoning, fix, and risk.
"""
        
        messages = self.message_template + [{"role": "user", "content": prompt}]
        response = self.client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.1,
            max_tokens=800,
            top_p=1.0,
            frequency_penalty=0,
            presence_penalty=0
        )
        
        try: 
            response_content = response.choices[0].message.content.strip()
    
            # **Fix JSON Parsing Issue**
            # 1. Remove Markdown formatting (triple backticks)
            clean_json = re.sub(r"^```json\s*|\s*```$", "", response_content).strip()
    
            # 2. Parse JSON after cleaning
            analysis = json.loads(clean_json)
            
            # 3. Ensure JSON contains expected fields
            analysis.update({"start_line": start_line, "end_line": end_line})
            return analysis
        except json.JSONDecodeError:
            print("Error parsing JSON from OpenAI response:", response_content)
            return {
                "start_line": start_line,
                "end_line": end_line,
                "vulnerable_code": function_code,
                "reasoning": "Could not parse reasoning.",
                "fix": "Could not parse fix.",
                "risk": "Could not parse risk."
            }
    
    def process_contracts(self, input_folder, output_folder, json_output_folder, end_contract=2):
        """Processes Solidity files, extracts vulnerabilities, generates contracts, and saves JSON reports."""
        os.makedirs(output_folder, exist_ok=True)
        os.makedirs(json_output_folder, exist_ok=True)
        
        contract_files = [f for f in os.listdir(input_folder) if f.endswith(".sol")][:end_contract]
        
        for file_name in tqdm(contract_files, desc="Processing contracts"):
            input_path = os.path.join(input_folder, file_name)
            output_path = os.path.join(output_folder, file_name)
            json_output_path = os.path.join(json_output_folder, file_name.replace(".sol", ".json"))
            
            with open(input_path, "r", encoding="utf-8") as f:
                contract_code = f.read()
            
            function_code, start_line, end_line = self.extract_vulnerable_code(contract_code)
            if function_code:
                generated_contract = self.generate_contract(function_code)
                numbered_contract = "\n".join(f"{i+1}: {line}" for i, line in enumerate(generated_contract.splitlines()))
        
                new_function_code, new_start_line, new_end_line = self.extract_vulnerable_code(generated_contract)

                if new_function_code:
                    start_line, end_line = new_start_line, new_end_line  # Update with the new correct lines

                analysis = self.analyze_vulnerability(function_code, start_line, end_line)
                
            
                with open(output_path, "w", encoding="utf-8") as f:
            
                     f.write(numbered_contract)

                report = {
                    "start_line": start_line,
                    "end_line": end_line,
                    "vulnerable_code": function_code,
                    "reasoning": analysis["reasoning"],
                    "fix": analysis["fix"],
                    "risk": analysis["risk"]
                }
                with open(json_output_path, "w", encoding="utf-8") as json_file:
                    json.dump(report, json_file, indent=4)
                
                time.sleep(1)

input_dir = os.path.join("..", "..", "data", "processed_data", "train_TrustLLM", "contracts")
output_dir = os.path.join(input_dir, "generated_contracts")
json_output_dir = os.path.join(input_dir, "vulnerability_reports")

generator = SmartContractGenerator()
generator.process_contracts(input_dir, output_dir, json_output_dir)

print(f"Generated contracts are saved in {output_dir}")
print(f"Vulnerability reports are saved in {json_output_dir}")


Processing contracts:   0%|          | 0/2 [00:00<?, ?it/s]

Generated contracts are saved in ..\..\data\processed_data\train_TrustLLM\contracts\generated_contracts
Vulnerability reports are saved in ..\..\data\processed_data\train_TrustLLM\contracts\vulnerability_reports
