# TODO
---
1. Convert codes from list to string
---
2. Contract line numbers


In [14]:
import json
import os, sys
from openai import OpenAI
from tqdm.notebook import tqdm
import time
project_root = os.path.abspath(os.path.join(os.getcwd(), '..', '..','..'))
sys.path.append(project_root)
from config.keys import OPENAI_API_KEY

In [15]:
class Generator:
    """Build few-shot chat prompts for vulnerability annotation and send them to the chat API.

    The conversation is kept in ``self.message`` as a list of
    ``{"role": ..., "content": ...}`` dicts: one system message, then a
    user/assistant pair per few-shot example, then the final user request.
    """

    def __init__(self, vulnerability, client=None):
        """Set up the client and the static parts of the prompt.

        Args:
            vulnerability: Name of the vulnerability class interpolated into
                the user prompt (e.g. "Timestamp Dependency").
            client: Optional client object exposing
                ``chat.completions.create(...)``. When omitted, an ``OpenAI``
                client is created with ``OPENAI_API_KEY``.
        """
        self.client = client if client is not None else OpenAI(api_key=OPENAI_API_KEY)
        self.vulnerability = vulnerability
        self.message = []
        self.system_message = {"role":"system","content":"You are a cyber-security programmer that can detect line numbers from the contract based on the instruction."}
        self.user_prefix = self._build_user_prefix(vulnerability)
        # Example of the expected answer. Every member is comma-separated so the
        # sample itself follows the JSON syntax the model is asked to produce.
        self.output_formatter = """[
   {"vulnerableLines": "l1-l2",
    "vulnerableCode": "<a list containing lines of vulnerable piece of code>",
    "vulnerabilityReason": "<Reasons of the ulnerability of lines l1 to l2>",
    "potentialSecurityRisk": "<Potential risks theh vulneraility causes>",
    "fixedCode": "<the enclosing function with no bugs or vulnerability>"
  }
  ...
  ]"""
        self.formatter = f"Return the response in RFC8259 compliant JSON according to the ResponseFormat schema with no other text. Follow the example format:\n{self.output_formatter}"

    @staticmethod
    def _build_user_prefix(vulnerability):
        """Return the opening sentence of the user prompt for ``vulnerability``."""
        return f"""In the code below, detect {vulnerability} vulnerabilities and provide extra information regarding the vulnerable code snippet based on the given instruction"""

    @staticmethod
    def _to_message_content(response):
        """Return ``response`` as a string usable as a chat message ``content``.

        Strings pass through unchanged; any other value (the few-shot answers
        arrive as a list) is serialised to JSON text.
        """
        if isinstance(response, str):
            return response
        return json.dumps(response, indent=2)

    def set_target_vulnerability(self, vulnerability):
        """Switch the target vulnerability and refresh the prompt prefix built from it."""
        self.vulnerability = vulnerability
        # Keep the prefix in sync; otherwise later prompts would still name the
        # vulnerability given to __init__.
        self.user_prefix = self._build_user_prefix(vulnerability)

    def update_message(self, new_message):
        """Append one message dict to the current conversation."""
        self.message.append(new_message)

    def get_user_message(self, code, instruction):
        """Build the user message for one contract.

        Args:
            code: Smart contract source inserted under "Smart Contract Code:".
            instruction: Text inserted under "Instruction:".

        Returns:
            A ``{"role": "user", "content": ...}`` dict. The rendered text is
            also stored on ``self.user_content``.
        """
        self.user_content = f"""
{self.user_prefix}

Instruction:
{instruction}
-----------------
Smart Contract Code:
{code}

-----------------
{self.formatter}
###
        """
        user_message = {"role": "user", "content":self.user_content}
        return user_message

    def get_example_message(self, example_data):
        """Turn one few-shot example into a ``[user, assistant]`` message pair.

        ``example_data`` is split by ``map_example`` into code, instruction and
        expected response; the response is converted to a string because it is
        used as the assistant message content.
        """
        train_code, instruction, train_response = map_example(example_data)
        train_user_message = self.get_user_message(train_code, instruction)
        train_assistant_message = {
            "role": "assistant",
            "content": self._to_message_content(train_response),
        }
        return [train_user_message, train_assistant_message]

    def create_prompt(self, train_data, code, instruction):
        """Rebuild ``self.message`` for a new request and return it.

        Args:
            train_data: Iterable of few-shot examples, each accepted by
                ``get_example_message``.
            code: Smart contract source to analyse.
            instruction: Instruction text for the contract.

        Returns:
            The assembled message list (the same object as ``self.message``).
        """
        self.message = [self.system_message]
        for example_data in train_data:
            self.message.extend(self.get_example_message(example_data))
        self.message.append(self.get_user_message(code, instruction))
        return self.message

    def generate(self, max_retries=5, retry_delay=120):
        """Send ``self.message`` to the chat completions endpoint, retrying on failure.

        Args:
            max_retries: Maximum number of attempts before giving up.
            retry_delay: Seconds to wait between failed attempts.

        Returns:
            Tuple ``(answer, completion)``: the content of the first choice and
            the full completion object.

        Raises:
            ValueError: If ``max_retries`` is smaller than 1.
            RuntimeError: If every attempt fails; the last error is chained as
                ``__cause__``.
        """
        if max_retries < 1:
            raise ValueError("max_retries must be at least 1.")

        last_error = None
        for attempt in range(1, max_retries + 1):
            try:
                completion = self.client.chat.completions.create(
                              model="gpt-4o",
                              messages = self.message,
                              temperature=1,
                              max_tokens=4096,
                              top_p=1.,
                              frequency_penalty=0,
                              presence_penalty=0,
                              stop=None
                            )
                answer = completion.choices[0].message.content
                return answer, completion
            except Exception as e:
                # Any failure is retried (not only rate limits), but only a
                # bounded number of times so a permanent error cannot hang the run.
                last_error = e
                print(e)
                if attempt < max_retries:
                    print(f"Request failed (attempt {attempt}/{max_retries}). Paused for {retry_delay} seconds!")
                    time.sleep(retry_delay)
        raise RuntimeError(f"Chat completion request failed after {max_retries} attempts.") from last_error


def read_json_files(fewshot_dir):
    """Load every ``.json`` file located directly inside ``fewshot_dir``.

    Args:
        fewshot_dir: Path of the directory to scan (not recursive).

    Returns:
        A list with the parsed content of each ``.json`` file, ordered by
        file name. Files with other extensions are ignored.
    """
    all_data = []
    # os.listdir gives no ordering guarantee; sorting keeps the few-shot
    # examples in the same order on every run and platform.
    for filename in sorted(os.listdir(fewshot_dir)):
        if filename.endswith(".json"):
            file_path = os.path.join(fewshot_dir, filename)
            # Read as UTF-8 explicitly instead of relying on the platform default.
            with open(file_path, 'r', encoding='utf-8') as f:
                all_data.append(json.load(f))
    return all_data

def map_example(example_data):
    """Split one few-shot example into its prompt and answer parts.

    Args:
        example_data: Sequence whose first element is a mapping with the keys
            "input" and "output"; the remaining elements form the response.

    Returns:
        Tuple ``(train_code, instruction, train_response)`` where
        ``train_code`` is the first element's "input", ``instruction`` is its
        "output", and ``train_response`` is everything after the first element.

    Raises:
        ValueError: If ``example_data`` is empty or otherwise falsy.
    """
    is_missing = not example_data
    if is_missing or len(example_data) < 1:
        raise ValueError("example_data must contain at least one element.")

    header = example_data[0]
    return header["input"], header["output"], example_data[1:]


In [16]:
# Dataset to process: its raw file name and the vulnerability class named in the prompt.
dataset = [
    {"dataset_name":"ESC_timestamp", 
     "vulnerability": "Timestamp Dependency"}
    ]
dataset_name = dataset[0]["dataset_name"]
vulnerability =dataset[0]["vulnerability"]
# Paths are relative to the notebook's working directory.
raw_fname = os.path.join("..", "..", "..","data", "dataset", "raw", dataset_name+".json")
fewshot_dir = os.path.join("..", "..", "..","data", "processed_data", "few_shots", dataset_name)
fewshot_data = read_json_files(fewshot_dir)
with open(raw_fname, 'r') as f:
    raw_data = json.load(f)

# Smoke test: build the prompt for the first raw record only.
for raw_record in raw_data:
    print(raw_record.keys())
    generator = Generator(vulnerability)
    generator.create_prompt(fewshot_data, code=raw_record["input"], instruction=raw_record["output"])
    # The conversation is assembled on generator.message; read it from there so
    # `prompt` holds the message list instead of the call's return value.
    prompt = generator.message
    break
# os.makedirs(output_dir, exist_ok=True)
# locs_dir = os.path.join(output_dir, "LOCs")
# os.makedirs(locs_dir, exist_ok=True)
# contracts_dir = os.path.join(output_dir, "contracts")
# os.makedirs(contracts_dir, exist_ok=True)
# raw_dir = "../../data/dataset/raw"

dict_keys(['instruction', 'input', 'output'])


In [20]:
# Print the text of the message at index 5 of the assembled conversation.
# NOTE(review): index 5 is the final user turn only when exactly two few-shot
# examples were loaded (system + 2 user/assistant pairs) — confirm.
print(generator.message[5]['content'])


In the code below, detect Timestamp Dependency vulnerabilities and provide extra information regarding the vulnerable code snippet based on the given instruction

Instruction:
0. The provided smart contract code does not exhibit timestamp dependence vulnerabilities. The use of block.timestamp or now, which can be manipulated by miners, is not present in any critical operations within the contract. Time constraints that could be influenced by precise time measurements are absent, ensuring that no part of the contract's logic is susceptible to miner manipulation through timestamp adjustments. Furthermore, the precision of time measurements does not impact the contract logic since there are no time-sensitive operations that could be exploited. Therefore, the contract is secure against timestamp manipulation according to the four focus points: use of block.timestamp or now, time constraints in critical operations, potential for miner manipulation, and precision of time measurements impact

In [19]:
# Show the raw (escaped) string of the message at index 5 of the conversation.
# NOTE(review): the hard-coded index presumably targets the final user turn — confirm.
generator.message[5]['content']

'\nIn the code below, detect Timestamp Dependency vulnerabilities and provide extra information regarding the vulnerable code snippet based on the given instruction\n\nInstruction:\n0. The provided smart contract code does not exhibit timestamp dependence vulnerabilities. The use of block.timestamp or now, which can be manipulated by miners, is not present in any critical operations within the contract. Time constraints that could be influenced by precise time measurements are absent, ensuring that no part of the contract\'s logic is susceptible to miner manipulation through timestamp adjustments. Furthermore, the precision of time measurements does not impact the contract logic since there are no time-sensitive operations that could be exploited. Therefore, the contract is secure against timestamp manipulation according to the four focus points: use of block.timestamp or now, time constraints in critical operations, potential for miner manipulation, and precision of time measurements 