In [None]:
import os
import json
import time
import openai
import subprocess

def get_api_key(key_path='api_key.txt', gitignore_path='.gitignore'):
    """Read the OpenAI API key from disk and make sure git ignores the key file.

    Args:
        key_path: Path of the text file holding the API key.
        gitignore_path: Path of the .gitignore file to update.

    Returns:
        The contents of ``key_path`` with surrounding whitespace stripped.

    Raises:
        OSError: If ``key_path`` cannot be read or ``gitignore_path`` cannot be
            read/written (e.g. FileNotFoundError when the key file is missing).
    """
    # Read API key from file
    with open(key_path, 'r', encoding='utf-8') as file:
        api_key = file.read().strip()

    # Never upload api_key publicly!
    ignore_entry = os.path.basename(key_path)
    existing = ""
    if os.path.exists(gitignore_path):
        with open(gitignore_path, 'r', encoding='utf-8', errors='replace') as gitignore:
            existing = gitignore.read()

    # Only append when the entry is not already listed, so repeated calls
    # (every notebook run) do not keep growing .gitignore.
    already_ignored = any(line.strip() == ignore_entry for line in existing.splitlines())
    if not already_ignored:
        with open(gitignore_path, 'a', encoding='utf-8') as gitignore:
            # Start on a fresh line; otherwise the entry would be glued to the
            # previous pattern and neither of them would match anything.
            if existing and not existing.endswith("\n"):
                gitignore.write("\n")
            gitignore.write(ignore_entry + "\n")
    return api_key

def get_response(prompt, gpt_api_model):
    """Ask the chat model a single question and return its reply text.

    Args:
        prompt: Text sent as the content of one ``user`` message.
        gpt_api_model: Model name passed to ``openai.ChatCompletion.create``.

    Returns:
        The ``content`` of the first choice's message, stripped of
        leading/trailing whitespace.
    """
    # The conversation consists of exactly one user turn.
    user_message = {"role": "user", "content": prompt}
    completion = openai.ChatCompletion.create(
        model=gpt_api_model,
        messages=[user_message],
    )
    # Only the first returned choice is used.
    first_choice = completion['choices'][0]
    reply_text = first_choice['message']['content']
    return reply_text.strip()

def record_response(file_path, answer):
    """Write ``answer`` to ``file_path`` as UTF-8, overwriting any existing file.

    Writing is best-effort: a failure to open or write the file is reported on
    stdout and the function returns normally instead of raising.

    Args:
        file_path: Destination path of the response file.
        answer: Text to store.
    """
    # Record the answer
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(answer)
    except (OSError, UnicodeError) as e:
        # Catch only I/O and encoding failures (not a bare ``except``) so that
        # KeyboardInterrupt/SystemExit still stop a long batch run, and include
        # the path and reason so the failure can be diagnosed.
        print(f"Answer Write Error: {file_path}: {e}")
        
def ask_to_gpt(file_path, prompt, gpt_api_model, max_retries=None, retry_delay=30):
    """Send one file to the model and store the answer next to it.

    The file content is appended to ``prompt``, sent through ``get_response``,
    and the answer is written with ``record_response`` to the path obtained by
    replacing ".java" with "_response.txt" in ``file_path``.

    Error handling:
      * Rate-limit errors: wait ``retry_delay`` seconds and try again.
      * Context-length errors: append ``file_path`` to "blackList.txt" so the
        file is skipped on later runs.
      * Any other exception: print it and give up on this file.

    Args:
        file_path: Path of the file to analyse (read as UTF-8).
        prompt: Instruction text placed before the file content.
        gpt_api_model: Model name forwarded to ``get_response``.
        max_retries: Maximum number of retries after a rate-limit error;
            ``None`` (default) retries without limit.
        retry_delay: Seconds to sleep between rate-limit retries.
    """
    response_file_path = file_path.replace(".java", "_response.txt")

    # Read the input up front so the file handle is closed before the slow
    # API call and any retry sleeps.
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    print(file_path)

    retries = 0
    # Retry in a loop rather than by recursion: a long rate-limit streak must
    # not pile up stack frames or open file handles.
    while True:
        try:
            answer = get_response(prompt + content, gpt_api_model)
            record_response(response_file_path, answer)
            return
        except openai.error.RateLimitError as e:
            print(f"Rate Limit Error: {str(e)}")
            if max_retries is not None and retries >= max_retries:
                print(f"Response Error: giving up on {file_path} after {retries} retries")
                return
            retries += 1
            # Wait and try again
            time.sleep(retry_delay)  # Adjust this time as needed
        except Exception as e:
            message = str(e)
            # Exceeds maximum available tokens (file size). "8192" is the
            # gpt-4 limit; the other markers cover models with other limits.
            too_long = (
                "8192" in message
                or "maximum context length" in message
                or "context_length_exceeded" in message
            )
            if too_long:
                print(f"File Size Exceeds: {message}")
                with open("blackList.txt", 'a', encoding='utf-8') as blacklist:
                    blacklist.write(file_path + "\n")
            else:
                print(f"Response Error: {message}")
            return

# File listing inputs that exceeded the model's token limit (one path per line).
_BLACKLIST_FILE = "blackList.txt"
# 30KB - gpt4.0 / 80KB - gpt3.5
_MAX_FILE_SIZE_KB = 30

_JAVA_FILES_PROMPT = "Can you check the following code, and if there are any CWE or CVE-related vulnerabilities, please point them out with the respective CWE or CVE numbers and describe them?\n"
_DIFF_FILES_PROMPT = "Could you read the following diff file and, if there are any security vulnerabilities in the changes, please point out the related CWE or CVE numbers along with the reasons they occurred?\n"


def _load_blacklist(path=_BLACKLIST_FILE):
    """Return the set of blacklisted file paths (empty if the file is absent)."""
    if not os.path.exists(path):
        return set()
    with open(path, 'r', encoding='utf-8') as b:
        # Compare whole lines: a substring test against the raw file text could
        # wrongly skip a path that merely occurs inside another entry.
        return {line.strip() for line in b.read().splitlines() if line.strip()}


def _ask_about_matching_files(name_marker, prompt, gpt_api_model):
    """Walk the current directory and query the model for each eligible file.

    A file is eligible when its name contains ``name_marker`` and ends with
    ".java", it is not blacklisted, its path does not contain "_test_", it has
    no response file yet, and it is at most ``_MAX_FILE_SIZE_KB`` KB large.
    """
    blacklist = _load_blacklist()

    for root, _dirs, files in os.walk("."):
        for file in files:
            if name_marker not in file or not file.endswith(".java"):
                continue
            file_path = os.path.join(root, file)

            # Ignore files that exceed the maximum available tokens (file size)
            if file_path in blacklist:
                continue
            # Ignore test files
            if "_test_" in file_path:
                continue
            # Ignore files that already have a response
            # (same path derivation that ask_to_gpt uses for its output file)
            response_file_path = file_path.replace(".java", "_response.txt")
            if os.path.exists(response_file_path):
                continue

            if os.path.getsize(file_path) > _MAX_FILE_SIZE_KB * 1024:
                print(f"Ignored {file_path} - File size exceeds {_MAX_FILE_SIZE_KB}KB")
                continue

            ask_to_gpt(file_path, prompt, gpt_api_model)


def get_response_java_files(gpt_api_model):
    """Ask the model to review every "*_after_*.java" file under the current directory.

    Args:
        gpt_api_model: Model name forwarded to ``ask_to_gpt``.
    """
    _ask_about_matching_files("_after_", _JAVA_FILES_PROMPT, gpt_api_model)


def get_response_diff_files(gpt_api_model):
    """Ask the model to review every "*_diff_*.java" file under the current directory.

    Args:
        gpt_api_model: Model name forwarded to ``ask_to_gpt``.
    """
    _ask_about_matching_files("_diff_", _DIFF_FILES_PROMPT, gpt_api_model)

In [None]:
if __name__ == "__main__":
    # You can edit the directory list
    directories = ["guava"]  # ["h2database", "bc-java", "pgjdbc", "junit4", "gson", "guava"]
    working_directory = "commit-files"
    gpt_api_model = "gpt-4" # gpt-3.5-turbo-16k

    # The key file is read relative to the starting directory, before any chdir.
    openai.api_key = get_api_key()

    # The get_response_* functions walk ".", so each project directory has to
    # become the working directory. Remember where we started and always go
    # back, even if a request fails midway; otherwise the kernel is left
    # inside commit-files/<project> and a re-run of this cell breaks.
    start_directory = os.getcwd()
    try:
        for directory in directories:
            # Assuming commit-files/<directory> was already created in step 3
            os.chdir(os.path.join(start_directory, working_directory, directory))
            # Choose the necessary operation
            # get_response_java_files(gpt_api_model)
            get_response_diff_files(gpt_api_model)
    finally:
        os.chdir(start_directory)


In [None]:
# Scratch cell: display the kernel's current working directory
# (the main cell above changes it with os.chdir).
os.getcwd()

In [None]:
# Scratch cell: move the kernel's working directory up one level.
os.chdir("..")

In [None]:
# Scratch cell: enter the "VIChecker" directory relative to the current one
# (presumably the project root — TODO confirm).
os.chdir("VIChecker")