/
04_gpt_responser.py
171 lines (131 loc) · 5.43 KB
/
04_gpt_responser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#!/usr/bin/env python
# coding: utf-8
# In[113]:
import os
import json
import time
import openai
import subprocess
def get_api_key():
    """Return the OpenAI API key stored in ``api_key.txt``.

    The key file is read from the current working directory and surrounding
    whitespace is stripped. As a safeguard against publishing the key,
    ``api_key.txt`` is registered in ``.gitignore`` (also in the current
    working directory) -- once only, and on a line of its own.

    Returns:
        str: The API key.

    Raises:
        OSError: If ``api_key.txt`` cannot be read or ``.gitignore`` cannot
            be updated.
    """
    key_file = 'api_key.txt'
    with open(key_file, 'r', encoding='utf-8') as file:
        api_key = file.read().strip()

    # Never upload the API key publicly: make sure git ignores the key file.
    try:
        with open(".gitignore", "r", encoding='utf-8') as gitignore:
            existing = gitignore.read()
    except FileNotFoundError:
        existing = ""

    ignored_entries = {line.strip() for line in existing.splitlines()}
    if key_file not in ignored_entries:
        with open(".gitignore", "a", encoding='utf-8') as gitignore:
            # Start on a fresh line so the entry is never glued to the
            # previous one (which would stop it matching the key file).
            if existing and not existing.endswith("\n"):
                gitignore.write("\n")
            gitignore.write(key_file + "\n")
    return api_key
def get_response(prompt, gpt_api_model):
    """Send ``prompt`` to the chat model and return the reply text.

    The prompt is sent as a single ``user`` message through
    ``openai.ChatCompletion.create``. The content of the first choice is
    returned with leading and trailing whitespace stripped.
    """
    conversation = [{"role": "user", "content": prompt}]
    completion = openai.ChatCompletion.create(
        model=gpt_api_model,
        messages=conversation,
    )
    # Only the first generated choice is used.
    first_choice = completion['choices'][0]
    reply_text = first_choice['message']['content']
    return reply_text.strip()
def record_response(file_path, answer):
    """Write ``answer`` to ``file_path`` as UTF-8, replacing any old content.

    This is a best-effort write: a failure is reported on stdout together
    with the target path and the underlying error, and is not re-raised.

    Args:
        file_path: Destination path of the response file.
        answer: Text to store.
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(answer)
    except Exception as e:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # interpreter shutdown are not swallowed along with write errors.
        print(f"Answer Write 오류: {file_path}: {e}")
def ask_to_gpt(file_path, prompt, gpt_api_model, retry_delay=30):
    """Ask the model about one file and store the answer next to it.

    The file content is appended to ``prompt`` and sent through
    ``get_response``; the answer is written with ``record_response`` to the
    path obtained by replacing ``.java`` with ``_response.txt`` in
    ``file_path``.

    Error handling:
      * On ``openai.error.RateLimitError`` the request is retried after
        ``retry_delay`` seconds, for as long as the error keeps occurring.
      * If the error text contains ``"8192"`` (maximum available tokens /
        file size exceeded) the path is appended to ``blackList.txt``.
      * Any other error is printed and the file is skipped.

    Args:
        file_path: Path of the ``.java`` file to send.
        prompt: Instruction text placed in front of the file content.
        gpt_api_model: Model name passed on to ``get_response``.
        retry_delay: Seconds to wait before retrying after a rate-limit
            error. Tune this to the account's rate limits.
    """
    # Keep this naming identical to the existence check performed by the
    # get_response_*_files functions, otherwise answered files are re-asked.
    response_file_path = file_path.replace(".java", "_response.txt")
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()
    print(file_path)

    # Retry in a loop instead of recursing, so a long rate-limit period
    # cannot exhaust the recursion limit and the file is read only once.
    while True:
        try:
            answer = get_response(prompt + content, gpt_api_model)
        except openai.error.RateLimitError as e:
            print(f"Rate Limit Error: {str(e)}")
            # Wait, then try again.
            time.sleep(retry_delay)
            continue
        except Exception as e:
            if "8192" in str(e):  # maximum available tokens (file size) exceeded
                print(f"File Size Exceeds: {str(e)}")
                with open("blackList.txt", 'a', encoding='utf-8') as f:
                    f.write(file_path + "\n")
            else:
                print(f"Response Error: {str(e)}")
            return
        record_response(response_file_path, answer)
        return
def get_response_java_files(gpt_api_model):
    """Ask the model about every ``*_after_*.java`` file below the cwd.

    Walks the current working directory recursively and calls
    ``ask_to_gpt`` for each file whose name contains ``_after_`` and ends
    with ``.java``, except files that:

      * are listed (one path per line) in ``blackList.txt``,
      * contain ``_test_`` in their path,
      * already have a matching ``*_response.txt`` file, or
      * are larger than 30 KB (these are reported on stdout).

    Args:
        gpt_api_model: Model name passed on to ``ask_to_gpt``.
    """
    blackListFile = "blackList.txt"
    try:
        with open(blackListFile, 'r', encoding='utf-8') as b:
            # Exact-line membership; a substring test on the raw text could
            # match a path that merely appears inside a longer entry.
            blackList = set(b.read().splitlines())
    except FileNotFoundError:
        blackList = set()

    prompt = "Can you check the following code and if there is any CWE or CVE related vulnerability, can you point it out the number of CWE or CVE and describe it?\n"
    # 30Kb - gpt4.0 / 80kb - gpt3.5
    size_limit = 30 * 1024

    for root, _dirs, files in os.walk("."):
        for file in files:
            if "_after_" not in file or not file.endswith(".java"):
                continue
            file_path = os.path.join(root, file)
            # Skip files known to exceed the available tokens.
            if file_path in blackList:
                continue
            # Skip test files.
            if "_test_" in file_path:
                continue
            # Skip files that already have a response.
            response_file_path = file_path.replace(".java", "_response.txt")
            if os.path.exists(response_file_path):
                continue
            if os.path.getsize(file_path) > size_limit:
                print(f"Ignored {file_path} - File size exceeds 30KB")
                continue
            ask_to_gpt(file_path, prompt, gpt_api_model)
def get_response_diff_files(gpt_api_model):
    """Ask the model about every ``*_diff_*.java`` file below the cwd.

    Walks the current working directory recursively and calls
    ``ask_to_gpt`` for each file whose name contains ``_diff_`` and ends
    with ``.java``, except files that:

      * are listed (one path per line) in ``blackList.txt``,
      * contain ``_test_`` in their path,
      * already have a matching ``*_response.txt`` file, or
      * are larger than 30 KB (these are reported on stdout).

    Args:
        gpt_api_model: Model name passed on to ``ask_to_gpt``.
    """
    blackListFile = "blackList.txt"
    try:
        with open(blackListFile, 'r', encoding='utf-8') as b:
            # Exact-line membership; a substring test on the raw text could
            # match a path that merely appears inside a longer entry.
            blackList = set(b.read().splitlines())
    except FileNotFoundError:
        blackList = set()

    prompt = "Could you read the following diff file and, if there are any security vulnerabilities in changes, point out the related CWE or CVE numbers along with the reason they occurred?\n"
    # 30Kb - gpt4.0 / 80kb - gpt3.5
    size_limit = 30 * 1024

    for root, _dirs, files in os.walk("."):
        for file in files:
            if "_diff_" not in file or not file.endswith(".java"):
                continue
            file_path = os.path.join(root, file)
            # Skip files known to exceed the available tokens.
            if file_path in blackList:
                continue
            # Skip test files.
            if "_test_" in file_path:
                continue
            # Skip files that already have a response.
            response_file_path = file_path.replace(".java", "_response.txt")
            if os.path.exists(response_file_path):
                continue
            if os.path.getsize(file_path) > size_limit:
                print(f"Ignored {file_path} - File size exceeds 30KB")
                continue
            ask_to_gpt(file_path, prompt, gpt_api_model)
# In[114]:
if __name__ == "__main__":
    # Script entry point: query the model for every selected repository
    # directory under ``working_directory``.
    directories = ["guava"]  # ["h2database", "bc-java", "pgjdbc", "junit4", "gson", "guava"]
    working_directory = "commit-files"
    gpt_api_model = "gpt-4"  # gpt-3.5-turbo-16k
    # commit_logger(directories)
    # The key file is read from the starting directory, before any chdir.
    openai.api_key = get_api_key()

    # The working directory is assumed to have been created already by step 3_.
    start_dir = os.getcwd()
    try:
        for directory in directories:
            # The get_response_* functions walk "." and keep blackList.txt
            # in the cwd, so each repository is processed from inside it.
            os.chdir(os.path.join(start_dir, working_directory, directory))
            # Select the task to run.
            # get_response_java_files(gpt_api_model)
            get_response_diff_files(gpt_api_model)
    finally:
        # Always return to the starting directory, even when a request or
        # file read fails part-way through.
        os.chdir(start_dir)
# In[115]:
# NOTE(review): the statements from here to the end of the file look like
# leftover interactive notebook cells (see the "In[...]" markers) -- confirm
# whether they are still needed when this file is run as a script.
# The return value of this call is discarded.
os.getcwd()
# In[111]:
# Moves the process to the parent of the current working directory.
os.chdir("..")
# In[32]:
# Enters a directory named "VIChecker" relative to the current working
# directory; raises if no such directory exists there.
os.chdir("VIChecker")
# In[ ]: