-
Notifications
You must be signed in to change notification settings - Fork 0
/
fetch_chatapi.py
63 lines (48 loc) · 1.7 KB
/
fetch_chatapi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import requests
import json
# Base URL of the HTTP server that this script sends its /tokenize and
# /completion requests to.
API_URL = "http://127.0.0.1:8080"

# Conversation history as a flat list of alternating turns:
# human, assistant, human, assistant, ...
# Seeded with a short example exchange; chat_completion() appends each new
# question/answer pair to it.
CHAT = [
    "Hello, Assistant.",
    "Hello. How may I help you today?",
    "Please tell me the largest city in Europe.",
    "Sure. The largest city in Europe is Moscow, the capital of Russia.",
]

# Preamble placed at the very start of every prompt by format_prompt().
INSTRUCTION = "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
def trim(string):
    """Return ``string`` with leading and trailing whitespace removed."""
    stripped = string.strip()
    return stripped
def trim_trailing(string):
    """Return ``string`` with trailing whitespace removed.

    Leading whitespace is left untouched.
    """
    without_tail = string.rstrip()
    return without_tail
def format_prompt(question):
    """Build the full prompt for ``question``, including the chat history.

    The prompt consists of ``INSTRUCTION``, followed by every recorded
    exchange in ``CHAT`` rendered as ``### Human:`` / ``### Assistant:``
    pairs, followed by the new question and an open ``### Assistant:`` tag
    for the model to continue from.

    Args:
        question: The user's new message.

    Returns:
        The prompt string with trailing whitespace removed.
    """
    # CHAT is a flat list alternating human / assistant turns, so pair the
    # even-indexed entries with the odd-indexed ones. Without this the
    # recorded history would never reach the model and every question
    # would be answered without context.
    history = "".join(
        f"### Human: {human}\n### Assistant: {assistant}\n"
        for human, assistant in zip(CHAT[::2], CHAT[1::2])
    )
    prompt = f"{INSTRUCTION}\n{history}### Human: {question}\n### Assistant: "
    return trim_trailing(prompt)
def tokenize(text):
    """Ask the server to tokenize ``text`` and return the resulting tokens.

    POSTs ``{"content": text}`` as JSON to ``{API_URL}/tokenize`` and returns
    the ``"tokens"`` entry of the JSON reply.

    Args:
        text: The string to tokenize.

    Returns:
        The value stored under ``"tokens"`` in the server's JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = requests.post(
        f"{API_URL}/tokenize",
        headers={"Content-Type": "application/json"},
        json={"content": text},
    )
    # Surface server-side failures as a clear HTTP error rather than an
    # opaque JSON-decoding or KeyError failure on the lines below.
    response.raise_for_status()
    return response.json()["tokens"]
# Number of tokens in INSTRUCTION, as reported by the server's /tokenize
# endpoint. It is sent as "n_keep" with every completion request.
# NOTE: this runs a network request at import time, so the server at API_URL
# must be reachable before the script starts.
# NOTE(review): presumably this lets the server retain the instruction when
# it has to truncate the context -- confirm against the server documentation.
N_KEEP = len(tokenize(INSTRUCTION))
def chat_completion(question):
    """Stream the assistant's answer to ``question`` and record the exchange.

    Sends the formatted prompt to ``{API_URL}/completion`` with streaming
    enabled, prints each received piece of text as soon as it arrives, and
    finally appends ``question`` and the whitespace-trimmed answer to
    ``CHAT``.

    Args:
        question: The user's message.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    data = {
        "prompt": format_prompt(question),
        "temperature": 0.2,
        "top_k": 40,
        "top_p": 0.9,
        "n_keep": N_KEEP,
        "n_predict": 256,
        # Stop generating as soon as the model starts a new human turn.
        "stop": ["\n### Human:"],
        "stream": True,
    }

    pieces = []
    # Use the response as a context manager so the streamed connection is
    # always released, even if parsing or printing fails midway.
    with requests.post(
        f"{API_URL}/completion",
        headers={"Content-Type": "application/json"},
        json=data,
        stream=True,
    ) as response:
        response.raise_for_status()
        for line in response.iter_lines():
            # Only lines carrying a "data:" payload hold JSON; skip the rest
            # (e.g. the blank separator lines between events).
            if line.startswith(b"data:"):
                content = json.loads(line[5:])["content"]
                # Flush so each piece shows up immediately instead of waiting
                # for a newline on a line-buffered stdout.
                print(content, end="", flush=True)
                pieces.append(content)
    print()

    CHAT.extend([question, trim("".join(pieces))])
# Interactive read-answer loop: prompt for a question, stream the answer,
# repeat until the input stream ends or the user interrupts at the prompt.
while True:
    try:
        question = input(">")
    except (EOFError, KeyboardInterrupt):
        # Ctrl-D / Ctrl-C at the prompt: leave quietly instead of dumping a
        # traceback. The newline keeps the shell prompt on its own line.
        print()
        break
    # Re-prompt on any blank input (empty, spaces, tabs, ...), not just the
    # literal "" and " ".
    if not question.strip():
        continue
    chat_completion(question)