
Commit 6d5d615

change gpt-j to THUDM/ChatGLM-6B

1 parent 4a1633f

5 files changed: +27 −7 lines

ChatGLM_6b.py

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+import requests
+import json
+
+def getAnswerFromChatGLM6b(context):
+    url = 'http://172.16.62.136:8000/'
+    data = '{"prompt": "' + context + '", "history": []}'
+    headers = {'content-type': 'application/json;charset=utf-8'}
+    r = requests.post(url, data=data.encode(), headers=headers)
+    res = r.json()
+    if r.status_code == 200 :
+        return res['response']
+    else:
+        return ""
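Note: the new helper builds its JSON body by string concatenation, so any prompt containing a double quote, backslash, or newline produces invalid JSON, and r.json() is called before the status code is checked. A more defensive sketch, assuming the server is the api.py bundled with THUDM/ChatGLM-6B (it accepts {"prompt", "history"} and returns a "response" field):

    import requests

    def get_answer_from_chatglm6b(context, url='http://172.16.62.136:8000/', timeout=60):
        """Query a ChatGLM-6B HTTP server; return '' on any failure."""
        try:
            # json= serializes and escapes the payload correctly
            r = requests.post(url, json={'prompt': context, 'history': []}, timeout=timeout)
        except requests.RequestException:
            return ""
        if r.status_code != 200:
            return ""
        return r.json().get('response', "")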

chat/src/App.js

Lines changed: 1 addition & 1 deletion

@@ -212,7 +212,7 @@ function App() {
           title: 'More',
         },
       ],
-      title: '基于Salesforce codegen和GPT-J-6B的AI代码生成',
+      title: '基于Salesforce codegen和清华THUDM/ChatGLM-6B的AI代码生成',
     }}
     messages={messages}
     renderMessageContent={renderMessageContent}

(The old title string reads "AI code generation based on Salesforce codegen and GPT-J-6B"; the new one reads "AI code generation based on Salesforce codegen and Tsinghua's THUDM/ChatGLM-6B".)

codegen.py

Lines changed: 3 additions & 1 deletion

@@ -9,6 +9,7 @@
 from jaxformer.hf.sample import load_model,sampling
 from gpt_j import gpt_load_model,gpt_generate
 from codegen_stream import codegen_stream
+from ChatGLM_6b import getAnswerFromChatGLM6b

 ROOT = os.path.dirname(__file__)

@@ -43,7 +44,8 @@ async def codegen(request):
     context = context.replace("//","").replace("#","").strip()
     stop = False
     if flag_chs :#or content.startwith('gpt-j') :
-        result = getAnswerFromChatGPTJ(context,maxlength).replace(context,"")
+        # result = getAnswerFromChatGPTJ(context,maxlength).replace(context,"")
+        result = getAnswerFromChatGLM6b(context)
     else:
         result,stop = sampling(context,maxlength)
     end = time.perf_counter()
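The routing here relies on flag_chs to send Chinese prompts to ChatGLM-6B instead of the CodeGen sampler; the detection function itself is not part of this commit. Purely as an illustration, a minimal check of this kind might look like the following (the repository's actual logic behind flag_chs may differ):

    def contains_chinese(text):
        # Treat the prompt as Chinese if it contains any CJK Unified Ideograph
        return any('\u4e00' <= ch <= '\u9fff' for ch in text)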

codegen_stream.py

Lines changed: 8 additions & 4 deletions

@@ -4,6 +4,7 @@
 import json
 from jaxformer.hf.sample import load_model, sampling
 from gpt_j import gpt_load_model, gpt_generate_stream
+from ChatGLM_6b import getAnswerFromChatGLM6b


 def sampling_gptj(context, maxlength):

@@ -25,10 +26,13 @@ async def codegen_stream(request):
     flag_chs = f(context)
     stop = False
     if flag_chs:
-        results = sampling_gptj(context, maxlength)
-        results = json.loads(results)
-        result_en = results["result_en"]
-        result_ch = results["result_ch"]
+        # results = sampling_gptj(context, maxlength)
+        # results = json.loads(results)
+        # result_en = results["result_en"]
+        # result_ch = results["result_ch"]
+        result_en = getAnswerFromChatGLM6b(context)
+        result_ch = result_en
+        stop = True
     else:
         result_en,stop = sampling(context, maxlength)
         result_ch = result_en
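Because getAnswerFromChatGLM6b returns the whole answer in one HTTP round trip, the commit sets stop = True so the streaming endpoint emits a single final chunk rather than polling for more tokens. If token-by-token streaming were wanted, ChatGLM-6B exposes a stream_chat generator when the model is loaded in-process; a rough sketch, assuming a local GPU and the standard transformers loading path from the model card:

    from transformers import AutoModel, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
    model = AutoModel.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True).half().cuda().eval()

    def stream_chatglm(context):
        # stream_chat yields progressively longer (response, history) pairs
        for response, _history in model.stream_chat(tokenizer, context, history=[]):
            yield response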

jaxformer/hf/sample.py

Lines changed: 2 additions & 1 deletion

@@ -207,7 +207,8 @@ def load_model():
     device = torch.device('cuda:0')
     use_fp16 = True
     model_name = "codegen-6B-mono"
-    # model_name = "codegen-350M-mono" # test on windows
+    if os.name == 'nt':
+        model_name = "codegen-350M-mono" # test on windows
     ckpt = f'./checkpoints/{model_name}'
     # (3) load
     with print_time('loading parameters'):
