In [1]:
!pip install tiktoken transformers==4.41

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting transformers==4.41
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/07/78/c23e1c70b89f361d855a5d0a19b229297f6456961f9a1afa9a69cd5a70c3/transformers-4.41.0-py3-none-any.whl (9.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting tokenizers<0.20,>=0.19 (from transformers==4.41)
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/40/4f/eb78de4af3b17b589f43a369cbf0c3a7173f25c3d2cd93068852c07689aa/tokenizers-0.19.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.20.2
    Uninstalling tokenizers-0.20.2:
      Successfully 

In [3]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Single source of truth for the local checkpoint directory used by both the
# tokenizer and the model.
MODEL_PATH = "../models/glm-4-9b-chat"

# Prefer the GPU, but fall back to the CPU so the cell still runs on a machine
# without CUDA instead of failing when tensors are moved to the device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): trust_remote_code=True lets Transformers execute the Python
# modelling/tokenizer code shipped inside the checkpoint directory; only use it
# with checkpoints from a trusted source.
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

query = "你好"

# Render the single user turn with the model's chat template and tokenize it
# into a dict of PyTorch tensors that can be splatted into generate().
inputs = tokenizer.apply_chat_template([{"role": "user", "content": query}],
                                       add_generation_prompt=True,
                                       tokenize=True,
                                       return_tensors="pt",
                                       return_dict=True
                                       )

inputs = inputs.to(device)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True
).to(device).eval()

# top_k=1 keeps only the single most likely token at each step, so although
# sampling is enabled the decoding is effectively greedy.
gen_kwargs = {"max_length": 2500, "do_sample": True, "top_k": 1}
with torch.no_grad():
    outputs = model.generate(**inputs, **gen_kwargs)
    # Skip the first `prompt length` positions so only the tokens after the
    # prompt are decoded and printed.
    outputs = outputs[:, inputs['input_ids'].shape[1]:]
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/10 [00:00<?, ?it/s]


你好👋！很高兴见到你，有什么可以帮助你的吗？


In [None]:
import datasets
from datasets import Dataset, load_dataset
# Read the local WikiEval JSON file with the generic "json" loader and keep
# its "train" split as the working dataset.
wikieval = load_dataset(
    "json",
    data_files="wikieval.json",
    split="train",
)


In [11]:
def gen_response(query, model):
    """Generate one sampled chat reply to ``query`` with ``model``.

    The query is wrapped as a single user turn, rendered and tokenized with
    the notebook-level ``tokenizer``'s chat template, and moved to
    ``model.device`` before generation.

    Args:
        query: Text placed in the ``content`` field of the user message.
        model: Object exposing ``device`` and ``generate``.

    Returns:
        The decoded text of the tokens that follow the prompt, with special
        tokens skipped. Sampling is enabled (temperature 0.7), so repeated
        calls may return different text.
    """
    chat = [{"role": "user", "content": query}]
    encoded = tokenizer.apply_chat_template(
        chat,
        add_generation_prompt=True,
        tokenize=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    prompt_len = encoded['input_ids'].shape[1]

    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_length=4096,
            do_sample=True,
            temperature=0.7,
        )

    # Skip the first prompt_len positions so only the continuation is decoded.
    completion = generated[:, prompt_len:]
    return tokenizer.decode(completion[0], skip_special_tokens=True)

In [26]:
# Smoke-test the helper with a trivial prompt; the reply is shown as the cell output.
gen_response(query='test', model=model)

"\nHello! It seems like you're just testing the platform. If you need any information or assistance, feel free to ask! Whether it's about a specific topic or just a general question, I'm here to help."

In [7]:
def gen_prompt(dataset: Dataset, chat_style: bool = False):
    """Build one question-answering prompt per dataset row.

    Each prompt is the first entry of the row's ``context_v1`` field, followed
    by a ``Question:`` line holding the row's ``question`` field and a trailing
    ``Answer:`` cue.

    Args:
        dataset: Object supporting ``len()`` and lookup by column name for the
            ``'context_v1'`` and ``'question'`` columns.
        chat_style: When True, wrap every prompt in a one-message chat list
            (``[{"role": "user", "content": prompt}]``); otherwise return the
            prompts as plain strings.

    Returns:
        A list with one entry per row, in row order.

    NOTE(review): ``save_data`` splits the ``question`` field on
    ``'Question:'``, which suggests the raw field already carries that label.
    If so, these prompts read "Question: Question: ..." - confirm against the
    data before changing the template.
    """
    if len(dataset) == 0:
        return []

    # Look each column up once. The previous version indexed the dataset by
    # column name inside the loop, repeating the column lookup for every row.
    contexts = dataset['context_v1']
    questions = dataset['question']

    prompts = [
        f"{context[0]}\n\nQuestion: {question}\n\nAnswer:"
        for context, question in zip(contexts, questions)
    ]
    if chat_style:
        prompts = [[{"role": "user", "content": p}] for p in prompts]
    return prompts
# Plain-string prompts: gen_response wraps each one in a user message itself.
msgs = gen_prompt(dataset=wikieval, chat_style=False)

In [None]:
from tqdm import tqdm

from concurrent.futures import ThreadPoolExecutor, as_completed
def batch_generate(self, prompts, model, max_workers: int = 1):
    """Run ``gen_response`` over ``prompts`` in a thread pool, keeping order.

    NOTE(review): ``self`` is never used in the body, which looks like a
    leftover from lifting this out of a class. As written, callers have to
    pass a placeholder first argument - confirm with callers before removing.

    Args:
        self: Unused.
        prompts: Sequence of prompts; each is passed to ``gen_response``.
        model: Forwarded to ``gen_response`` as its second argument.
        max_workers: Size of the thread pool.

    Returns:
        A list with one result per prompt, positioned to match ``prompts``
        regardless of the order in which the calls complete.
    """
    ordered = [None] * len(prompts)  # filled in by original position
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Map every submitted future back to the position of its prompt.
        position_of = {
            pool.submit(gen_response, prompt, model): position
            for position, prompt in enumerate(prompts)
        }

        progress = tqdm(as_completed(position_of), total=len(prompts), desc="Generating")
        for finished in progress:
            ordered[position_of[finished]] = finished.result()

    return ordered

In [12]:
# Generate one answer per prompt, sequentially. Appending inside the loop
# (rather than building the list in one expression) means `res` still holds
# the answers produced so far if the run is interrupted.
res = []
for m in tqdm(msgs):
    res.append(gen_response(m, model))

100%|██████████| 50/50 [06:58<00:00,  8.37s/it]


In [17]:
from typing import List
import json
def save_data(results:List[str], model_name, filename, dataset=None):
    """Write the generated answers next to their dataset rows as a JSON file.

    One record is produced per dataset row with the keys ``id``, ``dataset``,
    ``context``, ``question``, ``gt_answer``, ``answer`` and ``model``. The
    list of records is written to ``f'{filename}.json'``.

    Args:
        results: Generated answers, one per dataset row and in row order.
            Surrounding whitespace is stripped before saving, so the output
            does not depend on whether a separate clean-up cell ran first.
        model_name: Stored verbatim in every record's ``model`` field.
        filename: Output path without the ``.json`` suffix.
        dataset: Object supporting ``len()`` and lookup by column name for
            ``'context_v1'``, ``'question'`` and ``'answer'``. Defaults to the
            notebook-level ``wikieval``. The ``dataset`` field of each record
            is always the literal ``'wikieval'``.

    Raises:
        IndexError: If ``results`` has fewer entries than the dataset has
            rows, or if a ``question``/``answer`` field does not contain its
            ``'Question:'``/``'Answer:'`` marker.
    """
    if dataset is None:
        dataset = wikieval

    lines = []
    if len(dataset) > 0:
        # Look each column up once instead of once per row.
        contexts = dataset['context_v1']
        questions = dataset['question']
        gt_answers = dataset['answer']
        for i, (context, question, gt_answer) in enumerate(zip(contexts, questions, gt_answers)):
            lines.append({
                'id': i,
                'dataset': 'wikieval',
                'context': context[0],
                # Keep the text that follows the field's own label.
                'question': question.split('Question:')[1].strip(),
                'gt_answer': gt_answer.split('Answer:')[1].strip(),
                'answer': results[i].strip(),
                'model': model_name
            })

    # The context manager closes the handle once the JSON has been written.
    with open(f'{filename}.json', 'w', encoding='utf-8') as fh:
        json.dump(lines, fh)
# Persist the generations to 'glm4-9b.json'.
# NOTE(review): the execution counts show this cell (In [17]) was run after the
# whitespace-stripping cell (In [14]) that follows it in the file, so a
# top-to-bottom run executes them in a different order than the recorded one.
save_data(results=res, model_name='glm4-9b', filename='glm4-9b')

In [14]:
# Trim leading and trailing whitespace from every generated answer.
res = [answer.strip() for answer in res]

In [15]:
# Display the generated answers for manual inspection (the whole list is shown).
res

['The scheduled launch date and time for the PSLV-C56 mission is Sunday, 30 July 2023, at 06:30 IST (Indian Standard Time) / 01:00 UTC (Coordinated Universal Time). The mission will be launched from the Satish Dhawan Space Centre First Launch Pad (FLP) in Sriharikota, Andhra Pradesh, India.',
 "The objective of the Uzbekistan-Afghanistan-Pakistan Railway Project is to establish a direct railway link between Uzbekistan and Pakistan, with Afghanistan's territory serving as the transit route. This project is designed to enhance trade and logistics efficiency in the following ways:\n\n1. **Reduced Cargo Delivery Times**: By creating a direct rail connection, the project aims to significantly reduce the time it takes to deliver cargo between Uzbekistan and Pakistan. This is expected to cut delivery times by approximately five days, thereby improving the speed and reliability of trade between the countries.\n\n2. **Increased Trade Volume**: The improved logistics efficiency will likely lead 