In [1]:
import torch
import os
import json

In [2]:
# Point the Hugging Face cache root (HF_HOME) at the shared model pool.
# NOTE(review): this is assigned before `transformers` is imported below;
# keep that order — the HF libraries presumably read HF_HOME at import time.
os.environ['HF_HOME'] =  "/mnt/swordfish-pool2/models/"
# Optional GPU pinning, currently disabled:
# os.environ["CUDA_VISIBLE_DEVICES"]="2,3"
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

  from .autonotebook import tqdm as notebook_tqdm


# Falcon-40B

In [3]:
# Local directory holding the Hugging Face-format checkpoint to load.
# Alternative checkpoint used previously:
# dir_of_hf_w = "/mnt/swordfish-pool2/models/vicuna/13B"
dir_of_hf_w = "/mnt/swordfish-pool2/models/tulu"

print("Loading the model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(dir_of_hf_w)

# Options forwarded to `from_pretrained`: half-precision weights, automatic
# device placement, and custom modelling code shipped with the checkpoint.
model_load_options = {
    "load_in_8bit": False,
    "torch_dtype": torch.float16,
    "device_map": "auto",
    "trust_remote_code": True,
}
model = AutoModelForCausalLM.from_pretrained(dir_of_hf_w, **model_load_options)

# Switch to inference mode; as the last expression this also displays the
# module tree as the cell output.
model.eval()

Loading the model and tokenizer...


Explicitly passing a `revision` is encouraged when loading a configuration with custom code to ensure no malicious code has been contributed in a newer revision.
Explicitly passing a `revision` is encouraged when loading a model with custom code to ensure no malicious code has been contributed in a newer revision.
Loading checkpoint shards: 100%|██████████| 9/9 [01:16<00:00,  8.49s/it]


RWForCausalLM(
  (transformer): RWModel(
    (word_embeddings): Embedding(65024, 8192)
    (h): ModuleList(
      (0-59): 60 x DecoderLayer(
        (ln_attn): LayerNorm((8192,), eps=1e-05, elementwise_affine=True)
        (ln_mlp): LayerNorm((8192,), eps=1e-05, elementwise_affine=True)
        (self_attention): Attention(
          (maybe_rotary): RotaryEmbedding()
          (query_key_value): Linear(in_features=8192, out_features=9216, bias=False)
          (dense): Linear(in_features=8192, out_features=8192, bias=False)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (mlp): MLP(
          (dense_h_to_4h): Linear(in_features=8192, out_features=32768, bias=False)
          (act): GELU(approximate='none')
          (dense_4h_to_h): Linear(in_features=32768, out_features=8192, bias=False)
        )
      )
    )
    (ln_f): LayerNorm((8192,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=8192, out_features=65024, bias=False)
)

In [4]:
def evaluate(tokenizer, model, 
             input, instruction=None, fs_prompt=None, 
             verbose=False, max_new_tokens=256):
    """Sample one completion for ``input`` and return the decoded text.

    The prompt is built by ``generate_prompt`` from ``input`` plus either
    ``instruction`` (zero-shot) or ``fs_prompt`` (few-shot).

    Args:
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Causal LM exposing ``generate`` and ``device``.
        input: The sentence to append to the prompt.
        instruction: Task instruction for the zero-shot prompt.
        fs_prompt: Pre-built few-shot prefix (used when ``instruction`` is
            not given).
        verbose: If true, print the full prompt before generating.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded first returned sequence, stripped, with every
        ``<|endoftext|>`` marker removed.
        NOTE(review): for decoder-only models ``generate`` returns prompt +
        continuation, so the result presumably still starts with the prompt —
        confirm that downstream post-processing expects this.
    """
    prompt = generate_prompt(input, instruction, fs_prompt)
    if verbose:
        print("****PROMPT:****")
        print(prompt)
        print("****END PROMPT****")

    encoded = tokenizer(prompt, return_tensors="pt")
    # Follow the model's own device instead of assuming the default CUDA device.
    device = model.device
    input_ids = encoded["input_ids"].to(device)  # token ids encoded from the prompt
    # Pass the mask explicitly: pad and eos share an id below, so `generate`
    # cannot reliably infer it from the ids.
    attention_mask = encoded.get("attention_mask")
    if attention_mask is not None:
        attention_mask = attention_mask.to(device)

    generation_config = GenerationConfig(
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id
    )
    # Per-step scores are not requested: nothing reads them and they would
    # hold one vocabulary-sized tensor per generated token.
    generation_output = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        generation_config=generation_config,
        return_dict_in_generate=True,
        max_new_tokens=max_new_tokens,
    )
    decoded = tokenizer.decode(generation_output.sequences[0])
    return decoded.strip().replace("<|endoftext|>", "").strip()


def construct_few_shot_prompt(train_file_path, n=10):
    """Build a few-shot prompt prefix from the first ``n`` training examples.

    The training file is a JSON list of objects with ``"instruction"``,
    ``"input"`` and ``"output"`` keys. Examples are taken in file order (the
    first ``n``; they are not randomly sampled).

    Layout of the result::

        <instruction of first example>
        For example:
        <input 1>
        <output 1>

        ...
        For the following sentence, <lower-cased instruction of last example>

    Args:
        train_file_path: Path to the training JSON file.
        n: Number of leading examples to include.

    Returns:
        The prompt prefix, stripped of surrounding whitespace.

    Raises:
        ValueError: If no examples are available (empty file or ``n == 0``).
    """
    with open(train_file_path, "r", encoding="utf-8") as f:
        train_samples = json.load(f)[:n]

    if not train_samples:
        raise ValueError(
            f"No training examples available in {train_file_path!r} for n={n!r}; "
            "cannot build a few-shot prompt."
        )

    header = f"{train_samples[0]['instruction']}\nFor example:\n"
    examples = "".join(
        f"{sample['input']}\n{sample['output']}\n\n" for sample in train_samples
    )
    # The closing line reuses the instruction of the last included example.
    closing_instruction = train_samples[-1]["instruction"]
    prompt = f"{header}{examples}For the following sentence, {closing_instruction.lower()}"
    return prompt.strip()

def generate_prompt(input, instruction=None, fs_prompt=None):
    """Assemble the final prompt text for one input sentence.

    ``instruction`` takes precedence over ``fs_prompt`` when both are given.

    Args:
        input: The sentence to place at the end of the prompt.
        instruction: Zero-shot task instruction, placed on the line above
            ``input``.
        fs_prompt: Few-shot prefix, followed by a separator line and then
            ``input``.

    Returns:
        The prompt string.

    Raises:
        ValueError: If ``input`` is empty, or neither ``instruction`` nor
            ``fs_prompt`` is provided (previously this silently returned
            ``None``).
    """
    if input and instruction:
        return f"{instruction}\n{input}"
    if input and fs_prompt:
        # The separator line holds eight spaces, exactly as in the prompts
        # used so far; it is spelled out so editors cannot strip it.
        return f"{fs_prompt}\n        \n{input}"
    raise ValueError(
        "generate_prompt needs a non-empty input and either an instruction or a fs_prompt"
    )

def run_inference(input_json, out_dir,
                   model_name, model, tokenizer,
                   n_shots = True, train_file_path=None,
                   verbose=False, 
                   start_idx = 1, stop_idx=10000):
    """Run the model over a slice of the test set and save the predictions.

    Instances with index in ``[start_idx, stop_idx]`` (both inclusive) are
    processed. Note the default ``start_idx=1`` skips the instance at index 0.
    Results are written to
    ``{out_dir}/{model_name}_test_output_{start_idx}_{stop_idx}.json`` whenever
    the instance index is a multiple of 10, and once more at the end.

    Args:
        input_json: Path to a JSON list of objects with ``"instruction"`` and
            ``"input"`` keys.
        out_dir: Existing directory for the output file.
        model_name: Prefix used in the output file name.
        model: Model passed through to ``evaluate``.
        tokenizer: Tokenizer passed through to ``evaluate``.
        n_shots: ``0`` selects zero-shot prompting with each instance's own
            instruction; any other value is the number of few-shot examples
            taken from ``train_file_path``.
        train_file_path: Training JSON used for the few-shot prompt; required
            unless ``n_shots == 0``.
        verbose: If true, print instruction, input and response per instance.
        start_idx: First instance index to process (inclusive).
        stop_idx: Last instance index to process (inclusive).

    Returns:
        List of dicts with keys ``"instruction"``, ``"input"``, ``"pred"``
        and ``"id"``.

    Raises:
        ValueError: If few-shot prompting is requested without
            ``train_file_path``.
    """
    out_path = f"{out_dir}/{model_name}_test_output_{start_idx}_{stop_idx}.json"

    def _save(records):
        # Rewrite the whole output file with everything collected so far.
        with open(out_path, 'w', encoding='utf-8') as fp:
            json.dump(records, fp, indent=4)

    with open(input_json, 'r', encoding='utf-8') as fp:
        test_file = json.load(fp)

    output = []
    # The few-shot prefix is identical for every instance, so it is built on
    # first use and reused rather than re-reading the train file each time.
    fs_prompt = None

    for i, line in enumerate(test_file):
        if i < start_idx: continue
        if i > stop_idx: break

        print(f"INSTANCE {i}, percentage: {i/len(test_file)}")
        if verbose: print(f"INSTRUCTION: {line['instruction']}\nINPUT: {line['input']}\n")

        if n_shots == 0:
            pred = evaluate(tokenizer, model, 
                            input=line['input'], instruction=line['instruction'], 
                            verbose=verbose)
        else:
            if fs_prompt is None:
                if train_file_path is None:
                    raise ValueError(
                        "train_file_path is required for few-shot inference (n_shots != 0)"
                    )
                fs_prompt = construct_few_shot_prompt(train_file_path, n=n_shots)
            pred = evaluate(tokenizer, model, 
                            input=line['input'], fs_prompt=fs_prompt,
                            verbose=verbose)

        output.append({"instruction": line['instruction'],
                       "input": line['input'],
                       "pred": pred,
                       "id": i
                       })
        if verbose:
            print("RESPONSE:\n", pred)
            print("-"*50)

        # Periodic checkpoint so a crash does not lose earlier predictions.
        if i % 10 == 0:
            _save(output)

    # final save
    _save(output)
    return output

In [None]:
# out_dir = "../data/gyafc_w_ICHF_alpaca/informal_to_formal"
# assert os.path.isdir(out_dir)
# input_json = "../data/gyafc_w_ICHF_alpaca/informal_to_formal/test.json"
# assert os.path.isfile(input_json)
# train_file_path = "../data/gyafc_w_ICHF_alpaca/informal_to_formal/train.json"
# assert os.path.isfile(train_file_path)

# # fs_prompt = construct_few_shot_prompt(train_file_path, n=3)
# # print(generate_prompt(input="test", fs_prompt=fs_prompt))

# run_inference(input_json, out_dir, 
#               "falcon", model, tokenizer, 
#               n_shots=1, train_file_path=train_file_path,
#               verbose=False, start_idx=0, stop_idx=1)

In [None]:
# Formal -> informal direction: output directory, test set, and the training
# set that supplies the few-shot example.
out_dir = "../data/gyafc_w_ICHF_alpaca/formal_to_informal"
input_json = "../data/gyafc_w_ICHF_alpaca/formal_to_informal/test.json"
train_file_path = "../data/gyafc_w_ICHF_alpaca/formal_to_informal/train.json"

# Fail fast if any of the expected paths is missing.
assert os.path.isdir(out_dir)
assert os.path.isfile(input_json)
assert os.path.isfile(train_file_path)

# One-shot run over test instances 0 and 1 (stop_idx is inclusive).
run_inference(
    input_json=input_json,
    out_dir=out_dir,
    model_name="falcon",
    model=model,
    tokenizer=tokenizer,
    n_shots=1,
    train_file_path=train_file_path,
    verbose=False,
    start_idx=0,
    stop_idx=1,
)