In [1]:
import torch
import os
import json

In [2]:
# Point HF_HOME at /mnt/swordfish-pool2/models/. The assignment sits before the
# `transformers` import below -- presumably so the library sees it when it
# resolves its cache location. NOTE(review): confirm the ordering is required.
os.environ['HF_HOME'] =  "/mnt/swordfish-pool2/models/"
# Optional GPU pinning, currently disabled:
# os.environ["CUDA_VISIBLE_DEVICES"]="2,3"
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

# Tulu-65B

In [3]:
# Directory containing the checkpoint to load. The commented-out line is an
# alternative checkpoint path that is currently disabled.
# dir_of_hf_w = "/mnt/swordfish-pool2/models/vicuna/13B"
dir_of_hf_w = "/mnt/swordfish-pool2/models/tulu"
print("Loading the model and tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(dir_of_hf_w)
# load_in_8bit=True: the captured output of this cell shows Linear8bitLt layers.
# device_map="auto": presumably lets the loader decide device placement --
# TODO confirm against the transformers/accelerate docs for the installed version.
model = AutoModelForCausalLM.from_pretrained(
    dir_of_hf_w,
    load_in_8bit=True,
    device_map="auto",
)
# As the last expression of the cell, the value returned by eval() is displayed
# (the module repr seen in the cell output).
model.eval()

Loading the model and tokenizer...





Welcome to bitsandbytes. For bug reports, please run

python -m bitsandbytes

 and submit this information together with your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
bin /home/a.saakyan/tmp/ENTER/envs/alpaca/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so
CUDA SETUP: CUDA runtime path found: /usr/local/cuda/lib64/libcudart.so.11.0
CUDA SETUP: Highest compute capability among GPUs detected: 8.0
CUDA SETUP: Detected CUDA version 118
CUDA SETUP: Loading binary /home/a.saakyan/tmp/ENTER/envs/alpaca/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda118.so...


  warn(msg)


Loading checkpoint shards:   0%|          | 0/27 [00:00<?, ?it/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32001, 8192, padding_idx=0)
    (layers): ModuleList(
      (0-79): 80 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear8bitLt(in_features=8192, out_features=8192, bias=False)
          (k_proj): Linear8bitLt(in_features=8192, out_features=8192, bias=False)
          (v_proj): Linear8bitLt(in_features=8192, out_features=8192, bias=False)
          (o_proj): Linear8bitLt(in_features=8192, out_features=8192, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear8bitLt(in_features=8192, out_features=22016, bias=False)
          (down_proj): Linear8bitLt(in_features=22016, out_features=8192, bias=False)
          (up_proj): Linear8bitLt(in_features=8192, out_features=22016, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSN

In [8]:
def evaluate(tokenizer, model,
             input, instruction=None, fs_prompt=None,
             verbose=False):
    """Sample one completion for ``input`` and return the assistant's reply text.

    The prompt is built by ``generate_prompt(input, instruction, fs_prompt)``,
    tokenized, moved to the GPU and passed to ``model.generate`` with nucleus
    sampling (temperature 0.7, top_p 0.9, at most 256 new tokens).

    Args:
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        model: Causal language model exposing ``generate``.
        input: Text to be transformed by the model.
        instruction: Zero-shot instruction placed before ``input``.
        fs_prompt: Few-shot prompt placed before ``input``; only used when
            ``instruction`` is not given.
        verbose: When true, print the full prompt before generating.

    Returns:
        The decoded text following the first ``<assistant>`` marker (up to the
        next such marker, if any), with ``</s>`` removed and surrounding
        whitespace stripped.

    Raises:
        IndexError: If the decoded sequence contains no ``<assistant>`` marker.

    Note:
        This function re-seeds torch's global RNG with 42 on every call.
    """
    prompt = generate_prompt(input, instruction, fs_prompt)
    if verbose:
        print("****PROMPT:****")
        print(prompt)
        print("****END PROMPT****")

    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].cuda()  # integer token ids, moved to the GPU

    generation_config = GenerationConfig(
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.eos_token_id,
    )

    # `seed` is not a GenerationConfig option: passing seed=42 there was silently
    # stored and never used, so sampling was unseeded. Seed torch's RNG instead.
    torch.manual_seed(42)

    # Per-step scores were requested before but never read, so they are no
    # longer collected.
    generation_output = model.generate(
        input_ids=input_ids,
        generation_config=generation_config,
        return_dict_in_generate=True,
        max_new_tokens=256,
    )

    decoded = tokenizer.decode(generation_output.sequences[0])
    # The decoded sequence includes the prompt; the reply is whatever follows
    # the "<assistant>" marker that generate_prompt appends to the prompt.
    return decoded.split("<assistant>")[1].replace("</s>", "").strip()


def construct_few_shot_prompt(train_file_path, n=10):
    """Build a few-shot prompt from the first ``n`` records of a JSON train file.

    The file must contain a JSON list of objects with ``"instruction"``,
    ``"input"`` and ``"output"`` keys. The prompt has this layout::

        <user>
        {instruction of the first record}
        For example:
        {input}
        {output}
        (blank line; repeated for each of the n records)
        For the following sentence, {lower-cased instruction of the last record}

    Args:
        train_file_path: Path to the JSON file with training records.
        n: Number of leading records to use as examples.

    Returns:
        The prompt string, stripped of leading/trailing whitespace.

    Raises:
        ValueError: If no records are selected (``n`` is 0 or the file holds an
            empty list). Previously this surfaced as an ``UnboundLocalError``.
    """
    # JSON text is read as UTF-8 explicitly, matching how outputs are written.
    with open(train_file_path, "r", encoding="utf-8") as f:
        train_samples = json.load(f)[:n]

    if not train_samples:
        raise ValueError(
            f"No few-shot examples selected from {train_file_path!r} (n={n!r})"
        )

    header = f"<user>\n{train_samples[0]['instruction']}\nFor example:\n"
    examples = "".join(
        f"{row['input']}\n{row['output']}\n\n" for row in train_samples
    )
    # The closing line reuses the instruction of the last selected record.
    closing_instruction = train_samples[-1]["instruction"]
    prompt = f"{header}{examples}For the following sentence, {closing_instruction.lower()}"
    return prompt.strip()

def generate_prompt(input, instruction=None, fs_prompt=None):
    """Assemble the final model prompt, ending with the ``<assistant>`` turn marker.

    The layout is: header, a whitespace-only line, ``input``, then
    ``<assistant>`` on its own line. The header is ``instruction`` when given,
    otherwise ``fs_prompt``.

    Args:
        input: Text the model should operate on; must be non-empty.
        instruction: Zero-shot instruction; takes precedence over ``fs_prompt``.
        fs_prompt: Few-shot prompt, used when ``instruction`` is not given.

    Returns:
        The prompt string.

    Raises:
        ValueError: If ``input`` is empty, or neither ``instruction`` nor
            ``fs_prompt`` is provided. Previously the function returned ``None``
            in these cases.
    """
    if not input:
        raise ValueError("generate_prompt requires a non-empty `input`")

    header = instruction or fs_prompt
    if not header:
        raise ValueError(
            "generate_prompt requires either `instruction` or `fs_prompt`"
        )

    # The 8-space line between header and input is kept exactly as in the
    # original template so prompts stay byte-identical.
    return f"{header}\n        \n{input}\n<assistant>\n"

def run_inference(input_json, out_dir,
                   model_name, model, tokenizer,
                   n_shots = True, train_file_path=None,
                   verbose=False, 
                   start_idx = 1, stop_idx=10000):
    """Run the model over a slice of a JSON test file and save the predictions.

    Records with index ``start_idx <= i <= stop_idx`` (both inclusive) are
    processed. Results are written to
    ``{out_dir}/{model_name}_test_output_{start_idx}_{stop_idx}.json`` whenever
    ``i`` is a multiple of 10 and once more after the loop.

    Args:
        input_json: Path to a JSON list of records with ``"instruction"`` and
            ``"input"`` keys.
        out_dir: Existing directory that receives the output file.
        model_name: Prefix used in the output file name.
        model: Model passed through to ``evaluate``.
        tokenizer: Tokenizer passed through to ``evaluate``.
        n_shots: ``0`` selects zero-shot prompting with each record's own
            instruction; any other value is forwarded as ``n`` to
            ``construct_few_shot_prompt``.
        train_file_path: Train file for few-shot examples (needed unless
            ``n_shots == 0``).
        verbose: Print instruction, input and response for every record.
        start_idx: First record index to process. The default of 1 skips
            record 0.
        stop_idx: Last record index to process.

    Returns:
        List of dicts with keys ``"instruction"``, ``"input"``, ``"pred"`` and
        ``"id"`` (the record's index in the test file).
    """
    with open(input_json, 'r', encoding='utf-8') as fp:
        test_file = json.load(fp)

    out_path = f"{out_dir}/{model_name}_test_output_{start_idx}_{stop_idx}.json"

    def _save(records):
        # Overwrite the output file with everything predicted so far.
        with open(out_path, 'w', encoding='utf-8') as out_fp:
            json.dump(records, out_fp, indent=4)

    output = []
    # The few-shot prompt does not depend on the test record, so it is built
    # once on first use instead of re-reading the train file every iteration.
    fs_prompt = None

    for i, line in enumerate(test_file):
        if i < start_idx:
            continue
        if i > stop_idx:
            break

        print(f"INSTANCE {i}, percentage: {i/len(test_file)}")
        if verbose:
            print(f"INSTRUCTION: {line['instruction']}\nINPUT: {line['input']}\n")

        if n_shots == 0:
            pred = evaluate(tokenizer, model,
                            input=line['input'], instruction=line['instruction'],
                            verbose=verbose)
        else:
            if fs_prompt is None:
                fs_prompt = construct_few_shot_prompt(train_file_path, n=n_shots)
            pred = evaluate(tokenizer, model,
                            input=line['input'], fs_prompt=fs_prompt,
                            verbose=verbose)

        output.append({"instruction": line['instruction'],
                       "input": line['input'],
                       "pred": pred,
                       "id": i
                       })
        if verbose:
            print("RESPONSE:\n", pred)
            print("-"*50)

        # Periodic checkpoint so a crash loses at most a few predictions.
        if i % 10 == 0:
            _save(output)

    # final save
    _save(output)
    return output

In [14]:
# Paths for the informal_to_formal split. Each assert fails fast if the
# directory or file is missing relative to the notebook's working directory.
# NOTE(review): the doubled slash in out_dir looks like a typo; confirm it is
# harmless on the target filesystem.
out_dir = "../..//data/gyafc_w_ICHF_alpaca/informal_to_formal"
assert os.path.isdir(out_dir)
input_json = "../../data/gyafc_w_ICHF_alpaca/informal_to_formal/test.json"
assert os.path.isfile(input_json)
train_file_path = "../../data/gyafc_w_ICHF_alpaca/informal_to_formal/train.json"
assert os.path.isfile(train_file_path)

# Disabled prompt-preview snippet, kept for debugging the few-shot template:
# fs_prompt = construct_few_shot_prompt(train_file_path, n=3)
# print(generate_prompt(input="test", fs_prompt=fs_prompt))

# 1-shot run over test records 800..2000 (run_inference treats both bounds as
# inclusive). Predictions go to {out_dir}/tulu_test_output_800_2000.json.
# The returned list is the cell's last expression, so it is echoed as output.
run_inference(input_json, out_dir, 
              "tulu", model, tokenizer, 
              n_shots=1, train_file_path=train_file_path,
              verbose=False, start_idx=800, stop_idx=2000)

INSTANCE 800, percentage: 0.40816326530612246
INSTANCE 801, percentage: 0.4086734693877551
INSTANCE 802, percentage: 0.4091836734693878
INSTANCE 803, percentage: 0.40969387755102044
INSTANCE 804, percentage: 0.41020408163265304
INSTANCE 805, percentage: 0.4107142857142857
INSTANCE 806, percentage: 0.41122448979591836
INSTANCE 807, percentage: 0.411734693877551
INSTANCE 808, percentage: 0.4122448979591837
INSTANCE 809, percentage: 0.41275510204081634
INSTANCE 810, percentage: 0.413265306122449
INSTANCE 811, percentage: 0.41377551020408165
INSTANCE 812, percentage: 0.4142857142857143
INSTANCE 813, percentage: 0.4147959183673469
INSTANCE 814, percentage: 0.4153061224489796
INSTANCE 815, percentage: 0.41581632653061223
INSTANCE 816, percentage: 0.4163265306122449
INSTANCE 817, percentage: 0.41683673469387755
INSTANCE 818, percentage: 0.4173469387755102
INSTANCE 819, percentage: 0.41785714285714287
INSTANCE 820, percentage: 0.41836734693877553
INSTANCE 821, percentage: 0.4188775510204082
IN

[{'instruction': 'Identify informal attributes in a given sentence, modify them to create a formal sentence, and then output the attributes of the generated formal sentence.',
  'input': 'Informal: Grandma always said: Why buy the cow ?,  when you can get the milk for free',
  'pred': 'Informal Attributes: idiomatic expression ("why buy the cow"), colloquial language ("grandma always said")\nFormal: My grandmother frequently remarked that there is no need to purchase the entire cow when one can obtain the milk at no cost.Formal Attributes: lexical sophistication ("frequently remarked"), formal language ("there is no need to purchase")',
  'id': 800},
 {'instruction': 'Identify informal attributes in a given sentence, modify them to create a formal sentence, and then output the attributes of the generated formal sentence.',
  'input': "Informal: Then you're just a bad memory.",
  'pred': 'Informal Attributes: direct statement form, use of slang/informal language ("bad memory")',
  'id':

In [12]:
# out_dir = "../data/gyafc_w_ICHF_alpaca/formal_to_informal"
# assert os.path.isdir(out_dir)
# input_json = "../data/gyafc_w_ICHF_alpaca/formal_to_informal/test.json"
# assert os.path.isfile(input_json)
# train_file_path = "../data/gyafc_w_ICHF_alpaca/formal_to_informal/train.json"
# assert os.path.isfile(train_file_path)

# run_inference(input_json, out_dir, 
#               "tulu", model, tokenizer, 
#               n_shots=1, train_file_path=train_file_path,
#               verbose=False, start_idx = 0, stop_idx=400)