In [1]:
import pickle

import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

In [2]:
# Hugging Face checkpoint used as the language model for this notebook.
model_name = "facebook/opt-6.7b"

# Load the tokenizer that matches the checkpoint.
llm_tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load the model in 8-bit with automatic device placement.
# Passing `load_in_8bit=True` directly to `from_pretrained` is deprecated
# (transformers emits a warning that it will be removed), so the quantization
# settings are supplied through `quantization_config` instead.
llm = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    device_map="auto",
)

# Smoke test: generate a short continuation from a fixed prompt and print it.
inputs = llm_tokenizer("The future of AI is", return_tensors="pt").to(llm.device)
outputs = llm.generate(**inputs, max_new_tokens=50)
print(llm_tokenizer.decode(outputs[0], skip_special_tokens=True))


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The future of AI is in the hands of the people

The future of AI is in the hands of the people

The future of AI is in the hands of the people

The future of AI is in the hands of the people

The future of


In [9]:
# Project-local helper that performs the LLM rescoring.
from llm_utils import cer_with_gpt2_decoder

# Settings consumed by the rescoring loop.
acoustic_scale = 0.5             # passed positionally to cer_with_gpt2_decoder
llm_weight = 0.5                 # passed to cer_with_gpt2_decoder as `alpha`
seed_list = list(range(1, 10))   # seeds 1 through 9

In [10]:
# Directory that holds the per-seed pickles; the rescored text is written here too.
saved_dir = '/home/ubuntu/data/model_transcriptions_finetune/'

for seed in seed_list:
    model_name_str = f'transformer_short_training_fixed_seed_{seed}'
    print("RUNNING FOR: ", model_name_str)

    # NOTE: pickle.load can execute arbitrary code; only load files you trust.
    nbest_path = f"{saved_dir}{model_name_str}_nbest.pkl"
    with open(nbest_path, mode='rb') as nbest_file:
        nbest = pickle.load(nbest_file)

    model_outputs_path = f"{saved_dir}{model_name_str}_model_outputs.pkl"
    with open(model_outputs_path, mode='rb') as outputs_file:
        model_outputs = pickle.load(outputs_file)

    # Replace every transcription, in place, with a numpy array holding the
    # code point of each character followed by a trailing 0.
    for idx, text in enumerate(model_outputs['transcriptions']):
        model_outputs['transcriptions'][idx] = np.array([ord(ch) for ch in text] + [0])

    # Rescore the n-best hypotheses with the LLM (a shallow copy of the list is passed).
    llm_out = cer_with_gpt2_decoder(
        llm,
        llm_tokenizer,
        nbest[:],
        acoustic_scale,
        model_outputs,
        outputType="speech_sil",
        returnCI=True,
        lengthPenalty=0,
        alpha=llm_weight,
    )

    # Write the decoded transcripts, one per line, next to the input pickles.
    llm_out_path = f"{saved_dir}{model_name_str}_llm_outs.txt"
    with open(llm_out_path, "w", encoding="utf-8") as out_file:
        out_file.write("\n".join(llm_out['decoded_transcripts']) + "\n")

RUNNING FOR:  transformer_short_training_fixed_seed_1


  0%|          | 0/8800 [00:00<?, ?it/s]

RUNNING FOR:  transformer_short_training_fixed_seed_2


  0%|          | 0/8800 [00:00<?, ?it/s]

In [1]:
# NOTE(review): looks like a leftover kernel smoke test — consider removing before sharing.
print("hi")

hi
