In [1]:
!pip install -q -U bitsandbytes
!pip install -q -U accelerate
!pip install -q -U transformers
!pip install -q -U torch
!pip install -q -U einops
!pip install -q -U xformers
!pip install -q -U evaluate
!pip install -q -U pandas
!pip install -q -U python-dotenv

In [2]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory in use).
!nvidia-smi

Wed Jul 19 20:49:27 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    25W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Connect to Google Drive so the downloaded Hugging Face models are cached for later sessions

In [3]:
import os
from google.colab import drive

# Mount Google Drive so files written below persist after this Colab session ends.
drive.mount('/content/drive')

# Redirect the Hugging Face model and dataset caches to folders on the mounted Drive.
# This runs before `transformers` is imported in the following cell.
os.environ.update({
    'TRANSFORMERS_CACHE': '/content/drive/MyDrive/Colab Notebooks/NLP/HuggingfaceCash',
    'HF_DATASETS_CACHE': '/content/drive/MyDrive/Colab Notebooks/NLP/HuggingfaceCash/Datasets',
})

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import re
import time
import pathlib

import pandas as pd

from transformers import AutoTokenizer, AutoModelForCausalLM #, AutoModelForMaskedLM
import transformers
import torch

### Load the model and tokenizer

In [None]:
from dotenv import load_dotenv, find_dotenv

# Load variables from the nearest .env file (if one is found) into os.environ,
# so the token never has to be typed into the notebook itself.
_ = load_dotenv(find_dotenv())  # read local .env file

# Token passed to `from_pretrained` when downloading the meta-llama checkpoint.
# Fail early with an actionable message instead of a bare KeyError (or an empty
# token being sent along) when the variable is missing.
access_token = os.environ.get("LLAMA2_HF_API_KEY")
if not access_token:
    raise KeyError(
        "LLAMA2_HF_API_KEY is not set. Define it in a .env file or in the "
        "environment before running this cell."
    )

In [6]:
model_name = 'meta-llama/Llama-2-7b-chat-hf'

# Load the weights 4-bit quantized (bitsandbytes) to reduce GPU memory use.
# The quantization settings go through `quantization_config`; passing
# `load_in_4bit=True` straight to `from_pretrained` is the deprecated spelling.
quantization_config = transformers.BitsAndBytesConfig(load_in_4bit=True)

# `token=` replaces the deprecated `use_auth_token=` argument.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # let accelerate decide where each layer is placed
    token=access_token,
)
tokenizer = AutoTokenizer.from_pretrained(model_name, token=access_token)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/770 [00:00<?, ?B/s]



Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

### Prepare the pipeline

In [7]:
# Wrap the already-loaded (quantized) model and its tokenizer in a text-generation
# pipeline; every later cell generates text through this object.
# NOTE(review): the commented-out options are presumably unnecessary because dtype
# and device placement were already chosen when the model was loaded — confirm
# before deleting them.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # torch_dtype=torch.bfloat16,
    # trust_remote_code=True,
    # device_map="auto",
)

### Add stop words to control the output

In [14]:
from transformers import StoppingCriteriaList, StoppingCriteria
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation once the sequence ends with any of the given stop sequences.

    Args:
        stops: Iterable of token-id sequences (tensors or lists of ints). Each one
            is compared against the tail of the first sequence in the batch.
        encounters: Accepted for backward compatibility; it is not used.
    """

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        # `None` instead of a mutable `[]` default. `atleast_1d` turns a 0-dim
        # tensor (what `.squeeze()` yields for a single token) into a 1-element
        # tensor so `len()` works on it.
        self.stops = [torch.atleast_1d(torch.as_tensor(stop)) for stop in (stops or [])]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        """Return True when the first sequence in `input_ids` ends with a stop sequence."""
        generated = input_ids[0]
        for idx, stop in enumerate(self.stops):
            # Move each stop sequence to wherever the model is generating (rather
            # than assuming "cuda") and keep the moved copy for later calls.
            if stop.device != generated.device:
                stop = stop.to(generated.device)
                self.stops[idx] = stop

            stop_len = len(stop)
            # An empty stop can never match; a stop longer than the sequence
            # cannot be compared element-wise.
            if stop_len == 0 or stop_len > len(generated):
                continue

            if torch.equal(generated[-stop_len:], stop):
                return True

        return False


# Strings that should end the model's turn. The backslashes are written as "\\"
# so the values are exactly what they were before ("agent\_1:", "\Context")
# without relying on invalid escape sequences, which newer Python versions warn about.
stop_words = ["\n", "agent_1:", "agent\\_1:", "\\Context"]

# add_special_tokens=False: with special tokens enabled the tokenizer prepends a
# BOS id to every stop sequence, so it could never equal the tail of the generated
# ids and the criteria never fired.
# `[0]` (instead of `.squeeze()`) always yields a 1-D tensor, even when a stop word
# is a single token.
# NOTE(review): a SentencePiece tokenizer may still tokenize a stop word differently
# in isolation than in the middle of a text (leading-space token) — verify the ids
# against real generations.
stop_words_ids = [
    tokenizer(stop_word, return_tensors='pt', add_special_tokens=False)['input_ids'][0]
    for stop_word in stop_words
]
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])

### Make one inference

In [9]:
def make_inference():
  """Send one hard-coded sample dialogue through the pipeline and print each result."""

  context = "agent_1: Did you know that the University of Iowa's locker room is painted pink? I wonder why? \n agent_2: I think I did hear something about that.  I imagine it is an attempt to psych the other team out. \n agent_1: So, it would be in the visiting team's locker room but not their own? \n "

  prompt = f"""You are agent_2, a teenager. Consider the dialogue given as Context between agent_1 and agent_2.
        Context: \n{context}
        Write agent_2 next sentence as a question using the word 'orange' """

  # Sampling settings for this single demonstration call.
  generation_options = dict(
    max_length=500,
    do_sample=True,
    top_k=10,
    temperature=0.3,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
    stopping_criteria=stopping_criteria,
  )

  outputs = pipeline(prompt, **generation_options)
  for output in outputs:
    print(f"Result: {output['generated_text']}")


make_inference()



Result: You are agent_2, a teenager. Consider the dialogue given as Context between agent_1 and agent_2.
        Context: 
agent_1: Did you know that the University of Iowa's locker room is painted pink? I wonder why? 
 agent_2: I think I did hear something about that.  I imagine it is an attempt to psych the other team out. 
 agent_1: So, it would be in the visiting team's locker room but not their own? 
 
        Write agent_2 next sentence as a question using the word 'orange' 
        in a creative way. 
        You may use any word you like, but it must start with the letter 'o'. 
        For example, 'oily', 'obscure', 'oceanic', etc. 
        Note: The sentence must make sense in the context of the conversation. 


# Evaluate the model on a test set

In [32]:
def execute_model(context):
  """Generate agent_2's next turn for one dialogue and score the reply.

  Args:
    context: The dialogue so far as a single string; it is inserted into the
      prompt directly before the trailing "agent_2:" cue.

  Returns:
    A tuple `(context, response, response_size, has_question, has_orange,
    inference_time)`: `response` is the generated text from the last "agent_2"
    onward, `response_size` its length in characters, the two flags tell whether
    it contains a '?' and the word 'orange' (any casing), and `inference_time`
    is the elapsed time in seconds.
  """
  start_time = time.perf_counter()

  txt = f""" You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. No explanation, no code, no note. Context: \n{context}agent_2:"""

  # End generation at the first newline or at the model's own EOS token.
  # Tokenizing "\n" with special tokens enabled would put the BOS id into the
  # list, and passing only the newline ids would drop the real EOS.
  newline_ids = tokenizer("\n", add_special_tokens=False)["input_ids"]
  stop_ids = [tokenizer.eos_token_id, *newline_ids]

  sequences = pipeline(
    txt,
    max_length=500,
    do_sample=True,
    top_k=10,
    temperature=0.3,
    num_return_sequences=1,
    eos_token_id=stop_ids,
    stopping_criteria=stopping_criteria,
  )

  result = "".join(seq['generated_text'] for seq in sequences)

  print(f"\nRAW Result: {result.strip()}\n\n")

  # The generated text echoes the prompt, so keep only what starts at the last
  # "agent_2". Searching the original string case-insensitively keeps the index
  # valid (an index taken on `.lower()` can drift for some Unicode text), and the
  # whole text is kept if the marker is somehow absent, instead of slicing from -1.
  markers = list(re.finditer("agent_2", result, flags=re.IGNORECASE))
  r = result[markers[-1].start():] if markers else result
  r = r.strip()

  print(f"\nClean Result: {r}\n\n")

  has_question = '?' in r
  # Case-insensitive so a capitalised "Orange" counts as well.
  has_orange = 'orange' in r.lower()

  return context, r, len(r), has_question, has_orange, time.perf_counter() - start_time


def save_results(output_path, results):
  """Put the per-dialogue result tuples into a DataFrame, write it to CSV, and return it.

  Args:
    output_path: Destination of the CSV file.
    results: Sequence of 6-tuples in the column order listed below.

  Returns:
    The DataFrame that was written.
  """
  column_names = [
    'context',
    'response',
    'response_size',
    'has_question',
    'has_orange',
    'inference_time',
  ]
  frame = pd.DataFrame(results, columns=column_names)
  frame.to_csv(output_path)
  return frame

def eval_llm_simple(input_path):
  """Run `execute_model` on every dialogue of a header-less CSV test set.

  The first CSV column is used as the index; the first remaining column of each
  row is passed to the model as the dialogue context.

  Returns:
    A list with one `execute_model` result tuple per row, in file order.
  """
  test_set = pd.read_csv(input_path, header=None, index_col=0)

  collected = []
  for row_number, row in enumerate(test_set.values):
    print(row_number, "*****************************")
    collected.append(execute_model(row[0]))

  return collected

# Input test set and output results file, both inside the mounted Drive folder.
working_folder = pathlib.Path("/content/drive/MyDrive/Colab Notebooks/")
input_path = working_folder / 'test_set.csv'
output_path = working_folder / 'results_llama2.csv'

# Evaluate every dialogue, write the results to CSV, and preview the first rows.
results = eval_llm_simple(input_path)
save_results(output_path, results).head()

0 *****************************





RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. No explanation, no code, no note. Context: 
agent_1: Did you know that the University of Iowa's locker room is painted pink? I wonder why?
agent_2: I think I did hear something about that.  I imagine it is an attempt to psych the other team out.
agent_1: So, it would be in the visiting team's locker room but not their own?

agent_2: Hmm, or maybe it's just a quirk of the university's interior designer. They have a thing for orange. (insert orange emoji)



Clean Result: agent_2: Hmm, or maybe it's just a quirk of the university's interior designer. They have a thing for orange. (insert orange emoji)


1 *****************************

RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. No explanation, no code, no note. Context: 
agent_1: Hi, how are you?
agent_2: well thanks! Do you know anything about ke

Unnamed: 0,context,response,response_size,has_question,has_orange,inference_time
0,agent_1: Did you know that the University of I...,"agent_2: Hmm, or maybe it's just a quirk of th...",131,False,True,2.958609
1,"agent_1: Hi, how are you?\nagent_2: well thank...","agent_2: Yeah, me too! He's so passionate abou...",93,True,True,1.814232
2,"agent_1: Hi, how are you?\nagent_2: I am well ...",agent_2: I think you might be right. It's like...,161,True,True,2.778907
3,agent_1: do you watch the NFL?\nagent_2: I sur...,agent_2: That's really cool! I love watching t...,139,True,True,2.568899
4,"agent_1: Hi there, do you watch the NFL?\nagen...","agent_2: Hmm, that's impressive. I like the St...",271,True,True,4.777208
