In [1]:
!pip install -q -U bitsandbytes
!pip install -q -U accelerate
!pip install -q -U transformers
!pip install -q -U torch
!pip install -q -U einops
!pip install -q -U xformers
!pip install -q -U evaluate
!pip install -q -U pandas

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.5/92.5 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m21.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m57.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.1/109.1 MB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory usage).
!nvidia-smi

Wed Jul 19 22:08:55 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-SXM2...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    23W / 300W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### Connect to Google Drive so the downloaded HF models are cached for future sessions

In [3]:
import os
from google.colab import drive

# Mount Google Drive so the Hugging Face caches configured below live on Drive
# instead of the runtime's local disk.
drive.mount('/content/drive')

# Set the cache locations before `transformers` is imported (done in a later cell).
os.environ['TRANSFORMERS_CACHE'] = '/content/drive/MyDrive/Colab Notebooks/NLP/HuggingfaceCash'
# `TRANSFORMERS_CACHE` is deprecated in recent transformers releases; `HF_HUB_CACHE`
# is the current variable for the same model cache, so point both at one directory.
os.environ['HF_HUB_CACHE'] = os.environ['TRANSFORMERS_CACHE']
os.environ['HF_DATASETS_CACHE'] = '/content/drive/MyDrive/Colab Notebooks/NLP/HuggingfaceCash/Datasets'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
import re
import time
import pathlib

import pandas as pd

from transformers import AutoTokenizer, AutoModelForCausalLM #, AutoModelForMaskedLM
import transformers
import torch

### Load the model and tokenizer

In [5]:
model_name = "tiiuae/falcon-7b-instruct"

# Load the checkpoint quantized to 4 bits. The setting is passed through
# `quantization_config` because the bare `load_in_4bit=True` keyword of
# `from_pretrained` is deprecated in newer transformers releases.
# `trust_remote_code=True` allows the model repository's custom code to run.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=transformers.BitsAndBytesConfig(load_in_4bit=True),
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Prepare the pipeline

In [6]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# `trust_remote_code` and `device_map` were supplied when the model was loaded,
# so they are not passed again here.
pipeline = transformers.pipeline(
    task="text-generation",
    tokenizer=tokenizer,
    model=model,
    torch_dtype=torch.bfloat16,
)

### Add stop words to control the output

In [7]:
from transformers import StoppingCriteriaList, StoppingCriteria
class StoppingCriteriaSub(StoppingCriteria):
    """Stop generation once the generated ids end with one of the stop sequences.

    Only the first sequence of the batch (``input_ids[0]``) is inspected.

    Args:
        stops: Iterable of token-id tensors, one per stop sequence. A 0-dim
            tensor (single token) is treated as a sequence of length one.
        encounters: Accepted for backward compatibility; not used.
    """

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        if stops is None:
            stops = []
        # reshape(-1) guarantees 1-D tensors, so a single-token stop word that was
        # squeezed down to a 0-dim tensor can still be length-checked and sliced.
        self.stops = [torch.as_tensor(stop).reshape(-1) for stop in stops]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        """Return True when the tail of ``input_ids[0]`` equals any stop sequence."""
        generated = input_ids[0]

        # Move the stop sequences to wherever generation runs instead of
        # hard-coding "cuda"; after the first call this is a no-op.
        if self.stops and self.stops[0].device != generated.device:
            self.stops = [stop.to(generated.device) for stop in self.stops]

        for stop in self.stops:
            stop_len = stop.numel()
            # An empty stop sequence, or one longer than what has been generated
            # so far, cannot match (and would raise a shape-mismatch error).
            if stop_len == 0 or stop_len > generated.shape[-1]:
                continue
            if torch.all(stop == generated[-stop_len:]).item():
                return True

        return False


# Text markers after which generation should stop. The backslashes are literal
# characters (backslash + "n" is two characters, not a newline); they are written
# as "\\" so the literals no longer rely on the invalid escapes "\_" and "\C".
stop_words = ["\\n", "agent_1:", "agent\\_1:", "\\Context"]
# Take row [0] instead of .squeeze(): a stop word that tokenizes to a single id
# stays a 1-D tensor rather than collapsing to a 0-dim tensor, which has no len().
stop_words_ids = [tokenizer(stop_word, return_tensors='pt')['input_ids'][0] for stop_word in stop_words]
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])

### Make one inference

In [8]:
def make_inference():
  """Send one hard-coded sample prompt through the pipeline and print every generated text."""
  # Earlier prompt experiments, kept for reference:
  #txt = """
  #    Answer the question as truthfully as possible using the provided text, and if the answer is not contained within the text below, say "I don't know my lord!"
  #    Context: The men's high jump event at the 2020 Summer Olympics took place between 30 July and 1 August 2021 at the Olympic Stadium. 33 athletes from 24 nations competed; the total possible number depended on how many nations would use universality places to enter athletes in addition to the 32 qualifying through mark or ranking (no universality places were used in 2021). Italian athlete Gianmarco Tamberi along with Qatari athlete Mutaz Essa Barshim emerged as joint winners of the event following a tie between both of them as they cleared 2.37m. Both Tamberi and Barshim agreed to share the gold medal in a rare instance where the athletes of different nations had agreed to share the same medal in the history of Olympics. Barshim in particular was heard to ask a competition official "Can we have two golds?" in response to being offered a 'jump off'. Maksim Nedasekau of Belarus took bronze. The medals were the first ever in the men's high jump for Italy and Belarus, the first gold in the men's high jump for Italy and Qatar, and the third consecutive medal in the men's high jump for Qatar (all by Barshim). Barshim became only the second man to earn three medals in high jump, joining Patrik Sjöberg of Sweden (1984 to 1992).
  #    Who is the president of France?
  #    """
  # txt = "What is the color of a tomato?"
  #
  # txt = """You are agent_2, a teenager. Consider the dialogue given as Context between agent_1 and agent_2. Write agent_2 next sentence as an open ended question.
  #       Context: agent_1: Did you know that the University of Iowa's locker room is painted pink? I wonder why? \n agent_2: I think I did hear something about that.  I imagine it is an attempt to psych the other team out. \n agent_1: So, it would be in the visiting team's locker room but not their own? \n"""
  #
  # txt = """You are agent_2, a teenager. Consider the dialogue given as Context between agent_1 and agent_2. What is agent_2 next open ended sentence?
  #       Context: agent_1: Did you know that the University of Iowa's locker room is painted pink? I wonder why? \n agent_2: I think I did hear something about that.  I imagine it is an attempt to psych the other team out. \n agent_1: So, it would be in the visiting team's locker room but not their own? \n"""
  #
  # txt = """You are agent_2, a teenager. Consider the dialogue given as Context between agent_1 and agent_2. Write agent_2 response including the word 'orange'.
  #       Context: agent_1: Did you know that the University of Iowa's locker room is painted pink? I wonder why? \n agent_2: I think I did hear something about that.  I imagine it is an attempt to psych the other team out. \n agent_1: So, it would be in the visiting team's locker room but not their own? \n"""

  # context = "agent_1: Hi, how are you? \n agent_2: I am well thanks. How deep do you think NCAA basketball violations go? \n agent_1: Not well but I also think the NCAA is so weak now that the findings of any major sanctions will be difficult and take a long time to investigate. What about you? \n "

  # Dialogue history that is embedded into the prompt below.
  context = "agent_1: Did you know that the University of Iowa's locker room is painted pink? I wonder why? \n agent_2: I think I did hear something about that.  I imagine it is an attempt to psych the other team out. \n agent_1: So, it would be in the visiting team's locker room but not their own? \n "

  prompt = f"""You are agent_2, a teenager. Consider the dialogue given as Context between agent_1 and agent_2.
        Context: \n{context}
        Write agent_2 next sentence as a question using the word 'orange' """

  # Sampling settings (min_new_tokens=2 / max_new_tokens=100 are disabled).
  generation_kwargs = dict(
      max_length=500,
      do_sample=True,
      top_k=10,
      temperature=0.3,
      num_return_sequences=1,
      eos_token_id=tokenizer.eos_token_id,
      stopping_criteria=stopping_criteria,
  )

  generations = pipeline(prompt, **generation_kwargs)
  for generation in generations:
      print(f"Result: {generation['generated_text']}")

# make_inference()

# Evaluate the model on a test set

In [9]:
def _extract_agent2_reply(full_text, prompt):
  """Return the slice of ``full_text`` that starts at the prompt's trailing ``agent_2:`` cue."""
  cue = "agent_2:"
  if full_text.startswith(prompt) and prompt.endswith(cue):
    # Cut exactly where the prompt hands over to the model. Searching for the
    # last "agent_2" instead would cut too late whenever the generated text
    # mentions "agent_2" again.
    start = len(prompt) - len(cue)
  else:
    # Fallback: last mention of agent_2; keep the whole text if there is none
    # (an rfind miss of -1 would otherwise leave only the final character).
    start = max(full_text.lower().rfind("agent_2"), 0)
  return full_text[start:].strip()


def execute_model(context):
  """Generate agent_2's next turn for one dialogue and score it.

  Args:
    context: Dialogue history text that is inserted into the prompt.

  Returns:
    Tuple ``(context, response, len(response), has_question, has_orange, seconds)``
    where ``response`` starts with ``agent_2:``, ``has_question`` is True when the
    response contains ``?``, ``has_orange`` is True when it contains the word
    "orange" in any letter case, and ``seconds`` is the elapsed wall time.
  """
  start_time = time.perf_counter()

  # txt_001 = f""" You are agent_2, a teenager. Consider the dialogue given as Context between agent_1 and agent_2. What is agent_2 next open ended question? Use the word 'orange' if possible. Context: \n{context}"""
  txt = f""" You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: \n{context}agent_2:"""

  sequences = pipeline(
    txt,
    max_length=500,
    do_sample=True,
    top_k=10,
    temperature=0.3,
    num_return_sequences=1,
    # The token ids of "\n" are used as EOS ids.
    eos_token_id=tokenizer("\n")["input_ids"],
    stopping_criteria=stopping_criteria,
  )

  result = "".join(seq['generated_text'] for seq in sequences)

  print(f"\nRAW Result: {result.strip()}\n\n")

  r = _extract_agent2_reply(result, txt)

  print(f"\nClean Result: {r}\n\n")

  q1 = '?' in r
  # Case-insensitive so that a capitalised "Orange" also counts.
  q2 = 'orange' in r.lower()

  return context, r, len(r), q1, q2, time.perf_counter() - start_time


def save_results(output_path, results):
  """Build the results table, write it to ``output_path`` as CSV, and return it.

  Args:
    output_path: Destination of the CSV file.
    results: Rows with six values each, in the column order listed below.

  Returns:
    The pandas DataFrame that was written.
  """
  column_names = [
      'context',
      'response',
      'response_size',
      'has_question',
      'has_orange',
      'inference_time',
  ]
  table = pd.DataFrame(data=results, columns=column_names)
  table.to_csv(output_path)
  return table

def eval_llm_simple(input_path):
  """Run ``execute_model`` on every row of the test-set CSV.

  The CSV is read without a header row and with its first column as the index;
  the first remaining value of each row is passed to ``execute_model``.

  Args:
    input_path: Location of the test-set CSV.

  Returns:
    List with one ``execute_model`` result per row, in file order.
  """
  dialogs = pd.read_csv(input_path, header=None, index_col=0).values

  collected = []
  for position in range(len(dialogs)):
    # Separator line so each dialogue's output is easy to find.
    print(position, "*****************************")
    collected.append(execute_model(dialogs[position][0]))

  return collected

# The test set is read from, and the results CSV written to, this Drive folder.
working_folder = pathlib.Path("/content/drive/MyDrive/Colab Notebooks/")
input_path = working_folder / 'test_set.csv'
output_path = working_folder / 'results_falcon.csv'

# Generate a reply for every dialogue, save the table, and preview its first rows.
results = eval_llm_simple(input_path)
save_results(output_path, results).head()

Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.


0 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Did you know that the University of Iowa's locker room is painted pink? I wonder why?
agent_2: I think I did hear something about that.  I imagine it is an attempt to psych the other team out.
agent_1: So, it would be in the visiting team's locker room but not their own?

agent_2: Yes, that's what I heard.



Clean Result: agent_2: Yes, that's what I heard.


1 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi, how are you?
agent_2: well thanks! Do you know anything about keith jackson?
agent_1: I do, I love college football and can hear him say "Whoa Nelly!". What about you, are you familiar with him?

agent_2: Yes, I am. He's a famous football coach known for his motivational speeches.



Clean Result: agent_2: Yes, I am. He's a famous football coach known for his motivational speeches.


2 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi, how are you?
agent_2: I am well thanks. How deep do you think NCAA basketball violations go?
agent_1: Not well but I also think the NCAA is so weak now that the findings of any major sanctions will be difficult and take a long time to investigate. What about you?

agent_2: I am doing well. How about you?



Clean Result: agent_2: I am doing well. How about you?


3 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: do you watch the NFL?
agent_2: I sure do. How about yourself?
agent_1: Me too. Did you know women can play in the NFL too?

agent_2: Really? That's amazing! Do you think they should be paid the same as male NFL players?



Clean Result: agent_2: Really? That's amazing! Do you think they should be paid the same as male NFL players?


4 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi there, do you watch the NFL?
agent_2: Hi! Yes I do sometimes. What's your favorite team?
agent_1: I think the Patriots, I like them best. One of their former QB never fumbled a ball in his career. How about you?

agent_2: I'm a big fan of the Giants. I like their orange jerseys.



Clean Result: agent_2: I'm a big fan of the Giants. I like their orange jerseys.


5 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Football or Basketball, do you have a favorite?
agent_2: Football! It's more exciting I think. I like the american version though!
agent_1: I agree with you, football americano for sure! do you follow college football?

agent_2: I don't follow college football, but I do like watching the NFL on Sundays.



Clean Result: agent_2: I don't follow college football, but I do like watching the NFL on Sundays.


6 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: HI how are you?
agent_2: Hey! I just got in from a game of basketball! We had enough players for the common 5 player teams. Do you like basketball?
agent_1: I do a little bit. Did you guys win?!

agent_2: Yes, we won! We had a great game.



Clean Result: agent_2: Yes, we won! We had a great game.


7 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: good morning! do you like football?
agent_2: Good morning, yes, I do.  How about you?
agent_1: Yes I do.  Did you know that the highest score on a game was 222-0?

agent_2: Wow, that's amazing! How did you do in your game?



Clean Result: agent_2: Wow, that's amazing! How did you do in your game?


8 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi, how are you?
agent_2: Doing good? Do you follow football at all? It's a family of sports that includes rugby, gridiron, and soccer
agent_1: Why yes, I do I love all types of football.  Do you like football?

agent_2: I do like football, but I'm not very good at it. I'm more of a basketball person.



Clean Result: agent_2: I do like football, but I'm not very good at it. I'm more of a basketball person.


9 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hello, you follow the NFL much?
agent_2: I do. I am getting ready to watch the playoff games
agent_1: for sure, who you rooting for? Saints?

agent_2: Yes, I am a Saints fan. I am rooting for them to win the championship this year.



Clean Result: agent_2: Yes, I am a Saints fan. I am rooting for them to win the championship this year.


10 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: I'm wondering when women are finally going to play in the NFL.
agent_2: I think there was a womens' league years ago but I am not sure. It was not the NFL though.
agent_1: Was that the league with lingerie or something? Five or six a side football?

agent_2: No, it was the women's basketball league. They played on a smaller court and the game was a lot faster.



Clean Result: agent_2: No, it was the women's basketball league. They played on a smaller court and the game was a lot faster.


11 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi, do you like college foot ball?
agent_2: Yeah but i don't get to watch at all. How about you?
agent_1: I do love it and watch it all the time. Do you have a favorite team?

agent_2: Yes, I do. I like the orange team. What do you think about them?



Clean Result: agent_2: Yes, I do. I like the orange team. What do you think about them?


12 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hello there, are you a fan of football?
agent_2: I am a fan, what about you?
agent_1: I am as well, did you watch the Super Bowl this past Sunday?

agent_2: Yes, I watched it. I was rooting for agent_1's team.



Clean Result: agent_2: Yes, I watched it. I was rooting for agent_1's team.


13 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hello there, do you follow the NFL at all?
agent_2: I love the NFL. What about you?
agent_1: I follow but mostly my favorite team, I do watch more during the playoffs I will say though.  How about you?

agent_2: I love the NFL. My favorite team is the Raiders. I'm a big fan of Derek Carr. What about you?



Clean Result: agent_2: I love the NFL. My favorite team is the Raiders. I'm a big fan of Derek Carr. What about you?


14 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Do you watch the NFL?
agent_2: A little, you? I can't watch like i use to years ago so im not current.
agent_1: Which of the 32 teams is your favorite?

agent_2: The New York Giants!



Clean Result: agent_2: The New York Giants!


15 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: did you know that Minnie Mouse's name is short for Minerva? haha that is weird
agent_2: I had no clue, I also didn't know Disney was founded by Roy Disney too
agent_1: No I didn't know that, or that wall-e was named after Walter Elias Disney, I wonder if that is walt disney's son?

agent_2: I didn't know that either, I just thought Minnie was a cute name.



Clean Result: agent_2: I didn't know that either, I just thought Minnie was a cute name.


16 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Did you know that Disney owns ABC as well as ESPN?
agent_2: I did. Do you watch football? 
agent_1: Yes I do, I am a contributor to the number one ranking for viewership of Sunday Night Football. Do you?

agent_2: Yes, I do. What team do you root for?



Clean Result: agent_2: Yes, I do. What team do you root for?


17 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Boy, I miss football season. I love ESPN's Monday night football offerings. What about you?
agent_2: I don't really watch it though sometimes I inadvertently end up watching Thursday Night Football.
agent_1: Ah, yes. During football season, it can be hard to not catch at least part of a game. Did you know ABC is the only one of the 4 major broadcast networks without a prime-time NFL game?

agent_2: I didn't know that. I'm not a huge fan of the sport but I do like to watch it when I can.



Clean Result: agent_2: I didn't know that. I'm not a huge fan of the sport but I do like to watch it when I can.


18 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: How do you feel about internet use these days?
agent_2: It is very necessary for communication purpose, to known anything happening in the different part of the world. Over all, It is very good.
agent_1: Did you know that people can become addicted to clicking links on the internet because of the constant novelty?

agent_2: Yes, I know. It is very important to have a balance between internet use and other activities.



Clean Result: agent_2: Yes, I know. It is very important to have a balance between internet use and other activities.


19 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: did you know that Hugh Laurie is the highest paid TV actor ever?
agent_2: Hi! No I did not know that, very interesting! I was reading an article about the Patriots Josh McDaniels just might take a coaching position! Do you watch football?
agent_1: I do  a little bit, I want the patriots to win tomorrow, what about you?

agent_2: I'm a big fan, I'm hoping they win too! I'm going to watch the game with my dad. He's a big fan too. What are you doing tomorrow?



Clean Result: agent_2: I'm a big fan, I'm hoping they win too! I'm going to watch the game with my dad. He's a big fan too. What are you doing tomorrow?


20 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi do you watch dramas?
agent_2: I sure do. Did you know Jon Hamm taught drama in a high school?
agent_1: Cool. Did you ever watch the show House MD?

agent_2: Yes, I did.



Clean Result: agent_2: Yes, I did.


21 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Are you a fan of drama films? 
agent_2: I am! I love all genre's
agent_1: Interesting, have you ever seen house? 

agent_2: Yes, I have! I love it.



Clean Result: agent_2: Yes, I have! I love it.


22 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: hey there, do you watch NFL?
agent_2: I do watch the NFL. 32 teams in the league. I'm a Bears fan. One bad kick ended it for my team this year. 
agent_1: did you know women can play too if they qualify?

agent_2: "No, I didn't know that. How many women play in the NFL?"



Clean Result: agent_2: "No, I didn't know that. How many women play in the NFL?"


23 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: hello, how are you?
agent_2: Hi, great.  You?
agent_1: same. what is your favorite article?

agent_2: Hi, how are you?



Clean Result: agent_2: Hi, how are you?


24 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Do you watch Football? 
agent_2: I do, what about yourself?
agent_1: I watch it. Did you know there are only 11 minutes of live gameplay per game? 

agent_2: That's interesting. I didn't know that. What other types of games do you watch?



Clean Result: agent_2: That's interesting. I didn't know that. What other types of games do you watch?


25 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Are you a sports fan by any chance?
agent_2: i do like sports. i dont get much chance to watch these days. you?
agent_1: I enjoy a few. Did you know that Babe Ruth was America's most famous golfer at one point?

agent_2: No, I didn't know that. Babe Ruth was a legendary baseball player. Did you know that he was known as the 'Sultan of Swat'?



Clean Result: agent_2: No, I didn't know that. Babe Ruth was a legendary baseball player. Did you know that he was known as the 'Sultan of Swat'?


26 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Are you a big golf fan?
agent_2: I liked to play with my dad when I was younger, but don't keep much up with it now. I like football and other sports more. How about you?
agent_1: I played more when I was younger and had more free time.  I'm a bigger football fan.  Did you know that the University of Iowa had their visitors locker room painted pink?  Must have been to mess with opponents minds. lol

agent_2: That's interesting. I didn't know that. I'm not a big sports fan, but I do like playing video games. What kind of games do you like?



Clean Result: agent_2: That's interesting. I didn't know that. I'm not a big sports fan, but I do like playing video games. What kind of games do you like?


27 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi, how are you?
agent_2: Hi im doing great how about you?
agent_1: I'm good, thanks for asking.  Do you like football?

agent_2: Yes, I love football. What about you?



Clean Result: agent_2: Yes, I love football. What about you?


28 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hello how are you?
agent_2: im good how are you/
agent_1: Im good. Not feeling well today. unfortunately. Just reading about foot ball. You?

agent_2: Hi, I'm doing well. I just finished reading a book about space. It was really interesting. How about you?



Clean Result: agent_2: Hi, I'm doing well. I just finished reading a book about space. It was really interesting. How about you?


29 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Do you enjoy football?
agent_2: I love NFL, and you?
agent_1: I love football too. I used to play pickup football with my friends or with my family all the time, but not so much anymore. I love watching NFL though! Did you know there's actually no rule against women playing in the NFL?

agent_2: That's interesting! I didn't know that. Do you think there would be a female NFL player in the future?



Clean Result: agent_2: That's interesting! I didn't know that. Do you think there would be a female NFL player in the future?


30 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: hello,  do you watch football?
agent_2: Are you a New England Patriots fan?
agent_1: lol no I am not.  Did you know there is no written rule against female players?  women are allowed to play football. 

agent_2: That's interesting. Do you think the NFL should have a women's league?



Clean Result: agent_2: That's interesting. Do you think the NFL should have a women's league?


31 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi, how are you today? Do you follow football?
agent_2: I'm good, thanks for asking.  Yes, I watch a little football.  How about you?
agent_1: I try to watch playoffs, maybe the Super Bowl. Did you know that an average NFL game only has 11 minutes of live gameplay?

agent_2: That's interesting. I didn't know that. Do you like any other sports?



Clean Result: agent_2: That's interesting. I didn't know that. Do you like any other sports?


32 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: hi how are you? Football is on the chat menu. Do you like football?
agent_2: Hey there I do like football. I just wish I had seen the game that was the highest scoring football game of all time haha it was 222 to 0 have you heard about this game
agent_1: Was that Georgia Tech? what was the coach thinking? it seems cruel.

agent_2: No it was actually the 2021 Super Bowl. The coach was trying to pull off a surprise play that ended up working.



Clean Result: agent_2: No it was actually the 2021 Super Bowl. The coach was trying to pull off a surprise play that ended up working.


33 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Do you watch the NFL? 
agent_2: NFL? You mean the National Football League?
agent_1: Yeah, do you watch it? 

agent_2: Yes, I do. My favorite team is the New York Giants. What about you?



Clean Result: agent_2: Yes, I do. My favorite team is the New York Giants. What about you?


34 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: I hear the Bears may have the highest paid Defensive player in the history of the game.
agent_2: Yes and he is amazing. They have been comparing Khalil Mack to Lawrence Taylor. 
agent_1: Taylor changed the way the game was played. You think he has that in him?

agent_2: "Khalil Mack is a great player, but he is nowhere near the level of Lawrence Taylor. Taylor changed the way the game was played, and it's hard to compare players from different eras."The "Khalil Mack is a great player, but he is nowhere near the level of Lawrence Taylor" statement is a great way to show the value of the comparison. It is a way to acknowledge the greatness of Khalil Mack while also showing the difference in their eras. The agent_2's answer is a great way to show the value of the comparison while also using the word 'orange' to describe Khalil Mack.The "Khalil Mack is a

Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: hi, do you watch nfl?
agent_2: Hello there, I do how about yourself?
agent_1: Yes me too. did you know women can play too?

agent_2: No way! I didn't know that. What kind of women can play in the NFL?



Clean Result: agent_2: No way! I didn't know that. What kind of women can play in the NFL?


36 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: I enjoy football. Are you a fan?
agent_2: I enjoy sports. You mean American football, with the funny looking ball? I've seen the game.
agent_1: Yes American football. Do you know what the highest football score ever was?

agent_2: I think it was 100 points.



Clean Result: agent_2: I think it was 100 points.


37 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hello, Do you enjoy football? 
agent_2: I do love football!  How about you?
agent_1: Yes, I love to go to the games? What do you think of the Patriots?

agent_2: I think they're a great team! I love the color orange. What do you think of it?



Clean Result: agent_2: I think they're a great team! I love the color orange. What do you think of it?


38 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Are you a fan of the NFL?
agent_2: Hi, I watch occasionally. Most big games like Super Bowl.
agent_1: I didn't watch the Super Bowl this year. The Patriots won again, right?

agent_2: Yes, the Patriots won the Super Bowl this year. Did you watch any of the games?



Clean Result: agent_2: Yes, the Patriots won the Super Bowl this year. Did you watch any of the games?


39 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hello! Are you a big football fan? :) 
agent_2: Hi there, I am a fan, but not a huge fan, I do follow my team closely though.  How about you?
agent_1: I used to be more into it when I was younger. I lived in Louisiana for a while so it's funny they brought up the Saints. What's your team?

agent_2: I'm a big fan of the New York Giants. I've been a fan since I was a kid. I'm excited for the upcoming season. What about you?



Clean Result: agent_2: I'm a big fan of the New York Giants. I've been a fan since I was a kid. I'm excited for the upcoming season. What about you?


40 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi, do you like football?
agent_2: I love the sport, do you have a favorite team? AFC or NFC?
agent_1: NFC and the Bears. You?

agent_2: I'm a big fan of the New York Giants. My favorite player is Eli Manning.The 2015 NFL season is underway. The New York Giants are coming off a 10-6 season, and are looking to make a deep run in the playoffs. The Giants are led by quarterback Eli Manning, who has been a consistent performer for the team. He has been a model of consistency, and has been a key factor in the Giants' success. He is a leader on and off the field, and has been a great mentor for the younger players on the team.



Clean Result: agent_2: I'm a big fan of the New York Giants. My favorite player is Eli Manning.The 2015 NFL season is underway. The New York Giants are coming off a 10-6 season, and are looking to make a deep run in the playoffs. 

Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Do you think Tom Brady is really ready to retire?
agent_2: I don't know. The consensus among the experts seems to be that he'll keep playing.
agent_1: Why is that?  His wife makes a ton of money, too.

agent_2: I think Tom Brady is still motivated to play because he loves the game. He's also in great physical shape and has a lot of energy. Plus, he's a competitor who wants to prove himself on the field.



Clean Result: agent_2: I think Tom Brady is still motivated to play because he loves the game. He's also in great physical shape and has a lot of energy. Plus, he's a competitor who wants to prove himself on the field.


42 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Hi, do you like football?
agent_2: Yes I enjoy the game. I like the NFL even though there's only 11 minutes alive gameplay.
agent_1: That seems short. Do you know there is no rule against females playing?

agent_2: That's true, but I'd be surprised if the NFL allowed a female to play in a game.



Clean Result: agent_2: That's true, but I'd be surprised if the NFL allowed a female to play in a game.


43 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: I love watching the 32 teams in the NFL, although the super bowl yesterday was a disappointment
agent_2: I enjoy watching, too. I thought the game yesterday was a very good defensive game; but I was not for the Patriots to win again.
agent_1: Yes the Patriots win too much.  Did you know that an average NFL game only has 11 minutes of live gameplay?  Yesterday seemed like even less

agent_2: That's interesting. I didn't know that. I guess that's why they call it a 'game' and not a'match'.



Clean Result: agent_2: That's interesting. I didn't know that. I guess that's why they call it a 'game' and not a'match'.


44 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Are you a football fan?
agent_2: Hi, yes I love football. What about you?
agent_1: I'm a season ticket holder for my alma mater, the LSU Tigers.  And, I follow the Saints in the NFL.  It was interesting to see a female ref in the NFL playoffs.  Do you think we will ever see a NFL female player?

agent_2: I'm a huge football fan. I follow the New Orleans Saints and the LSU Tigers. It's interesting to see a female ref in the NFL playoffs. Do you think it's possible for a female player to make it to the NFL?



Clean Result: agent_2: I'm a huge football fan. I follow the New Orleans Saints and the LSU Tigers. It's interesting to see a female ref in the NFL playoffs. Do you think it's possible for a female player to make it to the NFL?


45 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: hello, let's discuss football.
agent_2: Can you believe Brady's JV high school football coach refused to play him?
agent_1: really? why? I haven't heard about that.. I adored Italian A serie when i was a teenager

agent_2: I know, right? It's a shame, because he was a great player.



Clean Result: agent_2: I know, right? It's a shame, because he was a great player.


46 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: do you watch the NFL?
agent_2: yes, do you know who kellan moore is?
agent_1: I have heard of him. DO you like Brady?

agent_2: I do like him. He is a great quarterback.



Clean Result: agent_2: I do like him. He is a great quarterback.


47 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Do you watch football?
agent_2: Heck yeah but not lately since the season is over. I only watch American football though.
agent_1: Yea, that best kind lol. Do you watch both college and nfl?

agent_2: I do watch both college and nfl. I'm a big fan of the Orange Bowl.



Clean Result: agent_2: I do watch both college and nfl. I'm a big fan of the Orange Bowl.


48 *****************************


Setting `pad_token_id` to `eos_token_id`:193 for open-end generation.



RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: hey there do you ever watch football?
agent_2: Yes, I watch it a lot. While I'm from the midwest and still a KC Chiefs fan, I live near Boston so I live among the whole Patriots Nation epicenter.
agent_1: ah I see, did you hear how Univ. of Iowa painted their visiting teams locker room pink?

agent_2: No, I didn't hear about that. What did they do?



Clean Result: agent_2: No, I didn't hear about that. What did they do?


49 *****************************

RAW Result: You are agent_2, a teenager. Answer agent_1 with an open-ended question and try to use the word 'orange'. Context: 
agent_1: Do you like watching football?
agent_2: I love watching it!  How about you?
agent_1: I love it also! Do you have a favorite college and NFL team?

agent_2: I'm a big fan of the University of Florida and the Jacksonville Jaguars. What about you?



Clean Result: ag

Unnamed: 0,context,response,response_size,has_question,has_orange,inference_time
0,agent_1: Did you know that the University of I...,"agent_2: Yes, that's what I heard.",34,False,False,8.689532
1,"agent_1: Hi, how are you?\nagent_2: well thank...","agent_2: Yes, I am. He's a famous football coa...",85,False,False,4.673868
2,"agent_1: Hi, how are you?\nagent_2: I am well ...",agent_2: I am doing well. How about you?,40,True,False,2.466921
3,agent_1: do you watch the NFL?\nagent_2: I sur...,agent_2: Really? That's amazing! Do you think ...,95,True,False,5.048661
4,"agent_1: Hi there, do you watch the NFL?\nagen...",agent_2: I'm a big fan of the Giants. I like t...,66,False,True,4.185508


# Test Area

### Compute pseudo-perplexity (masked-token scoring)

In [31]:
import numpy as np

# Id used for masking when the tokenizer defines no mask token of its own.
# NOTE(review): 103 is presumably the `[MASK]` id of the standard BERT
# vocabulary; in other vocabularies it may be an ordinary token -- confirm it
# suits the tokenizer that is actually loaded.
FALLBACK_MASK_TOKEN_ID = 103

if tokenizer.mask_token is None:
    tokenizer.mask_token_id = FALLBACK_MASK_TOKEN_ID

def score(model, tokenizer, sentence):
    """Return the pseudo-perplexity of ``sentence`` under ``model``.

    Every interior token (all positions except the first and the last) is
    replaced in turn by ``tokenizer.mask_token_id``. The resulting batch, one
    row per masked position, is run through the model once and ``exp`` of the
    returned loss is the score.

    Args:
        model: Callable invoked as ``model(input_ids, labels=labels)`` whose
            result exposes a scalar ``loss`` tensor. If it has a ``device``
            attribute, the inputs are moved there before the call.
        tokenizer: Object providing ``encode(sentence, return_tensors='pt')``
            and a ``mask_token_id`` attribute.
        sentence: Text to score.

    Returns:
        ``np.exp`` of the model loss.

    Raises:
        ValueError: If the encoded sentence has fewer than three tokens, so
            there is no interior position left to mask.
    """
    tensor_input = tokenizer.encode(sentence, return_tensors='pt')
    seq_len = tensor_input.size(-1)
    n_masked = seq_len - 2
    if n_masked < 1:
        raise ValueError(
            "sentence must encode to at least 3 tokens to be scored, "
            f"got {seq_len}"
        )

    # One copy of the sequence per position that will be masked.
    repeat_input = tensor_input.repeat(n_masked, 1)
    # Ones on the first super-diagonal, last two rows dropped: row i selects
    # position i + 1, so the first and last positions are never masked.
    mask = torch.ones(seq_len - 1).diag(1)[:-2].bool()
    masked_input = repeat_input.masked_fill(mask, tokenizer.mask_token_id)
    # Build the labels from the mask itself rather than by searching for the
    # mask id: a real token whose id equals ``mask_token_id`` must not be
    # scored. -100 is the default ``ignore_index`` of PyTorch's cross-entropy.
    labels = repeat_input.masked_fill(~mask, -100)

    # Follow the model's device when it advertises one; "meta" is skipped
    # because tensors cannot be materialised there.
    device = getattr(model, "device", None)
    if device is not None and getattr(device, "type", None) != "meta":
        masked_input = masked_input.to(device)
        labels = labels.to(device)

    with torch.inference_mode():
        loss = model(masked_input, labels=labels).loss
    return np.exp(loss.item())


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub identifier of the checkpoint used for the causal-LM experiments.
model_name = "tiiuae/falcon-7b-instruct"

# Load the model with 4-bit loading and automatic device placement requested;
# `trust_remote_code=True` allows the checkpoint's own modelling code to run.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
    trust_remote_code=True,
)

# Tokenizer matching the checkpoint above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Disabled example: perplexity of a single sentence from the causal-LM loss.
#inputs = tokenizer("ABC is a startup based in New York City and Paris", return_tensors = "pt")
#loss = model(input_ids = inputs["input_ids"], labels = inputs["input_ids"]).loss
#ppl = torch.exp(loss)
#print(ppl)

In [None]:
# Imported in this cell so it runs on its own after a kernel restart.
from transformers import AutoModelForMaskedLM, AutoTokenizer

# Checkpoint loaded through the masked-LM auto class for use with `score`.
model_name2 = 'cointegrated/rubert-tiny'
# Disabled alternative: load Falcon as a causal LM instead.
# model_name2 = 'tiiuae/falcon-7b-instruct'
# model2 = AutoModelForCausalLM.from_pretrained(model_name2, load_in_4bit=True, device_map="auto", trust_remote_code=True)
model2 = AutoModelForMaskedLM.from_pretrained(model_name2, trust_remote_code=True)
tokenizer2 = AutoTokenizer.from_pretrained(model_name2)

In [None]:
# model2.cuda()

In [32]:
# Probe sentences, printed one score per line in this order: a true statement,
# two false variants, a second true statement and an ungrammatical variant.
for probe_sentence in (
    'London is the capital of Great Britain.',
    'London is the capital of South America.',
    'London is the capital of France.',
    'Paris is the capital of France.',
    'Paris are the capitals of France.',
):
    print(score(sentence=probe_sentence, model=model, tokenizer=tokenizer))

7.942798149877931
42.438114061533675
15.370066359341997
6.314025555109275
33.11545195869231


### Compute perplexity using the Hugging Face `evaluate` metric module

In [None]:
import evaluate
# Checkpoint whose perplexity is measured.
model_name = "tiiuae/falcon-7b-instruct"

# Fetch the "perplexity" metric from the `evaluate` library.
perplexity = evaluate.load("perplexity", module_type="metric")

# Sample texts to score.
input_texts = [
    "lorem ipsum",
    "Happy Birthday!",
    "Bienvenue",
    "A tomato can vary depending on its species. Some tomatoes are red, while others are green or yellow. In general, tomato plants are known for their red color, although some varieties may be green or yellow.",
]

# Compute the metric for every text, then print the per-text values.
results = perplexity.compute(
    model_id=model_name,
    add_start_token=False,
    predictions=input_texts,
)
print(results["perplexities"])

In [None]:
# Show which entries the metric returned.
result_keys = list(results.keys())
print(result_keys)

# Mean over all texts, rounded to two decimals.
mean_ppl = results["mean_perplexity"]
print(round(mean_ppl, 2))

# Value for the first input text, rounded to two decimals.
first_ppl = results["perplexities"][0]
print(round(first_ppl, 2))