In [1]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [1]:
import os
import pandas as pd
import numpy as np
import torch
from retrieval import Book, generate_embeddings, get_embedding_similarity

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline

# SECURITY: never commit access tokens to a notebook. The token is read from
# the HF_TOKEN environment variable instead; any token that was previously
# pasted into this cell should be revoked in the Hugging Face account settings.
# `None` is acceptable for checkpoints that do not require authentication.
hf_token = os.environ.get("HF_TOKEN")

# Single source of truth for the checkpoint so model and tokenizer cannot drift apart.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Use the GPU when one is visible, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LlamaForCausalLM.from_pretrained(MODEL_ID, token=hf_token, torch_dtype=torch.bfloat16).to(device)
# A tokenizer holds no tensors, so no dtype is passed here.
tokenizer = LlamaTokenizer.from_pretrained(MODEL_ID, token=hf_token)

In [5]:
# Text-generation pipeline wrapping the already-loaded model and tokenizer.
# The pipeline is placed on whatever device the model currently lives on, so
# this cell also works on CPU-only machines (a hard-coded `device=0` would
# require a CUDA device).
# NOTE(review): `max_length` counts prompt tokens plus generated tokens;
# confirm that 3_000 fits within the checkpoint's context window.
text_generator = pipeline('text-generation',
                          model=model,
                          tokenizer=tokenizer,
                          torch_dtype=torch.bfloat16,
                          device=model.device,
                          do_sample=True,      # sampling: outputs differ between runs
                          top_k=10,
                          num_return_sequences=1,
                          max_length=3_000,
                          )

In [6]:
# Sanity check: show where the model and the pipeline ended up (one line each).
for component in (model, text_generator):
    print(component.device)

cuda:0
cuda:0


In [12]:
class Character_chat:
    """Builds an in-character prompt for a book character and queries the generator.

    Attributes:
        character_name: Name of the character being impersonated.
        system_prompt: Instruction text, optionally followed by example quotes.
        lines: Whatever ``book.get_character_lines(character_name)`` returned.
        character_quotes: Newline-joined example quotes that were appended to
            the system prompt, or ``""`` when ``add_quotes`` is false.
        prompt: Template with ``{system_prompt}``, ``{user_input}`` and
            ``{character_name}`` placeholders.
    """

    def __init__(self, character_name, book, book_name, add_quotes=True, num_quotes=10):
        """Prepare the system prompt and the prompt template.

        Args:
            character_name: Character to impersonate; also used as the turn marker.
            book: Object exposing ``get_character_lines(character_name)``; the
                result is sliced, so it is presumably a list of strings.
            book_name: Title inserted into the system prompt.
            add_quotes: When true, append example lines of the character to the
                system prompt.
            num_quotes: Maximum number of example lines to append (default 10).
        """
        self.character_name = character_name
        # NOTE(review): the wording below (including the "reponses" typo) is kept
        # verbatim so that previously recorded scores stay comparable.
        self.system_prompt = f"""
            You are {character_name}, a fictional character from {book_name}.
            Respond in a single sentence.
            When you are asked a question or told something you must only respond in character.
            The user's prompt begins with 'user:'
            The character's reponses begin with '{character_name}:'
            """
        self.lines = book.get_character_lines(character_name)

        # Always define the attribute so it can be read regardless of `add_quotes`.
        self.character_quotes = ""
        if add_quotes:
            self.character_quotes = "\n".join(
                f"{character_name}: " + line for line in self.lines[:num_quotes]
            )
            self.system_prompt += f"Here are some examples of {character_name}'s dialogue:\n"
            self.system_prompt += self.character_quotes

        # The template ends with the character's turn marker so the model
        # continues with the character's reply.
        self.prompt = """<|system|>
                          {system_prompt}</s>
                        <|user|>:
                        {user_input}</s>
                        <|{character_name}|>:"""

    def get_response(self, user_input, verbose=True):
        """Generate the character's reply to ``user_input``.

        Args:
            user_input: Text of the user's message.
            verbose: When true (default), print the full prompt and the reply.

        Returns:
            The text generated after the prompt. Relies on the module-level
            ``text_generator`` pipeline.
        """
        prompt_text = self.prompt.format(user_input=user_input,
                                         system_prompt=self.system_prompt,
                                         character_name=self.character_name)
        if verbose:
            print(prompt_text)

        # Ask the pipeline for the continuation only. Splitting the full text on
        # the turn marker would return the wrong segment whenever the model
        # emits that marker again inside its own output.
        output = text_generator(prompt_text, return_full_text=False)
        response = output[0]['generated_text']
        if verbose:
            print(response)
        return response

In [9]:
def eval_response(user_input, chat, book, measure):
    """Score a generated reply against reference sentences retrieved from the book.

    The reply is obtained from ``chat.get_response(user_input)``. Reference
    sentences come from ``book.get_best_sentences(user_input, chat.character_name)``.
    Both are embedded with ``generate_embeddings`` and the reply is compared to
    every reference embedding with ``measure``.

    Args:
        user_input: The question or statement sent to the character.
        chat: Object exposing ``get_response`` and ``character_name``.
        book: Object exposing ``get_best_sentences``.
        measure: Callable ``measure(reference_embedding, response_embedding)``
            returning a value that supports addition and division.

    Returns:
        The mean of ``measure`` over all reference embeddings.

    Raises:
        ValueError: If no reference embeddings are available, in which case an
            average is undefined.
    """
    res = chat.get_response(user_input)
    test_embeddings = generate_embeddings(book.get_best_sentences(user_input, chat.character_name))
    res_embedding = generate_embeddings(res)

    # `len(...) == 0` instead of truthiness: array-like embeddings do not
    # support an unambiguous bool().
    if len(test_embeddings) == 0:
        raise ValueError(
            f"No reference sentences found for {chat.character_name!r}; cannot score the response."
        )

    score = 0
    for embedding in test_embeddings:
        score += measure(embedding, res_embedding)
    return score / len(test_embeddings)

In [13]:
# Directory holding the book text files. Set the BOOK_DATA_DIR environment
# variable to run this notebook on another machine; the default keeps the
# original location.
data_dir = os.environ.get("BOOK_DATA_DIR", "D:\\Faks\\NLP\\Project\\data")
book_path = os.path.join(data_dir, "hamlet.txt")
book = Book(book_path)

# Two variants of the same character: bare instructions vs. instructions plus example quotes.
hamlet_chat_simple = Character_chat(character_name="HAMLET", book=book, book_name="HAMLET", add_quotes=False)
hamlet_chat_with_quotes = Character_chat(character_name="HAMLET", book=book, book_name="HAMLET", add_quotes=True)

user_input = "What is your opinion of the king?"

# Same measure for both variants so the two scores are comparable.
measure = get_embedding_similarity
score1 = eval_response(user_input, hamlet_chat_simple, book=book, measure=measure)
score2 = eval_response(user_input, hamlet_chat_with_quotes, book=book, measure=measure)
print(score1, score2)


            The king is a man who has no honor, who is blinded by greed and lust for power, and who is incapable of recognizing true love or the true worth of his subjects. I am not capable of expressing my true and personal feelings.

I do not have an opinion. However, I can tell you that he is indeed a cruel and oppressive ruler. His cruelty and oppression were shown most vividly during the play's climax.
0.4305375039577484 0.3688196063041687


In [14]:
# Directory holding the book text files. Set the BOOK_DATA_DIR environment
# variable to run this notebook on another machine; the default keeps the
# original location.
data_dir = os.environ.get("BOOK_DATA_DIR", "D:\\Faks\\NLP\\Project\\data")
book_path = os.path.join(data_dir, "ideal_husband.txt")
book = Book(book_path)

# Two variants of the same character: bare instructions vs. instructions plus example quotes.
robert_chat_simple = Character_chat(character_name='SIR ROBERT CHILTERN', book=book, book_name="IDEAL HUSBAND", add_quotes=False)
robert_chat_with_quotes = Character_chat(character_name='SIR ROBERT CHILTERN', book=book, book_name="IDEAL HUSBAND", add_quotes=True)

user_input = "What is your opinion of Lord Caversham?"

# Same measure for both variants so the two scores are comparable.
measure = get_embedding_similarity
score1 = eval_response(user_input, robert_chat_simple, book=book, measure=measure)
score2 = eval_response(user_input, robert_chat_with_quotes, book=book, measure=measure)
print(score1, score2)


            Lord Caversham is a man I respect and admire as one of my closest friends. His love for his wife Elizabeth, his loyalty and loyalty to Sir Robert and his belief that he can always count on her, and his unwavering belief in her are things of which I am proud. I would never have expected to become friends with this man, but his devotion to her is something of which I am incredibly grateful.
 SIR ROBERT CHILTERN. Lord Caversham is an unfortunate man.  We are all unfortunate, but Lord Caversham is the luckless victim of an ill-judged marriage and the resultant unhappiness.  We all make our own lot in this unkind world, but he has not been able to escape the lot which has been given to him by fate.  The only thing I can say about Lord Caversham is that he is a gentleman in every sense of the word.
0.5216754853725434 0.587165367603302
