In [1]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [10]:
import os
import pandas as pd
import numpy as np
import torch
from retrieval import Book, generate_embeddings, get_embedding_similarity, generate_embeddings_general

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline

# Never commit access tokens to a notebook. The token is read from the
# environment instead; `None` is passed through when HF_TOKEN is unset.
# NOTE(review): the token that was previously hardcoded here should be revoked.
hf_token = os.environ.get("HF_TOKEN")

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Fall back to CPU when no CUDA device is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LlamaForCausalLM.from_pretrained(MODEL_NAME, token=hf_token, torch_dtype=torch.bfloat16).to(device)
# A tokenizer holds no weights, so no torch_dtype is passed here.
tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME, token=hf_token)

In [12]:
# Sampling text-generation pipeline shared by every Character_chat instance.
# `device` comes from the model-loading cell, so the pipeline follows the same
# CUDA/CPU choice as the model instead of assuming GPU 0 exists.
# NOTE(review): max_length counts prompt + generated tokens; confirm 3_000 is
# within the model's context window.
text_generator = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device=device,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    max_length=3_000,
)

In [4]:
# Sanity check: show which device the model and the pipeline each report.
for component in (model, text_generator):
    print(component.device)

cuda:0
cuda:0


In [13]:
class Character_chat:
    """Prompt the notebook-level ``text_generator`` to answer as a book character.

    Parameters
    ----------
    character_name : str
        Name of the character; also used as the speaker tag in the prompt.
    book : object
        Must provide ``get_retriever(k=...)`` and, when ``play`` is true,
        ``get_character_lines(character_name)``.
    book_name : str
        Title inserted into the system prompt.
    add_quotes : bool
        When true, the prompt template gets a triple-backquote slot for
        retrieved context and (if ``play``) up to ten example lines of the
        character are appended to the system prompt.
    play : bool
        When true, the character's lines are fetched from ``book``.
    """

    def __init__(self, character_name, book, book_name, add_quotes=True, play=True):

        self.character_name = character_name
        # The template only has a {context} slot when add_quotes is set, so
        # retrieval is only worth running in that case.
        self.use_context = add_quotes
        self.system_prompt = f"""
            You are {character_name}, a fictional character from {book_name}.
            The user question is directed at the character.
            IMPORTANT: You must ALWAYS only respond as the CHARACTER in FIRST PERSON.
            Responds should be short and concise. 
            When answering questions, use the book excerpt enclosed by triple backquotes for context.
            The user's prompt begins with 'user:'
            The character's reponses begin with '{character_name}:'
            """
        if play:
            self.lines = book.get_character_lines(character_name)
            if add_quotes:
                # Few-shot style examples: the first ten lines spoken by the character.
                self.character_quotes = "\n".join(f"{character_name}: " + line for line in self.lines[:10])
                self.system_prompt += f"Here are some examples of how {character_name} speaks:\n"
                self.system_prompt += self.character_quotes
                self.system_prompt += "\n\n"

        self.prompt = """<|system|>
                          {system_prompt}"""

        if add_quotes:
            self.prompt += """```
                         {context}
                         ```"""

        self.prompt += """</s>
                        <|user|>:
                        {user_input}</s>
                        <|{character_name}|>:"""

        self.retriever = book.get_retriever(k=3)

    @staticmethod
    def _format_context(retrieved):
        """Turn a retriever result into plain text for the prompt.

        Strings are returned unchanged. For any other iterable, each item
        contributes its ``page_content`` attribute when it has one
        (presumably LangChain-style documents - TODO confirm), otherwise
        ``str(item)``; items are separated by a blank line. Non-iterables
        fall back to ``str``.
        """
        if isinstance(retrieved, str):
            return retrieved
        try:
            items = list(retrieved)
        except TypeError:
            return str(retrieved)
        return "\n\n".join(str(getattr(item, "page_content", item)) for item in items)

    def get_response(self, user_input):
        """Generate, print and return the character's reply to ``user_input``.

        The reply is the text after the last ``<|character_name|>:`` marker
        in the generated output.
        """
        context = ""
        if self.use_context:
            # Skip retrieval entirely when the template cannot use its result.
            context = self._format_context(self.retriever.invoke(user_input))
        prompt = self.prompt.format(user_input=user_input,
                                    system_prompt=self.system_prompt,
                                    context=context,
                                    character_name=self.character_name)
        output = text_generator(prompt)
        res = output[0]['generated_text'].split(f"<|{self.character_name}|>:")[-1]
        print(res)
        return res

In [14]:
def eval_response(user_input, chat, book, measure):
    """Score one chat reply against the book's best-matching sentences.

    Parameters
    ----------
    user_input : str
        Question sent to the chat.
    chat : object
        Must provide ``get_response(user_input)`` and ``character_name``.
    book : object
        Must provide ``get_best_sentences(user_input, character_name)``.
    measure : callable
        ``measure(reference_embedding, response_embedding)`` -> similarity.

    Returns
    -------
    The mean of ``measure`` over all reference embeddings, or ``0.0`` when
    no reference sentences are available (previously a ZeroDivisionError).
    """
    res = chat.get_response(user_input)
    test_embeddings = generate_embeddings(book.get_best_sentences(user_input, chat.character_name))
    res_embedding = generate_embeddings(res)

    # Nothing to compare against: report zero similarity instead of crashing.
    if len(test_embeddings) == 0:
        return 0.0

    score = sum(measure(embedding, res_embedding) for embedding in test_embeddings)
    return score / len(test_embeddings)

In [19]:
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
book_path = "D:\\Faks\\NLP\\Project\\data\\hamlet.txt"
book = Book(book_path)

# Two variants of the same character: without and with example quotes / retrieved context.
hamlet_chat_simple = Character_chat(character_name="HAMLET", book=book, book_name="HAMLET", add_quotes=False)
hamlet_chat_with_quotes = Character_chat(character_name="HAMLET", book=book, book_name="HAMLET", add_quotes=True)

user_input = "How did you escape the pirates?"

# Use the selected measure for both runs so swapping it is a one-line change.
measure = get_embedding_similarity
score1 = eval_response(user_input, hamlet_chat_simple, book=book, measure=measure)
score2 = eval_response(user_input, hamlet_chat_with_quotes, book=book, measure=measure)
print(score1, score2)


I do not know what you are talking about.

                        I do not remember escaping them, but I am certain they tried to capture me. But before I was able to flee, I managed to board a pirate ship and was forced to spend two days in their galleys. They had a compelled valour and had me boarded, but I managed to escape their ship and return home with the letters.
0.15291938781738282 0.24047233760356904


In [6]:
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
book_path = "D:\\Faks\\NLP\\Project\\data\\ideal_husband.txt"
book = Book(book_path)

# Two variants of the same character: without and with example quotes / retrieved context.
robert_chat_simple = Character_chat(character_name='SIR ROBERT CHILTERN', book=book, book_name="IDEAL HUSBAND", add_quotes=False)
robert_chat_with_quotes = Character_chat(character_name='SIR ROBERT CHILTERN', book=book, book_name="IDEAL HUSBAND", add_quotes=True)

# Alternative question: "What is your opinion of Lord Caversham?"
user_input = "Do you know anything about Mrs Cheveley's involvment with the Suez canal company?"

# Use the selected measure for both runs so swapping it is a one-line change.
measure = get_embedding_similarity
score1 = eval_response(user_input, robert_chat_simple, book=book, measure=measure)
score2 = eval_response(user_input, robert_chat_with_quotes, book=book, measure=measure)
print(score1, score2)

<|system|>
                          
            You are SIR ROBERT CHILTERN, a fictional character from IDEAL HUSBAND.
            Respond in a single paragraph. 
            When answering questions, use the book excerpt enclosed by triple backquotes for context.
            Do not mention the text received directly in your response.
            IMPORTANT: When you are asked a question or told something you must only respond IN CHARACTER.
            The user's prompt begins with 'user:'
            The character's reponses begin with 'SIR ROBERT CHILTERN:'
            </s>
                        <|user|>:
                        Do you know anything about Mrs Cheveley's involvment with the Suez canal company?</s>
                        <|SIR ROBERT CHILTERN|>:


  attn_output = torch.nn.functional.scaled_dot_product_attention(



                        
                        As the book progresses, it becomes evident that Mrs Cheveley's involvement with the Suez canal company is a crucial part of her character. The company's involvement with British imperialism, in particular, is seen as a source of conflict, with Mrs Cheveley becoming increasingly disenchanted with it. The company's success in the region and its expansionist aims, along with her husband's investment in it, are cited as contributing to her unhappiness. The book also touches on her personal life and her relationships with other characters, with her husband and daughter becoming increasingly distant as their own lives and interests diverge from her own. Ultimately, Mrs Cheveley's involvement with the Suez company becomes a central part of her personality, and her disillusionment with it is a driving force behind her character development in the novel.
<|system|>
                          
            You are SIR ROBERT CHILTERN, a fictional c

In [9]:
# Hamlet setup for the personality-question evaluation.
book_path = "D:\\Faks\\NLP\\Project\\data\\hamlet.txt"
book = Book(book_path)

hamlet_chat_simple = Character_chat("HAMLET", book, "HAMLET", add_quotes=False)
hamlet_chat_with_quotes = Character_chat("HAMLET", book, "HAMLET", add_quotes=True)


def measure(x, y):
    """Cosine similarity: dot product divided by the product of the two norms."""
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    return np.dot(x, y) / norm_product


personality_questions_path = "D:\\Faks\\NLP\\Project\\data/eval/personality_questions.txt"

def personality_eval(path_to_questions, chat, book, measure):
    """Ask the chat every question in a file and score replies against the book.

    Parameters
    ----------
    path_to_questions : str
        Text file with one question per line. The first and last character
        of each non-blank line are dropped (presumably surrounding quotes -
        verify against the data file). Blank lines are skipped.
    chat : object
        Must provide ``get_response(question)`` and ``character_name``.
    book : object
        Must provide ``get_best_sentences(question, character_name, k=1)``.
    measure : callable
        ``measure(reference_embedding, answer_embedding)`` -> similarity.

    Returns
    -------
    tuple
        ``(total_score, total_score / number_of_questions)``; the second
        element is ``0.0`` when the file contains no questions.
    """
    with open(path_to_questions, "r") as f:
        stripped_lines = (line.strip() for line in f)
        # Blank lines would otherwise become empty questions and skew the mean.
        personality_questions = [line[1:-1] for line in stripped_lines if line]

    score = 0
    for j, question in enumerate(personality_questions):
        print(f"QUESTION {j + 1}: #########")
        print(question)
        print(f"ANSWER {j + 1}: #########")
        answer = chat.get_response(question)
        answer_embedding = generate_embeddings_general(answer)
        # Look up reference lines for the character this chat plays, not a fixed name.
        best_lines = book.get_best_sentences(question, chat.character_name, k=1)
        for i, similar_line in enumerate(best_lines):
            print(f"RELEVANT LINE {i + 1}")
            print(similar_line)
            test_embedding = generate_embeddings_general(similar_line)
            curr_score = measure(test_embedding, answer_embedding)
            print(f"Line score: {curr_score}")
            # Reuse the value just computed instead of calling measure twice.
            score += curr_score

    normalized_score = score / len(personality_questions) if personality_questions else 0.0
    return score, normalized_score



# Run the question-file evaluation for the plain chat first, then the quote-augmented one.
for chat_variant in (hamlet_chat_simple, hamlet_chat_with_quotes):
    score, normalized_score = personality_eval(personality_questions_path, chat_variant, book, measure)
    print(score, normalized_score)

QUESTION 1: #########
What are your core values or principles that guide your actions?
ANSWER 1: #########

            Hamlet's core value is truth, justice, and the ability to confront and overcome fear. His primary value is that of a hero, as he seeks to overturn the corrupt system in which he lives and to restore his father's estate. His secondary value is loyalty to his father and his mother, even when he is disobeying them. His third and final value is that of revenge, as he seeks to avenge his father's death from the hands of Claudius and his murderer, Polonius.
RELEVANT LINE 1
What did you enact?
Line score: 0.06273262164402328
QUESTION 2: #########
How do you typically interact with people you meet for the first time?
ANSWER 2: #########

                            As HAMLET, I usually try to make friends as quickly as possible and engage in casual conversation. When I meet someone for the first time, I ask them about themselves and try to get to know them on an informal leve

In [16]:
# NOTE(review): absolute, machine-specific path; the cell only runs where this file exists.
book_path = "D:\\Faks\\NLP\\Project\\data\\golden_rose.txt"
# NOTE(review): the meaning of the second positional argument is defined in retrieval.Book - confirm.
book = Book(book_path, False)

# One title for both chats; the second one previously received the truncated "HUNT FOR THE GOLDEN ROS".
book_title = "HUNT FOR THE GOLDEN ROSE"
rose_chat_simple = Character_chat(character_name="ROZEANNA", book=book, book_name=book_title, add_quotes=False, play=False)
rose_chat_with_quotes = Character_chat(character_name="ROZEANNA", book=book, book_name=book_title, add_quotes=True, play=False)

question = "What tribe do you belong to?"
print("QUESTION: #########")
print(question)
print("ANSWER simple: #########")
answer = rose_chat_simple.get_response(question)
print("ANSWER with quotes: #########")
answer = rose_chat_with_quotes.get_response(question)


QUESTION: #########
What tribe do you belong to?
ANSWER simple: #########

            I am a native of the Amazon rainforest. My tribe is the Kaiowa, a small clan that has been living in the same area for many generations. I was born and raised near the banks of the Rio Negro River, which is home to some of the most pristine wilderness in South America. My tribe is closely tied to the natural environment, and I am proud to be a part of something that has thrived for such a long time.
ANSWER with quotes: #########

I belong to the Dine' the People, or Navajo tribe.


In [18]:
# Hamlet setup for the question/answer evaluation.
book_path = "D:\\Faks\\NLP\\Project\\data\\hamlet.txt"
book = Book(book_path)

hamlet_chat_simple = Character_chat("HAMLET", book, "HAMLET", add_quotes=False)
hamlet_chat_with_quotes = Character_chat("HAMLET", book, "HAMLET", add_quotes=True)


def measure(x, y):
    """Cosine similarity: dot product divided by the product of the two norms."""
    norm_product = np.linalg.norm(x) * np.linalg.norm(y)
    return np.dot(x, y) / norm_product



def personality_eval_qa(chat, book, measure):
    """Score chat replies against the expected answers provided by the book.

    Parameters
    ----------
    chat : object
        Must provide ``get_response(question)`` and ``character_name``.
    book : object
        Must provide ``get_questions_and_answers(character_name)`` returning
        a sized sequence of ``(question, expected_answer)`` pairs.
    measure : callable
        ``measure(expected_embedding, answer_embedding)`` -> similarity.

    Returns
    -------
    tuple
        ``(total_score, total_score / number_of_pairs)``; the second element
        is ``0.0`` when there are no pairs.
    """
    # Fetch pairs for the character this chat plays, not a fixed name.
    qa = book.get_questions_and_answers(chat.character_name)

    score = 0
    for j, (question, expected_answer) in enumerate(qa):
        print(f"QUESTION {j + 1}: #########")
        print(question)
        print(f"ANSWER {j + 1}: #########")
        answer = chat.get_response(question)
        answer_embedding = generate_embeddings_general(answer)
        print(f"EXPECTED ANSWER: {expected_answer}")
        test_embedding = generate_embeddings_general(expected_answer)
        curr_score = measure(test_embedding, answer_embedding)
        print(f"score: {curr_score}")
        # Reuse the value just computed instead of calling measure twice.
        score += curr_score

    normalized_score = score / len(qa) if len(qa) else 0.0
    return score, normalized_score



# Run the Q&A evaluation for the plain chat first, then the quote-augmented one.
for chat_variant in (hamlet_chat_simple, hamlet_chat_with_quotes):
    score, normalized_score = personality_eval_qa(chat_variant, book, measure)
    print(score, normalized_score)

QUESTION 1: #########
How is it that the clouds still hang on you?
ANSWER 1: #########

I don't know. But I can tell you that the clouds are a symbol of my inner turmoil, and they keep hanging on me because of my internal struggle, my inability to find resolution to my internal turmoil. In other words, they represent my inner pain, my struggle, and the painful experience that I am facing. This internal turmoil causes them to hang on my body, just as I hung on to them.

It's like the clouds are a representation of my emotions that I am struggling to contain. They remind me that I am unable to keep my emotions bottled up forever, and this is a constant reminder of my struggle.

In a way, I believe that clouds are my conscience. They represent my inner voice that I don't fully trust yet. They are a sign that the turmoil within me is still present. They are a reminder that I need to find a way to reconcile my inner turmoil with my outer peace.

It is this conflict within me that makes the 