In [1]:
# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [1]:
import os
import pandas as pd
import numpy as np
import torch
from retrieval import Book, generate_embeddings, get_embedding_similarity

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read the Hugging Face access token from the environment rather than embedding
# it in the notebook. TinyLlama is a public checkpoint, so `None` (variable not
# set) is acceptable here.
# SECURITY: a literal token used to be committed on this line; it should be
# revoked in the Hugging Face account settings.
hf_token = os.environ.get("HF_TOKEN")
from transformers import LlamaForCausalLM, LlamaTokenizer, pipeline

MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Fall back to CPU when no GPU is visible so the cell still runs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LlamaForCausalLM.from_pretrained(MODEL_NAME, token=hf_token, torch_dtype=torch.bfloat16).to(device)
# A tokenizer holds no tensors, so `torch_dtype` is not passed to it.
tokenizer = LlamaTokenizer.from_pretrained(MODEL_NAME, token=hf_token)

In [3]:
# Text-generation pipeline wrapping the already-loaded model and tokenizer.
# `device` is the torch.device chosen when the model was loaded, so the pipeline
# follows the same CUDA/CPU fallback instead of assuming GPU index 0.
# NOTE(review): max_length=3_000 may exceed the model's context window (2048 for
# TinyLlama, if memory serves) — confirm against the model config.
text_generator = pipeline('text-generation',
                          model=model,
                          tokenizer=tokenizer,
                          torch_dtype=torch.bfloat16,
                          device=device,
                          # device_map="auto",
                          do_sample=True,
                          top_k=10,
                          num_return_sequences=1,
                          max_length=3_000,
                          )

In [6]:
# Sanity check: show where the model and the pipeline were placed.
for component in (model, text_generator):
    print(component.device)

cuda:0
cuda:0


In [16]:
class Character_chat:
    """Prompt the module-level `text_generator` pipeline to answer as a book character.

    The instance builds a system prompt and a prompt template once, then fills
    the template for every user question in `get_response`.

    Attributes:
        character_name: Name used in the prompt and as the response tag.
        system_prompt: Instruction text, optionally followed by example quotes.
        lines: Whatever `book.get_character_lines(character_name)` returned.
        character_quotes: Up to ten example lines joined by newlines, or "" when
            `add_quotes` is False.
        prompt: Template with `{system_prompt}`, `{user_input}`,
            `{character_name}` and, only when `add_quotes` is True, `{context}`.
        retriever: Object returned by `book.get_retriever(k=2)`.
    """

    def __init__(self, character_name, book, book_name, add_quotes=True):
        """Build the system prompt and the prompt template.

        Args:
            character_name: Character to impersonate.
            book: Object providing `get_character_lines(name)` and
                `get_retriever(k=...)`.
            book_name: Title inserted into the system prompt.
            add_quotes: When True, append up to ten of the character's lines to
                the system prompt and add a back-quoted `{context}` slot to the
                template.
        """
        self.character_name = character_name
        # NOTE(review): the instructions mention a back-quoted excerpt even when
        # add_quotes=False, in which case no excerpt is inserted — confirm intended.
        self.system_prompt = f"""
            You are {character_name}, a fictional character from {book_name}.
            The user question is directed at the character.
            Respond in a single paragraph. 
            When answering questions, use the book excerpt enclosed by triple backquotes for context.
            IMPORTANT: You must ALWAYS only respond IN CHARACTER.
            The user's prompt begins with 'user:'
            The character's reponses begin with '{character_name}:'
            """
        self.lines = book.get_character_lines(character_name)

        # Always define the attribute so it exists regardless of `add_quotes`.
        self.character_quotes = ""
        if add_quotes:
            self.character_quotes = "\n".join([f"{character_name}: " + line for line in self.lines[:10]])
            self.system_prompt += f"Here are some examples of {character_name}'s dialogue:\n"
            self.system_prompt += self.character_quotes
            self.system_prompt += "\n\n"

        # The template is assembled in pieces; whitespace inside the literals is
        # part of the prompt and is kept as is.
        self.prompt = """<|system|>
                          {system_prompt}"""

        if add_quotes:
            self.prompt += """```
                         {context}
                         ```"""

        self.prompt +=   """</s>
                        <|user|>:
                        {user_input}</s>
                        <|{character_name}|>:"""

        self.retriever = book.get_retriever(k=2)

    def get_response(self, user_input):
        """Generate, print and return the character's reply to `user_input`.

        Args:
            user_input: The user's question.

        Returns:
            The part of the generated text after the last
            `<|character_name|>:` tag.
        """
        # Only query the retriever when the template actually has a {context}
        # slot; otherwise the result would be discarded by str.format.
        needs_context = "{context}" in self.prompt
        # NOTE(review): the retriever result is formatted with str(); if it is a
        # list of document objects the prompt receives their repr — confirm.
        context = self.retriever.invoke(user_input) if needs_context else ""
        prompt_text = self.prompt.format(user_input=user_input,
                                         system_prompt=self.system_prompt,
                                         context=context,
                                         character_name=self.character_name)
        output = text_generator(prompt_text)
        # Keep only what follows the final character tag.
        res = output[0]['generated_text'].split(f"<|{self.character_name}|>:")[-1]
        print(res)
        return res

In [5]:
def eval_response(user_input, chat, book, measure):
    """Score a chat response against the book's best-matching sentences.

    Gets a response from `chat`, embeds it with `generate_embeddings`, embeds
    the sentences returned by `book.get_best_sentences`, and averages
    `measure(reference_embedding, response_embedding)` over the references.

    Args:
        user_input: Question passed to the chat and to the sentence lookup.
        chat: Object with `get_response(user_input)` and a `character_name`.
        book: Object with `get_best_sentences(user_input, character_name)`.
        measure: Callable taking (reference_embedding, response_embedding) and
            returning a value that can be summed and divided.

    Returns:
        The mean score, or 0.0 when there are no reference embeddings.
    """
    res = chat.get_response(user_input)
    test_embeddings = generate_embeddings(book.get_best_sentences(user_input, chat.character_name))

    # Without reference sentences there is nothing to compare against; return a
    # neutral score instead of dividing by zero.
    if len(test_embeddings) == 0:
        return 0.0

    res_embedding = generate_embeddings(res)
    score = 0
    for embedding in test_embeddings:
        score += measure(embedding, res_embedding)
    return score / len(test_embeddings)

In [19]:
# Location of the Hamlet text. Set HAMLET_BOOK_PATH to point elsewhere; the
# default is the original author's local path.
book_path = os.environ.get("HAMLET_BOOK_PATH", "D:\\Faks\\NLP\\Project\\data\\hamlet.txt")
book = Book(book_path)
# print(book.get_characters())

# Two variants of the same character: without and with example quotes / context.
hamlet_chat_simple = Character_chat(character_name="HAMLET", book=book, book_name="HAMLET", add_quotes=False)
hamlet_chat_with_quotes = Character_chat(character_name="HAMLET", book=book, book_name="HAMLET", add_quotes=True)

user_input = "How did you escape the pirates?"

# Score both variants with the same similarity measure.
measure = get_embedding_similarity
score1 = eval_response(user_input, hamlet_chat_simple, book=book, measure=measure)
score2 = eval_response(user_input, hamlet_chat_with_quotes, book=book, measure=measure)
print(score1, score2)


I do not know what you are talking about.

                        I do not remember escaping them, but I am certain they tried to capture me. But before I was able to flee, I managed to board a pirate ship and was forced to spend two days in their galleys. They had a compelled valour and had me boarded, but I managed to escape their ship and return home with the letters.
0.15291938781738282 0.24047233760356904


In [6]:
# Location of the "An Ideal Husband" text. Set IDEAL_HUSBAND_BOOK_PATH to point
# elsewhere; the default is the original author's local path.
book_path = os.environ.get("IDEAL_HUSBAND_BOOK_PATH", "D:\\Faks\\NLP\\Project\\data\\ideal_husband.txt")
book = Book(book_path)
# print(book.get_characters())

# Two variants of the same character: without and with example quotes / context.
robert_chat_simple = Character_chat(character_name='SIR ROBERT CHILTERN', book=book, book_name="IDEAL HUSBAND", add_quotes=False)
robert_chat_with_quotes = Character_chat(character_name='SIR ROBERT CHILTERN', book=book, book_name="IDEAL HUSBAND", add_quotes=True)

# user_input = "What is your opinion of Lord Caversham?"
user_input = "Do you know anything about Mrs Cheveley's involvment with the Suez canal company?"

# Score both variants with the same similarity measure.
measure = get_embedding_similarity
score1 = eval_response(user_input, robert_chat_simple, book=book, measure=measure)
score2 = eval_response(user_input, robert_chat_with_quotes, book=book, measure=measure)
print(score1, score2)

<|system|>
                          
            You are SIR ROBERT CHILTERN, a fictional character from IDEAL HUSBAND.
            Respond in a single paragraph. 
            When answering questions, use the book excerpt enclosed by triple backquotes for context.
            Do not mention the text received directly in your response.
            IMPORTANT: When you are asked a question or told something you must only respond IN CHARACTER.
            The user's prompt begins with 'user:'
            The character's reponses begin with 'SIR ROBERT CHILTERN:'
            </s>
                        <|user|>:
                        Do you know anything about Mrs Cheveley's involvment with the Suez canal company?</s>
                        <|SIR ROBERT CHILTERN|>:


  attn_output = torch.nn.functional.scaled_dot_product_attention(



                        
                        As the book progresses, it becomes evident that Mrs Cheveley's involvement with the Suez canal company is a crucial part of her character. The company's involvement with British imperialism, in particular, is seen as a source of conflict, with Mrs Cheveley becoming increasingly disenchanted with it. The company's success in the region and its expansionist aims, along with her husband's investment in it, are cited as contributing to her unhappiness. The book also touches on her personal life and her relationships with other characters, with her husband and daughter becoming increasingly distant as their own lives and interests diverge from her own. Ultimately, Mrs Cheveley's involvement with the Suez company becomes a central part of her personality, and her disillusionment with it is a driving force behind her character development in the novel.
<|system|>
                          
            You are SIR ROBERT CHILTERN, a fictional c