In [1]:
import torch
import torchvision

In [2]:
# Sanity check: display whether PyTorch reports a usable CUDA device in this kernel.
torch.cuda.is_available()

True

In [3]:
!pip install replicate
!pip install faiss-cpu
# !pip install chromadb --progress-bar off
!pip install transformers --progress-bar off
!pip install langchain --progress-bar off
!pip install sentence_transformers --progress-bar off
!pip install InstructorEmbedding --progress-bar off
!pip install textsum

Collecting replicate
  Obtaining dependency information for replicate from https://files.pythonhosted.org/packages/67/6d/c0a5cad0a5907454580ad0b51d0bf82d0d0980590e569f1f0f37f28c2316/replicate-0.15.5-py3-none-any.whl.metadata
  Downloading replicate-0.15.5-py3-none-any.whl.metadata (18 kB)
Collecting httpx<1,>=0.21.0 (from replicate)
  Obtaining dependency information for httpx<1,>=0.21.0 from https://files.pythonhosted.org/packages/33/0d/d9ce469af019741c8999711d36b270ff992ceb1a0293f73f9f34fdf131e9/httpx-0.25.0-py3-none-any.whl.metadata
  Downloading httpx-0.25.0-py3-none-any.whl.metadata (7.6 kB)
Collecting httpcore<0.19.0,>=0.18.0 (from httpx<1,>=0.21.0->replicate)
  Obtaining dependency information for httpcore<0.19.0,>=0.18.0 from https://files.pythonhosted.org/packages/ac/97/724afbb7925339f6214bf1fdb5714d1a462690466832bf8fb3fd497649f1/httpcore-0.18.0-py3-none-any.whl.metadata
  Downloading httpcore-0.18.0-py3-none-any.whl.metadata (18 kB)
Collecting h11<0.15,>=0.13 (from httpcore<0

In [4]:
# import warnings
# warnings.filterwarnings("ignore")

import glob
import os
import re
import textwrap
import time
from urllib.parse import urljoin

import langchain

# loaders
from langchain.document_loaders import TextLoader


# splits
from langchain.text_splitter import RecursiveCharacterTextSplitter

# prompts
from langchain import PromptTemplate, LLMChain

# vector stores
from langchain.vectorstores import FAISS
from langchain.vectorstores import Chroma

# models
from langchain.llms import HuggingFacePipeline
from InstructorEmbedding import INSTRUCTOR
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.llms import Replicate
from textsum.summarize import Summarizer

# retrievers
from langchain.chains import RetrievalQA

import torch
import transformers
# from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# from transformers import AutoTokenizer, TextStreamer, pipeline

# data collectors
import requests
from bs4 import BeautifulSoup
import difflib


# Models

## Summarizing Model
# Desired summary length in words, plus the words -> tokens ratio used to turn
# that window into the token limits the summarizer is configured with.
min_word_count, max_word_count = 200, 300
tokens_per_word = 1.3
min_token_count = tokens_per_word * min_word_count
max_token_count = tokens_per_word * max_word_count

# Build the summarizer from the named checkpoint.
model_name = "pszemraj/led-large-book-summary"
summarizer = Summarizer(token_batch_length=10000, model_name_or_path=model_name)

# Overwrite the length limits in the summarizer's inference parameters with the
# (truncated to int) token counts computed above, then hand them back.
inference_params = summarizer.inference_params
inference_params['max_length'] = int(max_token_count)
inference_params['min_length'] = int(min_token_count)
summarizer.set_inference_params(inference_params)

## Embeddings model
# Use the GPU when PyTorch can see one and fall back to the CPU otherwise, so
# this cell does not fail on kernels without CUDA.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"
instructor_embeddings = HuggingFaceInstructEmbeddings(
        model_name = "hkunlp/instructor-base",
        model_kwargs = {"device": embedding_device}
)

## Llama 2 70B chat model served by Replicate
# SECURITY: API tokens must never be written into the notebook. The token is
# read from the REPLICATE_API_TOKEN environment variable (set it in the shell
# or through the platform's secrets manager before running this cell). Any
# token that was ever committed with this notebook should be treated as leaked
# and revoked.
REPLICATE_API_TOKEN = os.environ.get("REPLICATE_API_TOKEN")
if not REPLICATE_API_TOKEN:
    raise RuntimeError(
        "REPLICATE_API_TOKEN is not set. Export it in the environment before "
        "running this cell."
    )

llm = Replicate(
    model = "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    input = {"temperature": 0.75, "max_length": 1024, "top_p": 0.95, "repetition_penalty": 1.15},
)


# Instruction text for the Q/A chain: answer only from the supplied context, in
# the language of the question, and admit when the answer is unknown.
# {context} and {question} are the two placeholders filled in per query.
prompt_template = """
Don't try to make up an answer, if you don't know just say that you don't know.
Answer in the same language the question was asked.
Use only the following pieces of context to answer the question at the end.

{context}

Question: {question}
Answer:"""

# Prompt object passed to the retrieval chain
PROMPT = PromptTemplate(
    input_variables = ["context", "question"],
    template = prompt_template,
)

# Functions for Book Retrieval

## Function to search for a book by name and return the best match URL
def search_book_by_name(book_name, timeout=30):
    """Search Project Gutenberg for ``book_name`` and return the closest hit.

    The title of every search result is compared case-insensitively with
    ``book_name`` using ``difflib.SequenceMatcher``; the result with the
    highest similarity ratio wins.

    Args:
        book_name: Free-text title to search for.
        timeout: Seconds to wait for the search request.

    Returns:
        Absolute URL of the best-matching book page, or ``""`` when the search
        request is not successful or no usable result is present.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    base_url = "https://www.gutenberg.org/"

    # Let requests build the query string so characters such as "&", "#" or
    # "?" in the title are percent-encoded instead of corrupting the URL.
    response = requests.get(
        urljoin(base_url, "ebooks/search/"),
        params={"query": book_name, "submit_search": "Go!"},
        timeout=timeout,
    )
    if not response.ok:
        # An error page contains no results; report "no match" to the caller.
        return ""
    soup = BeautifulSoup(response.content, "html.parser")

    wanted_title = book_name.lower()
    best_match_ratio = 0
    best_match_url = ""

    for link in soup.find_all("li", class_="booklink"):
        title_tag = link.find("span", class_="title")
        anchor = link.find("a")
        href = anchor.get("href") if anchor is not None else None
        # Skip malformed entries instead of failing on a missing tag.
        if title_tag is None or not href:
            continue

        similarity_ratio = difflib.SequenceMatcher(
            None, wanted_title, title_tag.get_text().lower()
        ).ratio()
        if similarity_ratio > best_match_ratio:
            best_match_ratio = similarity_ratio
            # urljoin avoids the doubled "/" that plain concatenation of the
            # base URL and a site-relative href would produce.
            best_match_url = urljoin(base_url, href)

    return best_match_url

## Function to get the "Plain Text UTF-8" download link from the book page
def get_plain_text_link(book_url, timeout=30):
    """Return the href of the "Plain Text UTF-8" download on a book page.

    Args:
        book_url: URL of the book page to inspect.
        timeout: Seconds to wait for the page request.

    Returns:
        The ``href`` value of the first matching download link exactly as it
        appears in the page, or ``""`` when the request is not successful or
        no such link exists.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(book_url, timeout=timeout)
    if not response.ok:
        return ""
    soup = BeautifulSoup(response.content, "html.parser")

    for row in soup.find_all("tr"):
        format_cell = row.find("td", class_="unpadded icon_save")
        if format_cell is None or "Plain Text UTF-8" not in format_cell.get_text():
            continue
        anchor = format_cell.find("a")
        href = anchor.get("href") if anchor is not None else None
        if href:
            return href

    return ""


## Function to get the content of the "Plain Text UTF-8" link
def get_plain_text_content(plain_text_link, timeout=60):
    """Download the plain-text book at ``plain_text_link`` and return its text.

    Args:
        plain_text_link: Absolute URL of the plain-text download.
        timeout: Seconds to wait for the download request.

    Returns:
        The decoded response body as a string.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never mistaken for book content.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(plain_text_link, timeout=timeout)
    response.raise_for_status()

    # The link is the "Plain Text UTF-8" format. When the server does not
    # declare a charset, decode as UTF-8 rather than relying on the fallback
    # encoding requests would otherwise pick for text responses.
    if "charset" not in response.headers.get("Content-Type", "").lower():
        response.encoding = "utf-8"

    return response.text


## Main function
def _strip_gutenberg_boilerplate(raw_text):
    """Return ``raw_text`` without the Project Gutenberg header and footer."""
    # Remove the BOM character if it exists
    text = raw_text.lstrip('\ufeff')

    # Both "THE" and "THIS" wordings are accepted for the start and end markers.
    start_pattern = re.compile(r"\*\*\* START OF (?:THIS|THE) PROJECT GUTENBERG EBOOK (.+?) \*\*\*")
    end_pattern = re.compile(r"\*\*\* END OF (?:THIS|THE) PROJECT GUTENBERG EBOOK")

    start_match = start_pattern.search(text)
    body_start = start_match.end() if start_match else 0

    # Look for the end marker only after the header so an earlier mention of
    # the phrase cannot truncate the book.
    end_match = end_pattern.search(text, body_start)
    body_end = end_match.start() if end_match else len(text)

    return text[body_start:body_end]


def load_book(book_name):
    """Download a book from Project Gutenberg and save it to ``book.txt``.

    The best search match for ``book_name`` is located, its "Plain Text UTF-8"
    file is downloaded, the Project Gutenberg header/footer are removed, and
    the remaining text is written to ``book.txt`` (UTF-8) in the working
    directory.

    Args:
        book_name: Title of the book to look up.

    Returns:
        The cleaned book text, or the sentinel string ``"web site error"``
        when no match or download link is found or a request fails. In that
        case ``book.txt`` is left untouched.
    """
    try:
        best_match_url = search_book_by_name(book_name)
        if not best_match_url:
            print("No matching book found.")
            return "web site error"

        plain_text_link = get_plain_text_link(best_match_url)
        if not plain_text_link:
            print("No Plain Text UTF-8 link found.")
            return "web site error"

        # urljoin copes with both site-relative and absolute hrefs.
        full_plain_text_link = urljoin("https://www.gutenberg.org/", plain_text_link)
        plain_text_content = get_plain_text_content(full_plain_text_link)
    except requests.RequestException as error:
        # Keep the function's existing failure contract instead of crashing
        # the caller on a network problem.
        print(f"Request to Project Gutenberg failed: {error}")
        return "web site error"

    book_text = _strip_gutenberg_boilerplate(plain_text_content)

    # Persist the text; loadForEmbeddings reads it back from this file.
    with open("book.txt", "w", encoding="utf-8") as book:
        book.write(book_text)

    return book_text


# Function to get Summary
def generate_summary(book_text):
    """Summarize ``book_text`` with the notebook-level ``summarizer``.

    Args:
        book_text: Text to summarize.

    Returns:
        The value produced by ``summarizer.summarize_string`` for the text.
    """
    return summarizer.summarize_string(book_text)


# Functions for Q/A chatbot

## Splitting book.txt to create embeddings
def loadForEmbeddings(txt_file):
    """Load a UTF-8 text file and split it into chunks for embedding.

    Args:
        txt_file: Path of the text file to load.

    Returns:
        The documents produced by a ``RecursiveCharacterTextSplitter``
        configured with ``chunk_size=800`` and ``chunk_overlap=0``.
    """
    raw_documents = TextLoader(txt_file, encoding="utf-8").load()
    splitter = RecursiveCharacterTextSplitter(chunk_overlap = 0, chunk_size = 800)
    return splitter.split_documents(raw_documents)

def wrap_text_preserve_newlines(text, width=200):
    """Wrap ``text`` to ``width`` columns while keeping its own line breaks.

    The text is split on newline characters, every resulting line is wrapped
    independently with ``textwrap.fill``, and the pieces are joined back with
    newlines.

    Args:
        text: String to wrap.
        width: Maximum line width passed to ``textwrap.fill``.

    Returns:
        The wrapped string.
    """
    return '\n'.join(
        textwrap.fill(segment, width=width) for segment in text.split('\n')
    )

## Format llm response
def process_llm_response(llm_response):
    """Format a retrieval-chain response as wrapped answer text plus sources.

    Args:
        llm_response: Mapping with a ``'result'`` string and, optionally, a
            ``'source_documents'`` list whose items expose a ``metadata``
            dict.

    Returns:
        The wrapped answer followed by a ``Sources:`` section that lists each
        distinct ``metadata['source']`` once, in retrieval order. The section
        is empty when no source is available.
    """
    ans = wrap_text_preserve_newlines(llm_response['result'])

    # Collect every distinct source instead of indexing the first document,
    # which fails when the retriever returns nothing.
    sources_used = []
    for document in llm_response.get('source_documents') or []:
        source = document.metadata.get('source')
        if source is not None and str(source) not in sources_used:
            sources_used.append(str(source))

    ans = ans + '\n\nSources: \n' + '\n'.join(sources_used)
    return ans

## Main function in Q/A
def llm_ans(query):
    """Answer ``query`` with a retrieval Q/A chain and report the time taken.

    A "stuff" ``RetrievalQA`` chain is built from the notebook-level ``llm``,
    ``retriever`` and ``PROMPT``, run on the query, and its response is
    formatted by ``process_llm_response``.

    Args:
        query: Question to ask.

    Returns:
        The formatted answer followed by a ``Time elapsed`` line giving the
        wall-clock duration rounded to whole seconds.
    """
    started_at = time.time()

    chain = RetrievalQA.from_chain_type(
        llm = llm,
        retriever = retriever,
        chain_type = "stuff", # map_reduce, map_rerank, stuff, refine
        chain_type_kwargs = {"prompt": PROMPT},
        return_source_documents = True,
        verbose = False
    )
    formatted_answer = process_llm_response(chain(query))

    elapsed_seconds = int(round(time.time() - started_at, 0))
    return formatted_answer + f'\n\nTime elapsed: {elapsed_seconds} s'

# Example for creating Embeddings
book_name = "The prince"
book_text = load_book(book_name)
book = "book.txt"
texts = loadForEmbeddings(book)

## create embeddings
vectordb = FAISS.from_documents(
    documents = texts,
    embedding = instructor_embeddings
)

# llm_ans() reads the module-level `retriever`; create it here so questions can
# be asked about the example book without calling submit_book() first.
retriever = vectordb.as_retriever(search_type = "similarity", search_kwargs = {"k": 3})

# Flag read by get_response(): True while no book is available for questions
no_book = False

# Gets book name
# then creates embeddings
# and after that generates and returns summary
def submit_book(book_name):
    """Download ``book_name``, index it for Q/A, and return its summary.

    On success the module-level ``vectordb`` and ``retriever`` are replaced
    with ones built from the new book and ``no_book`` is cleared.

    Args:
        book_name: Title of the book to load.

    Returns:
        The generated summary, or a message for the user when the name is
        empty or the book could not be downloaded.
    """
    global vectordb, retriever, no_book
    if not book_name:
        no_book = True
        return "Please enter the name of the book."

    book_text = load_book(book_name)
    if book_text == "web site error":
        # load_book reports failure with this sentinel. Stop here so a
        # previously saved book.txt is not re-indexed and the sentinel itself
        # is not summarized as if it were the book.
        no_book = True
        return "Could not download the book. Please check the name and try again."

    book = "book.txt"
    texts = loadForEmbeddings(book)

    # create embeddings
    vectordb = FAISS.from_documents(
        documents = texts,
        embedding = instructor_embeddings
    )

    # search_type is an argument of as_retriever itself, not a search kwarg.
    retriever = vectordb.as_retriever(search_type = "similarity", search_kwargs = {"k": 3})

    # A book is now available, so get_response() may answer questions again
    # even if an earlier call had set the flag.
    no_book = False

    summary = generate_summary(book_text)

    return summary

# Gets the prompt and returns Llm response
def get_response(prompt):
    """Validate the inputs and return the LLM answer for ``prompt``.

    Args:
        prompt: Question to ask about the current book.

    Returns:
        A message asking for the missing book name and/or prompt when either
        is absent; otherwise the result of ``llm_ans(prompt)``.
    """
    missing_prompt = not prompt

    if no_book:
        if missing_prompt:
            return "Please enter the name of the book and the prompt."
        return "Please enter the name of the book."

    if missing_prompt:
        return "Please enter the prompt."

    return llm_ans(prompt)


Downloading (…)lve/main/config.json:   0%|          | 0.00/1.44k [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.84G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/1.32k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]

10/30/2023 12:17:51 INFO Loaded model pszemraj/led-large-book-summary to cuda
10/30/2023 12:17:51 INFO Load pretrained SentenceTransformer: hkunlp/instructor-base


load INSTRUCTOR_Transformer




max_seq_length  512


RuntimeError: CUDA error: an illegal memory access was encountered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
%%time
# Load and index "The Valley of Fear", then print what submit_book returns
# (the summary, or a message if the book could not be processed).
response_ = submit_book("The Valley of Fear")
print(response_)

In [None]:
%%time
# Ask a question through get_response and print the returned text.
answer = get_response("What are the characters in the book?")
print(answer)