In [62]:
from typing import Any, List, Mapping, Optional

import requests
from langchain.callbacks.manager import CallbackManagerForLLMRun
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.llms.base import LLM
from langchain.prompts import PromptTemplate

# Host:port of the generation server and the full URL of its generate endpoint.
HOST = "localhost:5000"
URI = "http://{}/api/v1/generate".format(HOST)

class CustomLLM(LLM):
    """LangChain LLM that forwards prompts to the HTTP endpoint at ``URI``.

    Each call POSTs the prompt together with a fixed set of sampling
    parameters and returns the ``text`` field of the first entry in the
    response's ``results`` list.
    """

    # Seconds to wait for the server before giving up. Without a timeout
    # `requests` waits indefinitely, which would hang the notebook if the
    # server stalls; the default is generous because generation can be slow.
    request_timeout: float = 300.0

    @property
    def _llm_type(self) -> str:
        """Type tag reported to LangChain for this LLM."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: Text sent to the server as the ``prompt`` field.
            stop: Optional extra stop strings. The defaults ``"\\n###"`` and
                ``"\\nObservation:"`` are always appended, including when
                ``stop`` is ``None``.
            run_manager: Accepted for compatibility with the LangChain
                ``_call`` signature; not used.
            **kwargs: Accepted for compatibility; ignored.

        Returns:
            The generated text from ``results[0]["text"]`` of the JSON reply.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If no reply arrives within ``request_timeout``.
        """
        # Build a new list: the caller's `stop` is never mutated, and the
        # payload always carries a list (never null) with the default stops.
        stopping_strings = list(stop or []) + ["\n###", "\nObservation:"]

        response = requests.post(
            URI,
            json={
                "prompt": prompt,
                "temperature": 0.1,
                "max_new_tokens": 256,
                "early_stopping": True,
                "stopping_strings": stopping_strings,
                'do_sample': True,
                'top_p': 0.1,
                'typical_p': 1,
                'repetition_penalty': 1.18,
                'top_k': 40,
                'min_length': 0,
                'no_repeat_ngram_size': 0,
                'num_beams': 1,
                'penalty_alpha': 0,
                'length_penalty': 1,
                'seed': -1,
                'add_bos_token': True,
                'truncation_length': 8192,
                'ban_eos_token': False,
                'skip_special_tokens': True,
            },
            timeout=self.request_timeout,
        )
        # Surface HTTP errors instead of failing later on a malformed body.
        response.raise_for_status()
        return response.json()['results'][0]['text']

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters (none are exposed)."""
        return {}

# Notebook-wide CustomLLM instance; the summary functions below read this global.
llm = CustomLLM()

In [61]:
class FakeLLM(LLM):
    """Stand-in LLM that does no generation.

    It simply hands the prompt back, cut to at most ``n`` characters.
    """

    # Maximum number of prompt characters returned by `_call`.
    n: int

    @property
    def _llm_type(self) -> str:
        """Type tag reported to LangChain for this LLM."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Return the first ``n`` characters of ``prompt``.

        Raises:
            ValueError: If any ``stop`` value is supplied.
        """
        if stop is None:
            limit = self.n
            return prompt[:limit]
        raise ValueError("stop kwargs are not permitted.")

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return dict(n=self.n)

# Echo LLM that returns at most the first 10000 characters of whatever prompt it receives.
llm_fake = FakeLLM(n=10000)

In [86]:
# This script summarizes a web page given its URL and writes its page content and metadata into a vector DB.

from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.chat_models import ChatOpenAI


import configparser
import os

# API credentials are read from a local ini file with [GOOGLE] and [OPENAI]
# sections and exported as environment variables.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; an empty result means keys.ini was not loaded.
# Fail here with a clear message instead of a later KeyError on 'GOOGLE'.
if not config.read('./keys.ini'):
    raise FileNotFoundError(
        "Could not read './keys.ini'; it must define the [GOOGLE] and [OPENAI] sections."
    )
os.environ['GOOGLE_API_KEY'] = config['GOOGLE']['GOOGLE_API_KEY']
os.environ['GOOGLE_CSE_ID'] = config['GOOGLE']['GOOGLE_CSE_ID']
openai_api_key = config['OPENAI']['OPENAI_API_KEY']
os.environ['OPENAI_API_KEY'] = openai_api_key

from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Web page loader (no splitting is performed inside the function)
def web_loader_docs(link: str):
    """Load the web page at ``link`` and return the resulting documents.

    Args:
        link: URL of the web page to load.

    Returns:
        Whatever ``WebBaseLoader(link).load()`` produces. No text splitting
        is applied here; callers split the result themselves if they need
        smaller chunks.
    """
    return WebBaseLoader(link).load()


def story_summary(docs, map_llm=None, reduce_llm=None, token_max=6000):
    """Summarize dialogue documents with a map-reduce chain.

    Each document is summarized on its own (map step), then the per-document
    summaries are combined (reduce step).

    Args:
        docs: Documents of the web page to summarize.
        map_llm: LLM used for the per-document summaries. Defaults to the
            notebook-level ``llm``.
        reduce_llm: LLM used to combine the summaries. Defaults to the
            notebook-level ``llm_fake``, which returns its prompt text
            (truncated), so by default the reduce step passes the joined
            summaries through rather than re-summarizing them.
        token_max: Maximum number of tokens to group documents into before
            they are collapsed.

    Returns:
        The final summary string produced by the map-reduce chain.
    """
    # Resolve defaults at call time so the current notebook-level instances
    # are used unless the caller overrides them.
    if map_llm is None:
        map_llm = llm
    if reduce_llm is None:
        reduce_llm = llm_fake

    # Map
    map_template = """The following are documents containing dialogues
        {docs}
        Write a detailed summary of the dialogues in each. ONLY summarize the dialogues.
        Output:"""
    map_prompt = PromptTemplate.from_template(map_template)
    map_chain = LLMChain(llm=map_llm, prompt=map_prompt)

    # Reduce
    reduce_template = """
        {doc_summaries}
        """
    reduce_prompt = PromptTemplate.from_template(reduce_template)
    reduce_chain = LLMChain(llm=reduce_llm, prompt=reduce_prompt)
    # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain, document_variable_name="doc_summaries"
    )
    # Combines and iteratively reduces the mapped documents
    reduce_documents_chain = ReduceDocumentsChain(
        # This is the final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # If documents exceed context for `StuffDocumentsChain`
        collapse_documents_chain=combine_documents_chain,
        # The maximum number of tokens to group documents into.
        token_max=token_max,
    )

    # Combining documents by mapping a chain over them, then combining results
    map_reduce_chain = MapReduceDocumentsChain(
        # Map chain
        llm_chain=map_chain,
        # Reduce chain
        reduce_documents_chain=reduce_documents_chain,
        # The variable name in the llm_chain to put the documents in
        document_variable_name="docs",
        # Do not return the results of the map steps in the output
        return_intermediate_steps=False,
    )
    # Return the summary from the map and reduce procedure
    return map_reduce_chain.run(docs)

# Use a single chain to summarize the story dialogue from the main page content of the URL
def story_summary_stuff(docs):
    """Summarize dialogue documents with a single "stuff" chain.

    All documents are placed into one prompt (as the ``text`` variable) and
    sent to the notebook-level ``llm`` in one call.

    Args:
        docs: Documents of the web page to summarize.

    Returns:
        The summary string returned by the chain.
    """
    # Prompt text spelled out line by line; the embedded indentation is part
    # of the prompt and is kept exactly as-is.
    template_text = (
        'Write a detailed summary of the dialogue.\n'
        '    "{text}"\n'
        '    OUtput:'
    )

    summarize_chain = LLMChain(
        llm=llm,
        prompt=PromptTemplate.from_template(template_text),
    )

    # Stuff every document into the single `text` slot and run the chain.
    return StuffDocumentsChain(
        llm_chain=summarize_chain,
        document_variable_name="text",
    ).run(docs)


In [87]:
# Load the story page, then split it into overlapping chunks for summarization.
link = "https://arknights.fandom.com/wiki/10-8/Story"
docs_org = web_loader_docs(link)
# chunk_size / chunk_overlap are presumably measured in characters — TODO confirm against the splitter's length function.
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 5000, chunk_overlap = 500)
docs = text_splitter.split_documents(docs_org)
# Display how many chunks the split produced.
len(docs)

4

In [89]:
# Summarize every chunk except the first one (docs[0] is skipped).
story_summary(docs[1:])

'\n        \n        \n\n\n\nAdministrators, policies and guidelines\nRecent blog posts\n\n\n\n\nThe following are documents containing dialogues:\n\n\n\nAdmins\n\n\n\t* Episode 1\n\n\n\n\n\nVictoria has lost another fine soldier. A little girl has lost her father. How many more must we lose before we finally see the end of this tragedy?\nThe team got back a while ago. They didn\'t find any medicine. Salley\'s family lived in County Ascarat. Blake up his belongings, please. Once the war is over, we\'ll pay his family a visit. He left nothing behind. No story to tell. We have to go on this journey still. His daughter will want to hear his story. Horn apologized to me. Said "Sorry, Lieutenant, but you must live on." Soldier said. Victoria understands. It\'s harder to live. A Victorian soldier owes an apology. The state of Londonium is their home, but it\'s become a prison. They want to keep marching down this road because Clovis gave them hope.\nClovis marked eight probable locations on 

In [84]:
# Inspect the second chunk (index 1) of the split documents.
docs[1]

Document(page_content='Administrators\n\n\n\n\nPolicies and guidelines\n\n\n\n\nRecent blog posts\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n \n\n\n\n\n\n\nin:\nStories, Episode 10 \n\n\n\n\n\n\t\t\t\t\t10-8 story\t\t\t\t\n< 10-8\n\n\n\n\n \n\n\t\t\t\t\t\tSign in to edit\t\t\t\t\t\n\n\n \n\n\n\n\n\t\t\t\t\t\t\t\t\t\t\tView history\t\t\t\t\t\t\t\t\t\t\n\n\n\n\t\t\t\t\t\t\t\t\t\t\tTalk (0)\t\t\t\t\t\t\t\t\t\t\n\n\n\n\n\n\n\n\n\n\n\nOperationGuideStory\n\n\nPrevious 10-7\n\nNext 10-9\n\n\nCharacters\n\n\nDoctor Amiya Horn Rockrock Clovisia Hoederer Mandragora Self-Salvation Corps Soldier Victorian Soldier Dublinn Soldier Sarkaz Mercenary ???\n\n\nBackgrounds\n\n\n1 2 3\n\nAfter operation“Horn loses another comrade and decides to rescue all the remaining soldiers who have been taken prisoners by the Sarkaz. At the same time, Rhodes Island and the Self-Salvation Corps also plan their own operation to rescue Messenger Heidi Thomson, who has been captured by the Sarkaz.”\n\n\n<Background 1>\n\n\nSarka