In [28]:
from typing import Any, List, Mapping, Optional
from langchain.llms.base import LLM
from langchain.callbacks.manager import CallbackManagerForLLMRun
import requests

class CustomLLM(LLM):
    """LangChain LLM that forwards prompts to a local ``/v1/completions`` HTTP endpoint.

    Every call POSTs the prompt together with a fixed set of sampling options to
    ``http://localhost:5000/v1/completions`` and returns the text of the first choice.
    """

    @property
    def _llm_type(self) -> str:
        """Type tag reported for this LLM."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` to the completion endpoint and return the generated text.

        Args:
            prompt: Text to complete.
            stop: Optional stop sequences. When a list is given, four default
                sequences are appended to it before it is sent.
            run_manager: Accepted so the signature matches the other LLM
                subclasses in this notebook; not used here.
            **kwargs: Extra call-time options; accepted so forwarded keyword
                arguments do not raise ``TypeError``. They are ignored.

        Returns:
            ``choices[0].text`` from the JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        if isinstance(stop, list):
            # Concatenation builds a new list, so the caller's list is not mutated.
            stop = stop + ["\n###","\nObservation:","\n问题","\nQuestion:"]
        HOST = 'localhost:5000'
        URI = f'http://{HOST}/v1/completions'

        payload = {
            "prompt": prompt,
            "temperature": 0.1,
            "max_tokens": 16384,
            "stop_at_newline": True,
            "early_stopping": True,
            "stopping_strings": stop,
            'do_sample': True,
            'top_p': 0.1,
            'typical_p': 1,
            'repetition_penalty': 1.18,
            'top_k': 40,
            'min_length': 0,
            'no_repeat_ngram_size': 0,
            'num_beams': 1,
            'penalty_alpha': 0,
            'length_penalty': 1,
            'seed': -1,
            'add_bos_token': True,
            'truncation_length': 8192,
            'ban_eos_token': False,
            'skip_special_tokens': True,
        }
        if stop is not None:
            # OpenAI-style completion endpoints name this field `stop`; the
            # `stopping_strings` key above is kept unchanged for servers that read it.
            # NOTE(review): confirm which of the two keys the target server honours.
            payload["stop"] = stop

        response = requests.post(URI, json=payload)
        response.raise_for_status()
        return response.json()['choices'][0]['text']

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {}

class CustomLLM2(LLM):
    """LangChain LLM that forwards prompts to a local ``/v1/chat/completions`` endpoint.

    The prompt is sent as a single ``user`` message in ``instruct`` mode with the
    ``Alpaca`` instruction template, and the content of the first choice's
    message is returned.
    """

    @property
    def _llm_type(self) -> str:
        """Type tag reported for this LLM."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Send ``prompt`` as one chat message and return the reply text.

        Args:
            prompt: Text placed in the single ``user`` message.
            stop: Optional stop sequences. When a list is given, four default
                sequences are appended and the result is sent as ``stop``.
            run_manager: Accepted so the signature matches the other LLM
                subclasses in this notebook; not used here.
            **kwargs: Extra call-time options; accepted so forwarded keyword
                arguments do not raise ``TypeError``. They are ignored.

        Returns:
            ``choices[0].message.content`` from the JSON response.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        if isinstance(stop, list):
            # Concatenation builds a new list, so the caller's list is not mutated.
            stop = stop + ["\n###","\nObservation:","\n问题","\nQuestion:"]
        HOST = 'localhost:5000'
        URI = f'http://{HOST}/v1/chat/completions'

        payload = {
            "messages": [
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            "mode": "instruct",
            "instruction_template": "Alpaca",
        }
        if stop is not None:
            # Previously the stop list was built and then dropped; send it so the
            # caller's stop sequences actually reach the server.
            payload["stop"] = stop

        response = requests.post(URI, json=payload)
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content']

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {}

# Notebook-wide LLM used by the summary functions; this picks the chat-completions wrapper.
llm = CustomLLM2()

In [9]:
class FakeLLM(LLM):
    """Stand-in LLM that answers with the leading ``n`` characters of its prompt."""

    # Upper bound on how many prompt characters `_call` hands back.
    n: int

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Expose ``n`` as the only identifying parameter."""
        return dict(n=self.n)

    @property
    def _llm_type(self) -> str:
        """Type tag reported for this LLM."""
        return "custom"

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Return ``prompt`` cut to ``n`` characters; reject any stop sequences.

        Raises:
            ValueError: If ``stop`` is anything other than ``None``.
        """
        if stop is None:
            limit = self.n
            return prompt[:limit]
        raise ValueError("stop kwargs are not permitted.")

# Echo-style LLM (returns its prompt cut to 10000 characters); story_summary uses it for the reduce step.
llm_fake = FakeLLM(n=10000)

In [29]:
# This script summarizes a web page given its URL and writes its page content and metadata into a vector DB.

from langchain.tools import Tool
from langchain.utilities import GoogleSearchAPIWrapper
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain

import configparser, os
# Read API credentials from a local INI file and export them as environment variables.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files it
# parsed, so fail here with a clear message instead of an opaque KeyError on the
# section lookups below.
if not config.read('./keys.ini'):
    raise FileNotFoundError(
        "./keys.ini was not found or could not be read; "
        "it must provide [GOOGLE] and [OPENAI] sections"
    )
os.environ['GOOGLE_API_KEY'] = config['GOOGLE']['GOOGLE_API_KEY']
os.environ['GOOGLE_CSE_ID'] = config['GOOGLE']['GOOGLE_CSE_ID']
openai_api_key = config['OPENAI']['OPENAI_API_KEY']
os.environ['OPENAI_API_KEY'] = openai_api_key

from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Web page loader (splitting into chunks is done separately by the caller)
def web_loader_docs(link:str):
    """Load the web page at ``link`` and return what ``WebBaseLoader.load()`` yields.

    The result is returned as loaded; no text splitting is applied here.
    """
    return WebBaseLoader(link).load()


def story_summary(docs, map_llm=None, reduce_llm=None, token_max=6000):
    """Summarize dialogue documents with a map-reduce chain.

    Map step: each document is placed into the map prompt and summarized by
    ``map_llm``. Reduce step: the per-document summaries are stuffed into the
    reduce prompt and passed to ``reduce_llm``.

    Args:
        docs: Documents of the web page to summarize.
        map_llm: LLM for the map step; defaults to the notebook-level ``llm``.
        reduce_llm: LLM for the reduce/collapse step; defaults to the
            notebook-level ``llm_fake``.
        token_max: Maximum number of tokens to group documents into during
            the reduce step.

    Returns:
        The output of running the map-reduce chain on ``docs``.
    """
    # Resolve defaults at call time so a later re-assignment of the notebook
    # globals is picked up, exactly as before.
    if map_llm is None:
        map_llm = llm
    if reduce_llm is None:
        reduce_llm = llm_fake

    # Map
    map_template = """The following are documents containing dialogues
        {docs}
        Write a detailed summary of the dialogues in each. ONLY summarize the dialogues.
        Output:"""
    map_prompt = PromptTemplate.from_template(map_template)
    map_chain = LLMChain(llm=map_llm, prompt=map_prompt)

    # Reduce
    reduce_template = """
        {doc_summaries}
        """
    reduce_prompt = PromptTemplate.from_template(reduce_template)
    reduce_chain = LLMChain(llm=reduce_llm, prompt=reduce_prompt)
    # Takes a list of documents, combines them into a single string, and passes this to an LLMChain
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain, document_variable_name="doc_summaries"
    )
    # Combines and iteratively reduces the mapped documents
    # NOTE(review): with an echo-style reduce LLM such as the default, collapsing
    # does not shorten the text — confirm behaviour when the mapped summaries
    # exceed token_max.
    reduce_documents_chain = ReduceDocumentsChain(
        # This is the final chain that is called.
        combine_documents_chain=combine_documents_chain,
        # If documents exceed context for `StuffDocumentsChain`
        collapse_documents_chain=combine_documents_chain,
        # The maximum number of tokens to group documents into.
        token_max=token_max,
    )

    # Combining documents by mapping a chain over them, then combining results
    map_reduce_chain = MapReduceDocumentsChain(
        # Map chain
        llm_chain=map_chain,
        # Reduce chain
        reduce_documents_chain=reduce_documents_chain,
        # The variable name in the llm_chain to put the documents in
        document_variable_name="docs",
        # Do not return the results of the map steps in the output
        return_intermediate_steps=False,
    )
    # Return the summary from the map and reduce procedure
    return map_reduce_chain.run(docs)

# Use a single chain to summarize the story dialogue from the main page content of the URL
def story_summary_stuff(docs, summary_llm=None):
    """Summarize the dialogue in ``docs`` with one "stuff" chain.

    All documents are combined into the ``{text}`` slot of a single prompt and
    sent to the LLM in one call.

    Args:
        docs: Documents of the web page to summarize.
        summary_llm: LLM to use; defaults to the notebook-level ``llm``.

    Returns:
        The output of running the stuff chain on ``docs``.
    """
    # Resolve the default at call time so the current notebook-level `llm` is used.
    if summary_llm is None:
        summary_llm = llm

    # Define prompt ("Output:" spelling fixed to match the map prompt in story_summary)
    prompt_template = """Write a detailed summary of the dialogue.
    "{text}"
    Output:"""
    prompt = PromptTemplate.from_template(prompt_template)

    # Define LLM chain
    llm_chain = LLMChain(llm=summary_llm, prompt=prompt)

    # Define StuffDocumentsChain
    stuff_chain = StuffDocumentsChain(
        llm_chain=llm_chain, document_variable_name="text"
    )

    return stuff_chain.run(docs)


In [3]:
# Load the story page, then split it into overlapping chunks for summarization.
link = "https://arknights.fandom.com/wiki/10-8/Story"
docs_org = web_loader_docs(link)
# chunk_size / chunk_overlap are presumably measured in characters (the splitter's default) — TODO confirm.
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 5000, chunk_overlap = 500)
docs = text_splitter.split_documents(docs_org)
# Number of chunks produced.
len(docs)

4

In [30]:
# Summarize every chunk except the first one.
# NOTE(review): presumably the first chunk is skipped because it holds only site navigation text — confirm.
story_summary(docs[1:])

'\n        The Sarkaz Mercenary reports to Hoederer that they have lost the rebels who attacked their camp, describing them as crafty sand beasts. Hoederer tells the mercenary to guard their position until the rebels appear again. Another mercenary enters and comments on Hoederer\'s loyalty from his men, to which Hoederer responds that it\'s a result of Manfred\'s successful plan. The mercenary hints at Hoederer\'s past, but Hoederer changes the subject, advising the mercenary to be careful of the shadows, suggesting mercenaries are not trustworthy.\n\nIn a separate background, Mandragora gives orders to her Dublinn Soldier in preparation for an operation to rescue Heidi Thomson. She tells the soldier they will leave Londinium after their mission, expressing her disdain for the city. The soldier shares his own negative feelings towards Londinium and its stench, prompting Mandragora to assign him to tail Manfred. She also selects the ten best fighters for the rescue mission and sets off

In [27]:
# Inspect the second chunk, i.e. the first one passed to story_summary.
docs[1]

Document(page_content='Administrators\n\n\n\n\nPolicies and guidelines\n\n\n\n\nRecent blog posts\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\n \n\n\n\n\n\n\nin:\nStories, Episode 10 \n\n\n\n\n\n\t\t\t\t\t10-8 story\t\t\t\t\n< 10-8\n\n\n\n\n \n\n\t\t\t\t\t\tSign in to edit\t\t\t\t\t\n\n\n \n\n\n\n\n\t\t\t\t\t\t\t\t\t\t\tView history\t\t\t\t\t\t\t\t\t\t\n\n\n\n\t\t\t\t\t\t\t\t\t\t\tTalk (0)\t\t\t\t\t\t\t\t\t\t\n\n\n\n\n\n\n\n\n\n\n\nOperationGuideStory\n\n\nPrevious 10-7\n\nNext 10-9\n\n\nCharacters\n\n\nDoctor Amiya Horn Rockrock Clovisia Hoederer Mandragora Self-Salvation Corps Soldier Victorian Soldier Dublinn Soldier Sarkaz Mercenary ???\n\n\nBackgrounds\n\n\n1 2 3\n\nAfter operation“Horn loses another comrade and decides to rescue all the remaining soldiers who have been taken prisoners by the Sarkaz. At the same time, Rhodes Island and the Self-Salvation Corps also plan their own operation to rescue Messenger Heidi Thomson, who has been captured by the Sarkaz.”\n\n\n<Background 1>\n\n\nSarka