# Ooba Langchain wrapper
A LangChain LLM wrapper for the [text-generation-webui](https://github.com/oobabooga/text-generation-webui) API.

This notebook currently assumes you are self-hosting a model that uses the Vicuna 1.1 prompt format.

### Installs

In [None]:
! pip install -qq -U langchain

### Imports

In [None]:
import langchain
import requests

from langchain.llms.base import LLM, Optional, List, Mapping, Any
from pydantic import Field

### API Endpoint

Sample:
- http://localhost:5000
- http://192.168.1.2:5000
- https://raising-df-zoning-proteins.trycloudflare.com

In [None]:
# Base URL of the text-generation-webui API server; replace it with your own
# endpoint. The wrapper appends "/api/v1/generate" directly to this value.
api_url = "http://localhost:5000"

### Wrapper for Ooba API

In [None]:
class OobaApiLLM(LLM):
    """LangChain ``LLM`` that sends prompts to a text-generation-webui server.

    Each call POSTs the prompt to ``{endpoint}/api/v1/generate`` and returns
    the ``text`` of the first entry in the response's ``results`` list.
    """

    # Base URL of the API server (for example "http://localhost:5000"); the
    # request path is appended directly, so leave off any trailing slash.
    endpoint: str = Field(...)
    # Seconds to wait for the server before ``requests`` gives up. ``None``
    # (the default) waits indefinitely, which is what happened before this
    # field existed.
    timeout: Optional[float] = None

    @property
    def _llm_type(self) -> str:
        """Return the type label reported for this LLM."""
        return "custom"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Generate a completion for ``prompt`` and return it as text.

        Args:
            prompt: Text sent to the server as the ``prompt`` field.
            stop: Optional stop sequences. They are forwarded to the server
                as ``stop_sequence`` and, if the reply still ends with one
                of them, that suffix is removed from the returned text.

        Returns:
            The generated text with surrounding whitespace stripped.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            ValueError: If the JSON body does not have the expected
                ``{"results": [{"text": ...}]}`` layout.
        """
        payload = {
            'prompt': prompt,
            'max_new_tokens': 500,
            'preset': 'simple-1',
        }
        if stop is not None:
            payload["stop_sequence"] = stop

        response = requests.post(
            f'{self.endpoint}/api/v1/generate',
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        body = response.json()

        # Any deviation from {"results": [{"text": "<str>"}]} is reported as
        # one ValueError instead of leaking KeyError/TypeError to the caller.
        try:
            text = body['results'][0]['text'].strip()
        except (KeyError, IndexError, TypeError, AttributeError) as err:
            raise ValueError('Unexpected response format from Ooba API') from err

        for sequence in stop or ():
            # Skip empty sequences: every string "ends with" "", and
            # text[:-0] is text[:0], which would erase the whole reply.
            if sequence and text.endswith(sequence):
                text = text[: -len(sequence)].rstrip()

        # Echo the reply so it is visible in the notebook output.
        print(text)
        return text

    def __call__(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Call the model directly: ``llm(prompt)`` delegates to ``_call``.

        NOTE(review): this override replaces the ``__call__`` inherited from
        ``LLM``; confirm that skipping the base-class handling is intended.
        """
        return self._call(prompt, stop)

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Return the parameters that identify this instance (its endpoint)."""
        return {'endpoint': self.endpoint}


# Create the wrapper instance that talks to the endpoint set in `api_url`.
llm = OobaApiLLM(endpoint=api_url)

### Test the LLM instance
Generate a response using a Vicuna-style prompt.

In [None]:
# Plain string (no f-prefix): the prompt contains nothing to interpolate.
# The prompt stops right after "ASSISTANT:" so the model continues from the
# role tag itself rather than from trailing whitespace or a newline.
llm_response = llm("""A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user's input.
USER: How would I make pizza dough at home?
ASSISTANT:""")

### Add Conversation Chain with Memory

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain import LLMChain, PromptTemplate
from langchain.callbacks import StdOutCallbackHandler

### Setup LLM Chain with Template, Prompt & Memory

In [None]:
# NOTE(review): `handler` is created but not passed to the chain below
# (`callbacks=None`); it is kept in case later cells use it.
handler = StdOutCallbackHandler()

# Vicuna-style prompt: system line, then the remembered conversation, then
# the new user turn, ending at the assistant tag for the model to continue.
template = """A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user's input.

{chat_history}
USER: {human_input}
ASSISTANT:"""

prompt = PromptTemplate(
    input_variables=["chat_history", "human_input"], template=template
)

# Label remembered turns with the same role names the template uses, so the
# injected history reads "USER: ..." / "ASSISTANT: ..." rather than the
# memory's default "Human: ..." / "AI: ..." labels.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    human_prefix="USER",
    ai_prefix="ASSISTANT",
)

llm_chain = LLMChain(
    llm=llm,
    prompt=prompt,
    verbose=True,  # Log each run, including the fully formatted prompt
    memory=memory,
    callbacks=None,
)

### Utilize LLM w/ Conversational Memory

With `verbose=True` set above, the chain prints the fully formatted prompt it sends to the model, so you can see how the chat history is inserted on each turn.

In [None]:
# First turn: fill the template's `human_input` slot and run the chain; the
# returned text is stored in `response`.
response = llm_chain.predict(human_input="Can you tell me a joke about cars?")

### Ask a Follow-up to Test Memory

In [None]:
# Follow-up turn through the same chain; the question only makes sense if the
# chain's memory supplies the earlier exchange via `chat_history`.
response = llm_chain.predict(human_input="Do you have any other good ones?")

# Uncomment to print the final response on its own:
# print(f"-----------\n{response}")