Skip to content

Commit

Permalink
Assistant OpenAI style functionality
Browse files Browse the repository at this point in the history
  • Loading branch information
artitw committed Sep 24, 2023
1 parent dc6231b commit 749135e
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 13 deletions.
23 changes: 21 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -186,15 +186,34 @@ t2t.Transformer.LANGUAGES
## Examples

### Assistant
- Free private open source alternative to commercial LLMs.
- Commercial LLMs are costly, collect your data, impose quotas and rate limits that hinder development.
- Run at no cost on Google Colab free tier, so you don't even need your own device.

```
import text2text as t2t
asst = t2t.Assistant()
instructions = "Generate a JSON object that maps English characters as keys and Greek equivalents as values: {"
res = t2t.Assistant().transform([instructions])
res = asst.transform([instructions])
#[
# '{\n"a": "α",\n"b": "β",\n"c": "γ",\n"d": "δ",\n"e": "ε",\n"f": "φ",\n"g": "χ",\n"h": "ι",\n"i": "η",\n"j": "κ",\n"k": "λ",\n"l": "μ",\n"m": "ν",\n"n": "ξ",\n"o": "ο",\n"p": "π",\n"q": "ρ",\n"r": "σ",\n"s": "τ",\n"t": "υ",\n"u": "ύ",\n"v": "φ",\n"w": "χ",\n"x": "ψ",\n"y": "ω",\n"z": "ζ"\n}'
#]
#OpenAI API format
input_prompts = ["Hello, world!"]
print(
asst.completion_tokens(input_prompts),
asst.completion(input_prompts)
)
#[13]
#['Hello there! How can I help you today? If you have any questions or need assistance with something, feel free to ask.']
results = asst.chat_completion([
{"role": "user", "content": "Hi"},
{"role": "assistant", "content": "Hello, how are you?"},
{"role": "user", "content": "What should I do today?"}
])
#{'role': 'assistant', 'content': '1. Make a list of things to be grateful for.\n2. Go outside and take a walk in nature.\n3. Practice mindfulness meditation.\n4. Connect with a loved one or friend.\n5. Do something kind for someone else.\n6. Engage in a creative activity like drawing or writing.\n7. Read an uplifting book or listen to motivational podcasts.'}
```
- To use a dynamic knowledge base, see [![Q&A Assistant Demo](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1hkNgpSmmUA-mzUibqz25xq-E8KYOLuVx?usp=sharing)
- To use with LangChain, see [![LangChain integration](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1K6Kk80w9vjFZ7PL9dPRgVuOPuaWcY4ae?usp=sharing)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="text2text",
version="1.3.2",
version="1.3.3",
author="artitw",
author_email="artitw@gmail.com",
description="Text2Text: Crosslingual NLP/G toolkit",
Expand Down
55 changes: 45 additions & 10 deletions text2text/assistant.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import pandas as pd
import logging
import pandas as pd
import text2text as t2t
from transformers import AutoTokenizer, logging
from auto_gptq import AutoGPTQForCausalLM

logging.set_verbosity(logging.CRITICAL)

def _clean_output(input_prompt, output_text):
return output_text.replace('<s>',"").replace('</s>',"").replace(input_prompt, "").strip()

class Assistant(t2t.Transformer):

def __init__(self, **kwargs):
Expand All @@ -21,26 +24,23 @@ def __init__(self, **kwargs):
quantize_config=None
)

def preprocess(self, input_lines, src_lang='en', retriever=None, **kwargs):
input_lines = t2t.Transformer.transform(self, input_lines, src_lang, **kwargs)
def preprocess(self, input_lines, retriever=None, **kwargs):
df = pd.DataFrame({"input_line": input_lines})
if src_lang != 'en':
df["input_line"] = self._translate_lines(df["input_line"].tolist(), src_lang, 'en')
if retriever:
k = kwargs.get('k', 1)
df["knowledge"] = retriever.retrieve(df["input_line"].str.lower().tolist(), k=k)
df["input_line"] = df["knowledge"].apply(' '.join) + " - " + df["input_line"]
df["input_line"] = "USER: " + df["input_line"] + "\nASSISTANT:"
return df

def num_tokens(self, input_lines, src_lang='en'):
df = self.preprocess(input_lines, src_lang)
def completion_tokens(self, input_lines):
    """Return the per-prompt token counts for *input_lines* after they have
    been preprocessed into the USER/ASSISTANT prompt template.

    Counts include padding tokens, since the batch is tokenized with
    ``padding=True`` — all entries of the returned list share the padded
    length of the longest prompt.
    """
    prepared = self.preprocess(input_lines)
    tokenizer = self.__class__.tokenizer
    encoded = tokenizer(
        prepared["input_line"].tolist(),
        return_tensors="pt",
        padding=True,
    )
    return [len(ids) for ids in encoded.input_ids]

def transform(self, input_lines, src_lang='en', retriever=None, **kwargs):
df = self.preprocess(input_lines, src_lang, retriever, **kwargs)
def transform(self, input_lines, retriever=None, **kwargs):
df = self.preprocess(input_lines, retriever, **kwargs)
temperature = kwargs.get('temperature', 0.7)
top_p = kwargs.get('top_p', 0.95)
top_k = kwargs.get('top_k', 0)
Expand All @@ -62,6 +62,41 @@ def transform(self, input_lines, src_lang='en', retriever=None, **kwargs):
)

df["output_line"] = tok.batch_decode(m.generate(**generate_kwargs))
df["output_line"] = df.apply(lambda row: row["output_line"].replace('<s>',"").replace('</s>',"").replace(row["input_line"], "").strip(), axis=1)
df["output_line"] = df.apply(lambda row: _clean_output(row["input_line"], row["output_line"]), axis=1)

return df["output_line"].tolist()

completion = transform

def chat_completion(self, input_lines, **kwargs):
    """OpenAI-style chat completion.

    *input_lines* is a list of ``{"role": ..., "content": ...}`` messages.
    The messages are flattened into a single ``ROLE: content`` transcript
    ending with an open ``ASSISTANT: `` turn, the model generates a
    continuation, and the cleaned reply is returned as a single
    ``{"role": "assistant", "content": ...}`` message.

    Sampling knobs accepted via **kwargs (with defaults):
    temperature=0.7, top_p=0.95, top_k=0, repetition_penalty=1.15,
    max_new_tokens=512.
    """
    turns = [f'{msg["role"].upper()}: {msg["content"]}' for msg in input_lines]
    # Trailing open turn cues the model to answer as the assistant.
    turns.append("ASSISTANT: ")
    input_prompt = "\n".join(turns)

    tok = self.__class__.tokenizer
    m = self.__class__.model
    temperature = kwargs.get('temperature', 0.7)

    input_ids = tok([input_prompt], return_tensors="pt", padding=True).input_ids
    input_ids = input_ids.to(m.device)
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=kwargs.get('max_new_tokens', 512),
        temperature=temperature,
        # Greedy decoding when temperature is 0; sampling otherwise.
        do_sample=temperature > 0.0,
        top_p=kwargs.get('top_p', 0.95),
        top_k=kwargs.get('top_k', 0),
        repetition_penalty=kwargs.get('repetition_penalty', 1.15),
    )

    decoded = tok.batch_decode(m.generate(**generate_kwargs))[0]
    return {
        "role": "assistant",
        "content": _clean_output(input_prompt, decoded),
    }

0 comments on commit 749135e

Please sign in to comment.