In [13]:
import dotenv
# Load environment variables from a .env file before anything reads os.environ.
# Presumably this is where WATSONX_API_KEY / WATSONX_URL / WATSONX_PROJECT_ID
# (read later in generate_text) come from -- confirm against the local .env file.
dotenv.load_dotenv()

True

In [14]:
# Markers that get_llama_prompt places around the user instruction ...
B_INST = "<s>[INST]"
E_INST = "[/INST]"
# ... and around the system prompt.
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# Default system prompt. The adjacent literals are concatenated into one
# single-line string; note that it ends with a trailing space.
SYSTEM_PROMPT = (
    "You are an integral part of a word-searching algorithm. "
    "In essence, you are a linguistic expert being tasked with "
    "modifying and combining text in creative but coherent new ways. "
)


def get_llama_prompt(prompt, sys_prompt=None):
    """Wrap ``prompt`` in the instruction/system markers defined above.

    Args:
        prompt: The user instruction text.
        sys_prompt: System prompt to embed. Any falsy value (``None``, ``''``)
            falls back to ``SYSTEM_PROMPT``.

    Returns:
        ``B_INST + B_SYS + <system prompt> + E_SYS + prompt + E_INST`` as one string.
    """
    system_text = sys_prompt if sys_prompt else SYSTEM_PROMPT
    pieces = (B_INST, B_SYS, system_text, E_SYS, prompt, E_INST)
    return ''.join(pieces)

In [15]:

import os
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams


# Baseline generation parameters. generate_text copies this dict and overlays
# the caller's `new_params` on top, so these act as defaults only.
default_params = {
    GenParams.DECODING_METHOD: 'sample',
    GenParams.MIN_NEW_TOKENS: 10,
    GenParams.MAX_NEW_TOKENS: 25,
    GenParams.TEMPERATURE: 0.25,
    # Fixed seed; question_params and mutate_params below leave it unset.
    GenParams.RANDOM_SEED: 42,
    GenParams.REPETITION_PENALTY: 1.05,
}

# Model ids, presumably those available on the target watsonx deployment
# (TODO confirm). Not referenced by any code visible in this notebook;
# generate_text does not validate its `model` argument against this list.
supported_models = [
    'bigcode/starcoder', 
    'bigscience/mt0-xxl', 
    'codellama/codellama-34b-instruct-hf', 
    'google/flan-t5-xl', 
    'google/flan-t5-xxl', 
    'google/flan-ul2', 
    'ibm-mistralai/mixtral-8x7b-instruct-v01-q', 
    'ibm/granite-13b-chat-v1', 
    'ibm/granite-13b-chat-v2', 
    'ibm/granite-13b-instruct-v1', 
    'ibm/granite-13b-instruct-v2', 
    'ibm/granite-20b-multilingual', 
    'meta-llama/llama-2-13b-chat', 
    'meta-llama/llama-2-70b-chat'
]

def generate_text(prompt, model='meta-llama/llama-2-13b-chat', new_params=None, sys_prompt=None):
    """Generate a completion for ``prompt`` using a watsonx foundation model.

    The prompt is wrapped with ``get_llama_prompt`` unless it already starts
    with ``B_INST`` and ends with ``E_INST``. The wrapping is applied
    regardless of which ``model`` is selected.

    Args:
        prompt: Raw instruction text, or an already wrapped prompt.
        model: Model id passed to ``Model(model_id=...)``.
        new_params: Optional mapping of generation parameters overlaid on
            ``default_params``. ``None`` (the default) means no overrides.
        sys_prompt: Optional system prompt forwarded to ``get_llama_prompt``.

    Returns:
        The value returned by ``Model.generate_text`` (the rest of this
        notebook treats it as a ``str``).

    Raises:
        KeyError: If ``WATSONX_API_KEY``, ``WATSONX_URL`` or
            ``WATSONX_PROJECT_ID`` is missing from the environment.
    """
    # The parentheses matter: `not a and b` parses as `(not a) and b`, which
    # would skip wrapping for every plain prompt (it never ends with E_INST).
    already_wrapped = prompt.startswith(B_INST) and prompt.endswith(E_INST)
    if not already_wrapped:
        prompt = get_llama_prompt(prompt, sys_prompt)

    # Copy so the shared module-level defaults are never modified.
    params = dict(default_params)
    params.update(new_params or {})

    llm = Model(
        model_id=model,
        params=params,
        credentials={
            'apikey': os.environ['WATSONX_API_KEY'],
            'url': os.environ['WATSONX_URL'],
        },
        project_id=os.environ['WATSONX_PROJECT_ID'],
    )
    return llm.generate_text(prompt)

In [16]:
# print(generate_text('why is the sky blue?'))

In [17]:
import string

def trim_incomplete_response(
        response,
        delimiters=('.', '!', '?'),
        comma_is_delimiter=False,
        delim_follows_text=0,
        include_quotes=True,
        cutoff_str='...',
        strip_ws=True,
        ):
    """Cut a possibly truncated response back to its last complete sentence.

    If ``response`` already ends with one of ``delimiters`` (or is empty) it is
    returned unchanged, apart from optional whitespace stripping. Otherwise the
    text is scanned from the end and cut at the first acceptable cut point.

    Args:
        response: The text to trim.
        delimiters: Tuple of sentence-ending characters. The text is cut just
            after the last acceptable one.
        comma_is_delimiter: If true, a comma is also a cut point. The comma
            itself is dropped and ``cutoff_str`` is appended in its place.
        delim_follows_text: If greater than 0, a delimiter is only accepted
            when the ``delim_follows_text`` characters immediately before it
            are all ASCII letters (so the "." in "2." is skipped). Does not
            apply to commas.
        include_quotes: If true and the character right after the cut point is
            a single or double quote, that quote is appended to the result.
        cutoff_str: Text appended when cutting at a comma.
        strip_ws: If true, strip leading/trailing whitespace from the result.

    Returns:
        The trimmed string. If no acceptable cut point exists, the whole
        response is returned.

    Raises:
        AssertionError: If ``response`` is non-empty, does not end with a
            delimiter, and ``delim_follows_text`` is not smaller than its length.
    """
    trimmed = str(response)
    if response and not response.endswith(delimiters):
        assert delim_follows_text < len(response), (
            f'delim_follows_text ({delim_follows_text}) must be '
            f'less than the length of the response ({len(response)})'
        )
        cut = None  # index of the delimiter/comma we cut at; None if not found
        for i in range(len(response) - 1, -1, -1):
            char = response[i]
            if char in delimiters:
                if delim_follows_text > 0:
                    # Too close to the start to have enough preceding text;
                    # checked explicitly because a negative slice start would
                    # wrap around and produce a misleading (often empty) slice.
                    if i < delim_follows_text:
                        continue
                    preceding = response[i - delim_follows_text:i]
                    if not all(c in string.ascii_letters for c in preceding):
                        continue
                trimmed = response[:i + 1]
                cut = i
                break
            if char == ',' and comma_is_delimiter:
                trimmed = response[:i] + cutoff_str
                cut = i
                break
        # Only look for a closing quote when a cut was made and a following
        # character actually exists.
        if include_quotes and cut is not None and cut + 1 < len(response):
            following = response[cut + 1]
            if following in ("'", '"'):
                trimmed += following
    # Honour strip_ws on every path, including responses that needed no trimming.
    if strip_ws:
        trimmed = trimmed.strip()
    return trimmed

In [18]:
# test = \
# '''
# Take the following text and change some of the words significantly:

# 1. "A black horse on a white background."
#     - "The silhouette of a horse over a white background." 
#     - "A dark colored horse on a light colored background."

# 2. "Two businessmen shaking hands on a sidewalk."
#     - "One businessman shaking another businessman's hand on the street." 
#     - "Two businessmen greeting each other on the sidewalk."

# 3. "The sky appears blue because of a phenomenon called Rayleigh scattering."
#     - "The sky looks blue because of a scientific principle called Rayleigh scattering."
#     - "The sky's blue color is due to a natural process known as Rayleigh scattering."

# 4. "The
# '''
# result = trim_incomplete_response(test, delim_follows_text=False)
# print(f'\n\n{result}')
# result = trim_incomplete_response(test, delim_follows_text=2)
# print(f'\n\n{result}')

In [19]:
# Generation parameters used by ask(). Same values as default_params except
# that RANDOM_SEED is left unset (see the commented-out line).
question_params = {
    GenParams.DECODING_METHOD: 'sample',
    GenParams.MIN_NEW_TOKENS: 10,
    GenParams.MAX_NEW_TOKENS: 25,
    GenParams.TEMPERATURE: 0.25,
    # GenParams.RANDOM_SEED: 42,
    GenParams.REPETITION_PENALTY: 1.05,
}

# Sample question; the demo cells further down reassign this name.
question = 'why is the sky blue?'


def ask(question, new_params={}, verbose=False):
    """Send ``question`` to the model and return a tidied-up answer.

    Args:
        question: The prompt passed to ``generate_text``.
        new_params: Generation parameters overlaid on ``question_params``.
        verbose: If true, print the question, the raw response and the
            trimmed response.

    Returns:
        The response after ``trim_incomplete_response`` has cut it back to a
        complete sentence (commas count as cut points and are replaced by '.').
    """
    def _log(text):
        if verbose:
            print(text)

    merged_params = question_params.copy()
    merged_params.update(new_params)

    _log(question)
    raw_answer = generate_text(question, new_params=merged_params)
    _log(raw_answer)

    tidy_answer = trim_incomplete_response(
        raw_answer,
        delim_follows_text=2,
        comma_is_delimiter=True,
        cutoff_str='.',
    )
    _log(tidy_answer)
    return tidy_answer


In [20]:
# Phrases spliced into the instruction sentence to control how aggressively
# the model is asked to change the text.
WEAK = ' a little bit'
MEDIUM = ''
STRONG = ' significantly'
strengths = [WEAK, MEDIUM, STRONG]


def mutate_prompt(orig_text, strength=STRONG):
    """Build the few-shot prompt that asks the model to reword ``orig_text``.

    Args:
        orig_text: The text to reword; inserted as item 3 of the list.
        strength: One of ``strengths``; appended to the instruction sentence.

    Returns:
        The prompt string: a leading newline, the instruction, two worked
        examples, and ``orig_text`` as the open third item.

    Raises:
        AssertionError: If ``strength`` is not one of ``strengths``.
    """
    assert strength in strengths
    # One literal per prompt line; two example lines carry a trailing space
    # that is part of the prompt text.
    prompt_lines = [
        '',
        f'Take the following text and change some of the words{strength}:',
        '',
        '1. "A black horse on a white background"',
        '    - "The silhouette of a horse over a white background" ',
        '    - "A dark colored horse on a light colored background"',
        '',
        '2. "Two businessmen shaking hands on a sidewalk"',
        '    - "One businessman shaking another businessman\'s hand on the street" ',
        '    - "Two businessmen greeting each other on the sidewalk"',
        '',
        f'3. "{orig_text}"',
    ]
    return '\n'.join(prompt_lines)

# Generation parameters used by mutate(). Compared with question_params:
# longer output window (20-50 new tokens), higher temperature (0.60) and a
# repetition penalty of 1.0. RANDOM_SEED is left unset here as well.
mutate_params = {
    GenParams.DECODING_METHOD: 'sample',
    GenParams.MIN_NEW_TOKENS: 20,
    GenParams.MAX_NEW_TOKENS: 50,
    GenParams.TEMPERATURE: 0.60,
    # GenParams.RANDOM_SEED: 42,
    GenParams.REPETITION_PENALTY: 1.0,
}

import re


def extract_mutation_strings(mutated_text):
    """Pull the individual reworded sentences out of a model response.

    A sentence is a run that starts with a word character, may contain word
    characters, spaces, apostrophes, commas, semicolons, colons and hyphens,
    ends on a word character or apostrophe, and is optionally followed by one
    of ``.``, ``!`` or ``?``. Bullet markers, surrounding double quotes and
    line breaks are therefore excluded from the results.

    Args:
        mutated_text: The (possibly trimmed) response text.

    Returns:
        A list of the matched strings in order of appearance; empty if there
        are none.
    """
    # The terminator is an explicit character class: a bare `.` would match
    # *any* character and glue a closing quote, comma or hyphen onto the result.
    # Inside a character class `|` is a literal pipe, so it is not used here.
    sentence = r"\w(?:[\w ',;:\-]*[\w'])?[.!?]?"
    return re.findall(sentence, mutated_text)


def mutate(original_text, new_params={}, verbose=False, trim=2, extract=True):
    """Ask the model for reworded versions of ``original_text``.

    Args:
        original_text: Text to reword; embedded in the prompt by ``mutate_prompt``.
        new_params: Generation parameters overlaid on ``mutate_params``.
        verbose: If true, print the prompt, the raw response and the
            (possibly trimmed) response.
        trim: If greater than 0, the response is passed through
            ``trim_incomplete_response`` with ``delim_follows_text=trim`` and
            without whitespace stripping.
        extract: If true, return the list produced by
            ``extract_mutation_strings``; otherwise return the response text.

    Returns:
        A list of extracted strings, or the response string when ``extract``
        is false.
    """
    gen_params = mutate_params.copy()
    gen_params.update(new_params)

    prompt_text = mutate_prompt(original_text)
    if verbose:
        print(prompt_text, end='')

    reply = generate_text(prompt_text, new_params=gen_params)
    if verbose:
        print(reply)

    if trim > 0:
        reply = trim_incomplete_response(reply, strip_ws=False, delim_follows_text=trim)
    if verbose:
        print('trimmed:', reply)

    if not extract:
        return reply
    return extract_mutation_strings(reply)


In [21]:
# Demo: ask a question, then generate rewordings of the answer.
# NOTE(review): 'they' looks like a typo for 'the'; left unchanged because the
# recorded cell output echoes this exact string.
question = 'why is they sky blue?'
print(question)

# Uses question_params as-is (no overrides).
answer = ask(question)
print(answer)

# mutate() returns a list of extracted strings by default (extract=True).
rephrasings = mutate(answer)
for rephrasing in rephrasings:
    print(rephrasing)


why is they sky blue?
The sky appears blue because of a phenomenon called Rayleigh scattering.
The sky appears blue because of a process called Rayleigh scattering.
The sky has a blue color due to a scientific concept called Rayleigh scattering.


In [27]:
# Demo: same ask -> mutate flow as the previous cell, with larger token limits
# than the question_params / mutate_params defaults.
question = 'why does the sea sometimes glow around me when I wade through it at night?'
print(question)

# Raise MAX_NEW_TOKENS from the question_params value of 25 to 60.
answer = ask(question, new_params={
    GenParams.MIN_NEW_TOKENS: 10,
    GenParams.MAX_NEW_TOKENS: 60,
    })
print(answer)

# Lower MIN_NEW_TOKENS (20 -> 10) and raise MAX_NEW_TOKENS (50 -> 100)
# relative to mutate_params.
rephrasings = mutate(answer, new_params={
    GenParams.MIN_NEW_TOKENS: 10,
    GenParams.MAX_NEW_TOKENS: 100,
    })
for rephrasing in rephrasings:
    print(rephrasing)


why does the sea sometimes glow around me when I wade through it at night?
The sea can sometimes glow at night due to a phenomenon called bioluminescence. This occurs when microorganisms such as plankton, algae, or bacteria in the water produce light as a result of chemical reactions.
The ocean can sometimes emit a glowing light at night due to the presence of microorganisms that produce light as a result of chemical reactions.
The waves can sometimes shimmer in the dark due to the presence of tiny organisms that emit light.
