In [1]:
import dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is what supplies the WATSONX_* values that
# generate_text reads from os.environ -- confirm against your .env file.
dotenv.load_dotenv()

True

In [2]:
# Delimiters used by get_llama_prompt to mark the instruction span and the
# embedded system-prompt span.
B_INST = "<s>[INST]"
E_INST = "[/INST]"
B_SYS = "<<SYS>>\n"
E_SYS = "\n<</SYS>>\n\n"

# Default system prompt, used whenever a caller does not supply one.
# It is a single line of text that ends with one space and no newline.
SYSTEM_PROMPT = (
    "You are an integral part of a word-searching algorithm. "
    "In essence, you are a linguistic expert being tasked with "
    "modifying and combining text in creative but coherent new ways. "
)


def get_llama_prompt(prompt, sys_prompt=None):
    """Wrap ``prompt`` in the module-level instruction/system delimiters.

    Args:
        prompt: User text placed between the system block and ``E_INST``.
        sys_prompt: System text to embed. Any falsy value (``None`` or an
            empty string) selects the module-level ``SYSTEM_PROMPT``.

    Returns:
        The concatenation
        ``B_INST + B_SYS + <system text> + E_SYS + prompt + E_INST``.
    """
    if not sys_prompt:
        sys_prompt = SYSTEM_PROMPT
    pieces = (B_INST, B_SYS, sys_prompt, E_SYS, prompt, E_INST)
    return "".join(pieces)

In [3]:

import os
from ibm_watson_machine_learning.foundation_models import Model
from ibm_watson_machine_learning.metanames import GenTextParamsMetaNames as GenParams


# Baseline generation parameters. generate_text copies this dict and overlays
# the caller's overrides on the copy before constructing the Model.
default_params = {
    GenParams.DECODING_METHOD: 'sample',
    GenParams.MIN_NEW_TOKENS: 10,
    GenParams.MAX_NEW_TOKENS: 25,
    GenParams.TEMPERATURE: 0.25,
    # GenParams.RANDOM_SEED: 42,
    GenParams.REPETITION_PENALTY: 1.05,
}

# Model identifier strings.
# NOTE(review): nothing in the visible notebook reads this list; presumably it
# records the ids that can be passed as `model` to generate_text -- confirm.
supported_models = [
    'bigcode/starcoder', 
    'bigscience/mt0-xxl', 
    'codellama/codellama-34b-instruct-hf', 
    'google/flan-t5-xl', 
    'google/flan-t5-xxl', 
    'google/flan-ul2', 
    'ibm-mistralai/mixtral-8x7b-instruct-v01-q', 
    'ibm/granite-13b-chat-v1', 
    'ibm/granite-13b-chat-v2', 
    'ibm/granite-13b-instruct-v1', 
    'ibm/granite-13b-instruct-v2', 
    'ibm/granite-20b-multilingual', 
    'meta-llama/llama-2-13b-chat', 
    'meta-llama/llama-2-70b-chat'
]

def generate_text(prompt, model='meta-llama/llama-2-13b-chat', new_params=None, sys_prompt=None):
    """Send ``prompt`` to a watsonx foundation model and return its text output.

    The prompt is wrapped with ``get_llama_prompt`` unless it already both
    starts with ``B_INST`` and ends with ``E_INST``.

    Args:
        prompt: Raw prompt text, or a prompt already wrapped in the
            ``B_INST`` ... ``E_INST`` markup.
        model: Model id passed to ``Model(model_id=...)``.
        new_params: Optional mapping of generation parameters overlaid on a
            copy of ``default_params``. ``None`` or an empty mapping means
            "use the defaults".
        sys_prompt: Optional system prompt forwarded to ``get_llama_prompt``
            when wrapping is needed.

    Returns:
        Whatever ``Model.generate_text`` returns for the final prompt.

    Raises:
        KeyError: If ``WATSONX_API_KEY``, ``WATSONX_URL`` or
            ``WATSONX_PROJECT_ID`` is missing from the environment.
    """
    # The original test was `not prompt.startswith(B_INST) and prompt.endswith(E_INST)`,
    # which parses as `(not startswith) and endswith`. A plain prompt therefore
    # never got wrapped. The intended check is "not already wrapped".
    already_wrapped = prompt.startswith(B_INST) and prompt.endswith(E_INST)
    if not already_wrapped:
        prompt = get_llama_prompt(prompt, sys_prompt)

    # Copy so the shared module-level defaults are never mutated.
    params = dict(default_params)
    if new_params:
        params.update(new_params)

    llm = Model(
        model_id=model,
        params=params,
        credentials={
            'apikey': os.environ['WATSONX_API_KEY'],
            'url': os.environ['WATSONX_URL'],
        },
        project_id=os.environ['WATSONX_PROJECT_ID'],
    )
    return llm.generate_text(prompt)

In [4]:
# print(generate_text('why is the sky blue?'))

In [5]:
import re
import string

def trim_incomplete_response(
        response, 
        delimiters=('.', '!', '?'), 
        comma_is_delimiter=False, 
        delim_follows_text=0,
        include_quotes=True,
        cutoff_str='...', 
        strip_ws=True,
        ):
    """Cut a possibly truncated response back to its last complete sentence.

    If ``response`` already ends with one of ``delimiters`` it is kept whole.
    Otherwise the text is scanned from the end towards the start, and the
    first acceptable cut point wins:

    * a delimiter character: the text is kept up to and including it;
    * a comma, when ``comma_is_delimiter`` is true: the text before the comma
      is kept and ``cutoff_str`` is appended in place of the comma.

    If no cut point is found the response is returned untrimmed.

    Args:
        response: Text to trim (converted with ``str``).
        delimiters: Single-character sentence terminators.
        comma_is_delimiter: Also allow cutting at a comma.
        delim_follows_text: When > 0, a delimiter only counts if the that-many
            characters immediately before it are all ASCII letters. This skips
            list markers such as ``"1."``. A delimiter with fewer preceding
            characters than this is rejected.
        include_quotes: If the character right after the cut point is ``'`` or
            ``"``, append it so a closing quote is not lost.
        cutoff_str: Text appended when the cut is made at a comma.
        strip_ws: Strip leading and trailing whitespace from the result.

    Returns:
        The trimmed string.
    """
    response = str(response)
    delimiters = tuple(delimiters)
    trimmed = response

    # An empty response has nothing to scan; one that already ends in a
    # delimiter is complete.
    if response and not response.endswith(delimiters):
        cut = None  # index of the delimiter/comma we cut at, if any
        for i in range(len(response) - 1, -1, -1):
            char = response[i]
            if char in delimiters:
                if delim_follows_text > 0:
                    # Guard against a negative slice start, which would wrap
                    # around and yield an empty (vacuously "all letters") slice.
                    if i < delim_follows_text:
                        continue
                    preceding = response[i - delim_follows_text:i]
                    if not all(c in string.ascii_letters for c in preceding):
                        continue
                trimmed = response[:i + 1]
                cut = i
                break
            if char == ',' and comma_is_delimiter:
                trimmed = response[:i] + cutoff_str
                cut = i
                break

        # Only look for a closing quote when a cut actually happened and a
        # following character exists (the cut may be the last character).
        if include_quotes and cut is not None and cut + 1 < len(response):
            following = response[cut + 1]
            if following in ("'", '"'):
                trimmed += following

    if strip_ws:
        trimmed = trimmed.strip()
    return trimmed


def extract_strings(mutated_text):
    """Return every text fragment in ``mutated_text`` matched by the pattern.

    A fragment starts with one or more word characters, continues over any mix
    of word characters, spaces, apostrophes and literal ``|`` characters, and
    may take one further arbitrary (non-newline) character, so a trailing
    punctuation mark or quote is included in the fragment.
    """
    fragment_pattern = re.compile(r"(\w+[\w| |']*.?)")
    return fragment_pattern.findall(mutated_text)

In [6]:
# test = \
# '''
# Take the following text and change some of the words significantly:

# 1. "A black horse on a white background."
#     - "The silhouette of a horse over a white background." 
#     - "A dark colored horse on a light colored background."

# 2. "Two businessmen shaking hands on a sidewalk."
#     - "One businessman shaking another businessman's hand on the street." 
#     - "Two businessmen greeting each other on the sidewalk."

# 3. "The sky appears blue because of a phenomenon called Rayleigh scattering."
#     - "The sky looks blue because of a scientific principle called Rayleigh scattering."
#     - "The sky's blue color is due to a natural process known as Rayleigh scattering."

# 4. "The
# '''
# result = trim_incomplete_response(test, delim_follows_text=False)
# print(f'\n\n{result}')
# result = trim_incomplete_response(test, delim_follows_text=2)
# print(f'\n\n{result}')

In [7]:
# Generation parameters that ask() copies and overlays with caller overrides
# before forwarding them to generate_text. The values currently match
# default_params exactly.
question_params = {
    GenParams.DECODING_METHOD: 'sample',
    GenParams.MIN_NEW_TOKENS: 10,
    GenParams.MAX_NEW_TOKENS: 25,
    GenParams.TEMPERATURE: 0.25,
    # GenParams.RANDOM_SEED: 42,
    GenParams.REPETITION_PENALTY: 1.05,
}


def ask(question, new_params={}, verbose=False):
    """Ask the model a question and return its answer trimmed to a clean end.

    Args:
        question: Prompt text passed to ``generate_text``.
        new_params: Overrides applied on top of a copy of ``question_params``.
        verbose: When true, print the question, the raw response and the
            trimmed response.

    Returns:
        The response after ``trim_incomplete_response`` with commas allowed as
        cut points, ``'.'`` as the cutoff string, and two letters required
        before a delimiter.
    """
    merged_params = question_params.copy()
    merged_params.update(new_params)
    if verbose:
        print(question)

    raw_answer = generate_text(question, new_params=merged_params)
    if verbose:
        print(raw_answer)

    answer = trim_incomplete_response(
        raw_answer,
        comma_is_delimiter=True,
        cutoff_str='.',
        delim_follows_text=2,
    )
    if verbose:
        print(answer)
    return answer


In [8]:
# Earlier full-sentence variants, kept for reference:
# STRONG = 'Take the following text and change it as much as possible while retaining the same meaning.'
# MEDIUM = 'Take the following text and change some of the words to rephrase the same meaning.'
# WEAK = 'Take the following text and tweak some of the wording while retaining the meaning.'

# Each strength is a suffix that make_mutation_prompt splices in right after
# "change some of the words".
WEAK, MEDIUM, STRONG = ' a little bit', '', ' significantly'

# Accepted values for make_mutation_prompt's `strength`, weakest first.
strengths = [WEAK, MEDIUM, STRONG]


def make_mutation_prompt(orig_text, strength=STRONG):
    """Build the few-shot rephrasing prompt for ``orig_text``.

    The prompt holds two worked examples and ends with ``orig_text`` as the
    quoted third item, leaving its rephrasings for the model to write.

    Args:
        orig_text: Sentence inserted as item 3.
        strength: One of the values in ``strengths``; spliced in after
            "change some of the words".

    Returns:
        The prompt string. It starts with a newline and has no trailing one.

    Raises:
        AssertionError: If ``strength`` is not in ``strengths``.
    """
    assert strength in strengths
    # One entry per prompt line. Two example lines end in a space that is part
    # of the prompt text, so it is written out explicitly here.
    prompt_lines = [
        '',
        f'Take the following text and change some of the words{strength}:',
        '',
        '1. "A black horse on a white background"',
        '    - "The silhouette of a horse over a white background" ',
        '    - "A dark colored horse on a light colored background"',
        '',
        '2. "Two businessmen shaking hands on a sidewalk"',
        '    - "One businessman shaking another businessman\'s hand on the street" ',
        '    - "Two businessmen greeting each other on the sidewalk"',
        '',
        f'3. "{orig_text}"',
    ]
    return '\n'.join(prompt_lines)

# Generation parameters that mutate() copies and overlays with caller
# overrides before forwarding them to generate_text.
mutate_params = {
    GenParams.DECODING_METHOD: 'sample',
    GenParams.MIN_NEW_TOKENS: 20,
    GenParams.MAX_NEW_TOKENS: 50,
    GenParams.TEMPERATURE: 0.60,
    # GenParams.RANDOM_SEED: 42,
    GenParams.REPETITION_PENALTY: 1.0,
}


def mutate(original_text, new_params={}, verbose=False, trim=2, extract=True):
    """Ask the model for rephrasings of ``original_text``.

    Args:
        original_text: Sentence to rephrase; inserted into the prompt built by
            ``make_mutation_prompt``.
        new_params: Overrides applied on top of a copy of ``mutate_params``.
        verbose: When true, print the prompt, the raw response and the
            trimmed response.
        trim: When > 0, the response is passed through
            ``trim_incomplete_response`` with this value as
            ``delim_follows_text`` and whitespace stripping disabled.
        extract: When true, return ``extract_strings`` of the response;
            otherwise return the response text itself.

    Returns:
        A list of extracted strings, or the response text when ``extract`` is
        false.
    """
    merged_params = mutate_params.copy()
    merged_params.update(new_params)

    prompt = make_mutation_prompt(original_text)
    if verbose:
        print(prompt, end='')

    response = generate_text(prompt, new_params=merged_params)
    if verbose:
        print(response)

    if trim > 0:
        response = trim_incomplete_response(response, delim_follows_text=trim, strip_ws=False)
    if verbose:
        print('trimmed:', response)

    if extract:
        return extract_strings(response)
    return response


In [9]:
# question_a = 'why is the sky blue?'
# print(question_a)

# answer_a = ask(question_a)
# print(answer_a)

# rephrasings_a = mutate(answer_a)
# for rephrasing in rephrasings_a:
#     print(rephrasing)


In [10]:
# question_b = 'why does the sea sometimes glow around me when I wade through it at night?'
# print(question_b)

# answer_b = ask(question_b, new_params={
#     GenParams.MIN_NEW_TOKENS: 10,
#     GenParams.MAX_NEW_TOKENS: 60,
#     })
# print(answer_b)

# rephrasings_b = mutate(answer_b, new_params={
#     GenParams.MIN_NEW_TOKENS: 10,
#     GenParams.MAX_NEW_TOKENS: 100,
#     })
# for rephrasing in rephrasings_b:
#     print(rephrasing)


In [11]:
def make_crossover_prompt(mother_text, father_text):
    """Build the few-shot prompt asking the model to combine two sentences.

    The prompt holds two worked examples and ends with the quoted pair
    ``"mother_text" + "father_text"`` as item 3, leaving the combinations for
    the model to write.

    Args:
        mother_text: First sentence of the pair.
        father_text: Second sentence of the pair.

    Returns:
        The prompt string. It starts with a newline and has no trailing one.
    """
    # One entry per prompt line. Four example lines end in a space that is
    # part of the prompt text, so it is written out explicitly here.
    prompt_lines = [
        '',
        'Take two sentences and combine them in multiple new ways:',
        '',
        '1. "A black horse on a white background." + "A silver fish traveling upstream."',
        '    - "The silhouette of a fish over a silver background." ',
        '    - "A black horse and silver fish." ',
        '    - "A dark colored horse traveling up a stream."',
        '    - "A black fish swimming up a white river."',
        '',
        '2. "Two businessmen shaking hands on a sidewalk." + "A graph showing the impact of various pesticides on the Colorado Potato Beetle."',
        '    - "A graph showing businessmen spraying pesticides." ',
        '    - "Two businessmen discussing a graph about Colorado Potato Beetle populations." ',
        '    - "A Colorado Potato Beetle on the sidewalk has died due to pesticides."',
        '    - "A Colorado Potato Beetle shakes hands with a businessman."',
        '',
        f'3. "{mother_text}" + "{father_text}"',
    ]
    return '\n'.join(prompt_lines)

# Generation parameters that crossover() copies and overlays with caller
# overrides before forwarding them to generate_text.
crossover_params = {
    GenParams.DECODING_METHOD: 'sample',
    GenParams.MIN_NEW_TOKENS: 20,
    GenParams.MAX_NEW_TOKENS: 120,
    GenParams.TEMPERATURE: 0.50,
    # GenParams.RANDOM_SEED: 42,
    GenParams.REPETITION_PENALTY: 1.1,
}

import re


def extract_strings(mutated_text):
    """Return the list of text fragments the pattern finds in ``mutated_text``.

    NOTE: this redefines ``extract_strings`` from the earlier cell using the
    same pattern; running this cell simply rebinds the name.

    Each fragment is a run of word characters, extended over word characters,
    spaces, apostrophes and literal ``|`` characters, plus at most one more
    arbitrary non-newline character.
    """
    return [
        match.group(1)
        for match in re.finditer(r"(\w+[\w| |']*.?)", mutated_text)
    ]


def crossover(mother_text, father_text, new_params={}, verbose=False, trim=2, extract=True):
    """Ask the model for sentences that combine ``mother_text`` and ``father_text``.

    Args:
        mother_text: First parent sentence.
        father_text: Second parent sentence.
        new_params: Overrides applied on top of a copy of ``crossover_params``.
        verbose: When true, print the prompt, the raw response and the
            trimmed response.
        trim: When > 0, the response is passed through
            ``trim_incomplete_response`` with this value as
            ``delim_follows_text`` and whitespace stripping disabled.
        extract: When true, return ``extract_strings`` of the response;
            otherwise return the response text itself.

    Returns:
        A list of extracted strings, or the response text when ``extract`` is
        false.
    """
    merged_params = crossover_params.copy()
    merged_params.update(new_params)

    prompt = make_crossover_prompt(mother_text, father_text)
    if verbose:
        print(prompt, end='')

    response = generate_text(prompt, new_params=merged_params)
    if verbose:
        print(response)

    if trim > 0:
        response = trim_incomplete_response(response, delim_follows_text=trim, strip_ws=False)
    if verbose:
        print('trimmed:', response)

    if extract:
        return extract_strings(response)
    return response


In [17]:
# import random


# set_a = [answer_a] + rephrasings_a
# set_b = [answer_b] + rephrasings_b
# # set_a = []
# # set_b = []
# combinations = [
#     [i, j, set_a[i], set_b[j]] for i in range(len(set_a)) for j in range(len(set_b))
# ]
# random.shuffle(combinations)


# combinations = combinations[:max(3, len(combinations))]
# for ai, bj, a, b in combinations:
#     print(f'mother {ai}:', a)
#     print(f'father {bj}:', b)
#     crossover_result = crossover(a, b)
#     print(crossover_result)
#     print()
#     # input()
    

NameError: name 'answer_a' is not defined

In [32]:
# Few-shot prompt for generate_random_samples: four example sentences followed
# by an open " - " bullet for the model to continue. It contains no
# placeholders, so a plain string is sufficient.
random_sample_prompt = '\n'.join([
    '',
    'Write some random sentences:',
    ' - "A black horse on a white background."',
    ' - "Two businessmen shaking hands on a sidewalk."',
    ' - "A graph showing the impact of various pesticides on the Colorado Potato Beetle."',
    ' - "The endless sky begets fathomless depths, unknowable secrets."',
    ' - ',
])

# Generation parameters that generate_random_samples() copies and overlays
# with caller overrides before forwarding them to generate_text.
random_sample_params = {
    GenParams.DECODING_METHOD: 'sample',
    GenParams.MIN_NEW_TOKENS: 5,
    GenParams.MAX_NEW_TOKENS: 50,
    GenParams.TEMPERATURE: 0.75,
    # GenParams.RANDOM_SEED: 42,
    GenParams.REPETITION_PENALTY: 1.3,
}

def generate_random_samples(new_params=None, trim=2, extract=True, min_length=10):
    """Ask the model to continue ``random_sample_prompt`` with new sentences.

    Args:
        new_params: Optional overrides applied on top of a copy of
            ``random_sample_params``. ``None`` or empty means defaults only.
        trim: When > 0, the response is passed through
            ``trim_incomplete_response`` with this value as
            ``delim_follows_text``, matching how ``mutate`` and ``crossover``
            use their ``trim`` argument. Previously the number was only used
            as an on/off flag and its value was ignored.
        extract: When true, split the response with ``extract_strings`` and
            keep only fragments of at least ``min_length`` characters.
            Otherwise return the response text itself.
        min_length: Minimum fragment length kept when ``extract`` is true.

    Returns:
        A list of sentence fragments, or the response text when ``extract`` is
        false.
    """
    params = dict(random_sample_params)
    if new_params:
        params.update(new_params)

    sample_text = generate_text(random_sample_prompt, new_params=params)
    if trim > 0:
        sample_text = trim_incomplete_response(sample_text, delim_follows_text=trim)

    if not extract:
        return sample_text
    return [
        fragment
        for fragment in extract_strings(sample_text)
        if len(fragment) >= min_length
    ]

    

# print(generate_random_samples())
# ['Even in silence I feel your presence around me like an echo.'] # jesus...

['In his youth he had been an avid cyclist;', 'now that age was creeping up and slowing him down,', 'it seemed like all he could do was pedal in circles.']


In [33]:
import random


def _collect_samples(count):
    """Call generate_random_samples until ``count`` strings exist; return the first ``count``."""
    collected = []
    while len(collected) < count:
        collected += generate_random_samples()
    return collected[:count]


# Number of sentences gathered for each parent pool.
target = 2
set_a = _collect_samples(target)
set_b = _collect_samples(target)

# Every (mother, father) pairing, stored as [index_a, index_b, text_a, text_b],
# then visited in random order.
combinations = [
    [i, j, text_a, text_b]
    for i, text_a in enumerate(set_a)
    for j, text_b in enumerate(set_b)
]
random.shuffle(combinations)

for combo in combinations:
    print(combo)


# combinations = combinations[:max(3, len(combinations))]
for ai, bj, a, b in combinations:
    print(f'mother {ai}:', a)
    print(f'father {bj}:', b)
    crossover_result = crossover(a, b)
    print(crossover_result)
    print()
    # input()
    

[1, 0, 'However he was good at what he did and people came from far afield to seek his expertise.', 'The large dog wagged its tail with excitement.']
[0, 0, 'existed in an unfashionable part of town.', 'The large dog wagged its tail with excitement.']
[0, 1, 'existed in an unfashionable part of town.', 'A purple monkey in a green forest eating blue berries.']
[1, 1, 'However he was good at what he did and people came from far afield to seek his expertise.', 'A purple monkey in a green forest eating blue berries.']
mother 1: However he was good at what he did and people came from far afield to seek his expertise.
father 0: The large dog wagged its tail with excitement.
['People came from far afield to seek the expertise of the large dog.', 'The large dog was good at what it did,', 'and people sought its expertise.', "The large dog's tail wagged with excitement as it helped people.", "The large dog's expertise was sought by people who traveled far."]

mother 0: existed in an unfashionabl