# Generation Task

## Load Packages

In [1]:
import os
import json
from tqdm import tqdm
from langchain_openai import ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI, \
    HarmBlockThreshold, HarmCategory
from langchain_anthropic import ChatAnthropic
from langchain.prompts import ChatPromptTemplate
from _api_key import get_openai_api_key, get_google_api_key, get_claude_api_key

## Basic Functions

In [2]:
def load_json_file(file_path):
    """
    Load a JSON file and return the decoded object.

    Args:
        file_path: Path of the JSON file; it is opened as UTF-8 text.

    Returns:
        The object decoded by `json.load` (dict, list, ...).
    """
    # The `with` block closes the handle on exit, so no explicit close() is needed.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def save_json_file(file, file_path, sort_keys:bool=False):
    """
    Serialize an object to a JSON file.

    Args:
        file: JSON-serializable object to write.
        file_path: Destination path; the file is (over)written as UTF-8 text.
        sort_keys: Forwarded to `json.dump`; when True, dict keys are written sorted.

    The output uses a 4-space indent and keeps non-ASCII characters unescaped.
    """
    # The `with` block closes the handle on exit, so no explicit close() is needed.
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(file, f, indent=4, ensure_ascii=False, sort_keys=sort_keys)

In [3]:
def splitDataset(file, puntype='hom'):
    """
    Split a dataset into its pun entries and its non-pun entries.

    `file` is either a path to a JSON file (loaded with load_json_file) or an
    already loaded mapping of ID -> entry. Only IDs containing `puntype` are
    considered; an entry counts as a pun when its 'pun_word' value is truthy.
    Returns the tuple (pun entries, non-pun entries).
    """
    records = load_json_file(file) if isinstance(file, str) else file
    with_pun, without_pun = {}, {}
    for key in records:
        entry = records[key]
        # IDs of a different pun type are dropped from both outputs.
        if puntype not in key:
            continue
        bucket = with_pun if entry.get('pun_word', False) else without_pun
        bucket[key] = entry
    return with_pun, without_pun

In [4]:
def add_human_pun(pun_dataset, save:bool=False, path_gen='pun_generation.json'):
    """
    Add human-written puns to the generation record.

    Args:
        pun_dataset: Mapping of ID -> entry; every entry must have a 'human_text' key.
        save: When True, write the merged record back to the results file.
        path_gen: File name of the record, resolved relative to './results/'.

    Returns:
        The merged record (ID -> dict containing at least 'human_text'), so the
        result is also usable when `save` is False.
    """
    path_gen = './results/' + path_gen
    # Start from the existing record, if any, so other keys stored per ID are kept.
    pun_generation = load_json_file(path_gen) if os.path.exists(path_gen) else dict()
    for ID, data in pun_dataset.items():
        pun_generation.setdefault(ID, dict())['human_text'] = data['human_text']
    if save:
        # Create the results directory on first use instead of failing on open().
        os.makedirs(os.path.dirname(path_gen), exist_ok=True)
        save_json_file(pun_generation, path_gen)
    return pun_generation

## Generation Function

In [5]:
def call_llm_to_generate(model, dataset, model_name:str=None, method:int=0, examples:dict=None,
                         save:bool=False, path_gen='pun_generation.json', batch_size:int=1):
    """
    Generate one sentence per dataset entry with an LLM and record the results.

    method=0: Generate non-pun sentence with only a single layer of meaning
    method=1: Given pun pair, generate pun directly
    method=2: Given specific contextual word set, generate pun directly

    Args:
        model: Chat model object; it must provide `batch(list_of_message_lists)`
            whose results expose `.content`.
        dataset: Mapping of ID -> entry. Each entry is read for 'pun_word',
            'pun_sense', 'alter_word', 'alter_sense', 'human_keywords',
            'human_text' and 'pun_word_ind' (the number after the last '_' is
            the 1-based position of the punchline among the space-separated
            words of 'human_text').
        model_name: Name used in the record key f"{model_name}_text". When None
            it is taken from `model.model_name` (or `model.model`), keeping only
            the part after the last '/'.
        method: One of 0, 1, 2 (see above).
        examples: Optional mapping of ID -> example entry used as few-shot
            examples for methods 1 and 2; ignored for method 0.
        save: When True, the record is written to disk after every batch.
        path_gen: File name of the record, resolved relative to './results/'.
        batch_size: Number of dataset entries sent to `model.batch` at once.

    Returns:
        The record dict: ID -> {f"{model_name}_text": {f"method {method}": sentence}},
        merged with whatever was already stored in the record file.
    """
    import ast  # local import: only needed for the literal fallback in parse_output

    def parse_output(ID, output:str):
        # Extract the generated sentence from the raw model output.
        # `ID` is unused here; it is kept so failures can be printed while debugging.
        if not isinstance(output, str):
            return 'No correctly parsed result.'
        # Narrow the text to the first '{' and the first '}' that follows it.
        start = output.find('{')
        end = output.find('}', start) if start != -1 else -1
        snippet = output[start: end+1] if end != -1 else output
        # NOTE: the output is untrusted model text, so it is parsed as data
        # (JSON first, then a Python literal) and never executed with eval().
        try:
            parsed = json.loads(snippet)
        except ValueError:
            try:
                parsed = ast.literal_eval(snippet)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                parsed = snippet  # not parsable: fall back to the raw text below
        if isinstance(parsed, dict) and 'Sentence' in parsed:
            return parsed['Sentence']
        if isinstance(parsed, str):
            # Best effort: keep whatever follows the last 'Sentence' marker.
            return parsed.split('Sentence')[-1]
        return 'No correctly parsed result.'

    def get_punchline_elements(data:dict):
        # Get pun word/sense, alternative word/sense, etc. from data
        return (data['pun_word'], data['pun_sense'],
                data['alter_word'], data['alter_sense'],
                data['human_keywords'])

    def format_examples(with_context:bool):
        # Build the few-shot section shared by method 1 and method 2.
        if examples is None:
            return ''
        blocks = []
        for example in examples.values():
            pun_word, pun_sense, alter_word, alter_sense, context_words = get_punchline_elements(data=example)
            punchline, text = example['punchline'], example['human_text']
            parts = [f"Keyword: {punchline}\n",
                     f"Meaning 1: {pun_word} <{pun_sense}>\n",
                     f"Meaning 2: {alter_word} <{alter_sense}>\n"]
            if with_context:
                parts.append(f"Contextual Words: {', '.join(context_words)}.\n")
            # Four braces survive the f-string as two, which the prompt template
            # then renders as one literal brace.
            parts.append(f"Output:\n{{{{\"Sentence\": \"{text}\"}}}}")
            blocks.append(''.join(parts))
        return '\n\n<*Examples*>\n' + '\n\n'.join(blocks)

    assert method in [0, 1, 2], 'method must be 0, 1 or 2'
    path_gen = './results/' + path_gen
    # Resume from an existing record so finished IDs are not generated again.
    record_gen = load_json_file(path_gen) if os.path.exists(path_gen) else dict()

    # [A]. Construct the prompt
    definition = """<*Definition*>\nPuns are a form of wordplay exploiting different meanings of a word or similar-sounding words, while non-puns are jokes or statements that don't rely on such linguistic ambiguities.\n\n"""
    # Method=0: Generate non-pun that only expresses one sense
    if method == 0:
        instruction = """<*Instruction*>\nBelow is a keyword and one of its meanings. Please generate a non-pun sentence with the keyword that conveys the given meaning. You must output the current status in a parsable JSON format. An example output looks like:\n{{"Sentence": "XXX"}}"""
        examples_string = ''
        testing = """\n\n<*Your Response*>\nKeyword: {punchline}\nMeaning: {pun_word} <{pun_sense}>\nOutput:"""
    # Method=1: Generate pun directly
    elif method == 1:
        instruction = """<*Instruction*>\nBelow is a keyword and two of its meanings. Please generate a pun sentence with punchline on the keyword that conveys both given meanings simultaneously. Except for the keyword, the pun sentence must not utilize any words from either of the two meanings. Besides, once a keyword is used, it's strictly prohibited to use it again in the latter half of the sentence.\nYou must output the current status in a parsable JSON format. An example output looks like:\n{{"Sentence": "XXX"}}"""
        examples_string = format_examples(with_context=False)
        testing = """\n\n<*Your Response*>\nKeyword: {punchline}\nMeaning 1: {pun_word} <{pun_sense}>\nMeaning 2: {alter_word} <{alter_sense}>\nOutput:"""
    # Method=2: Generate puns directly based on specific contextual words
    else:
        instruction = """<*Instruction*>\nBelow is a keyword, two of its meanings and a set of contextual words. Please generate a pun sentence with punchline on the keyword that conveys both given meanings simultaneously and using all the contextual words. Except for the keyword, the pun sentence must not utilize any words from either of the two meanings. Besides, once a keyword is used, it's strictly prohibited to use it again in the latter half of the sentence.\nYou must output the current status in a parsable JSON format. An example output looks like:\n{{"Sentence": "XXX"}}"""
        examples_string = format_examples(with_context=True)
        testing = """\n\n<*Your Response*>\nKeyword: {punchline}\nMeaning 1: {pun_word} <{pun_sense}>\nMeaning 2: {alter_word} <{alter_sense}>\nContextual Words: {context_words}.\nOutput:"""
    # Combine all parts together
    prompt_string = definition + instruction + examples_string + testing
    chat_prompt = ChatPromptTemplate.from_template(prompt_string)

    # [B]. Call llm to generate
    if model_name is None:
        model_name = model.model_name if hasattr(model,'model_name') else model.model
        model_name = model_name.split('/')[-1]
    key_gen = f"{model_name}_text"
    method_key = f'method {method}'
    IDs = list(dataset.keys())
    # IDs that already hold a non-empty result for this model and method.
    IDs_loaded = {ID for ID, record in record_gen.items()
                  if (record.get(key_gen) or dict()).get(method_key)}
    if save:
        # Create the results directory on first use instead of failing on open().
        os.makedirs(os.path.dirname(path_gen), exist_ok=True)
    # The progress bar counts single entries, while work is only done at the
    # first index of each batch; range membership keeps that test O(1).
    batch_ind = range(0, len(IDs), batch_size)
    for ind in tqdm(range(len(IDs))):
        if ind not in batch_ind:
            continue
        # Remove the data that has already been run (dataset order is preserved)
        IDs_batch = [ID for ID in IDs[ind: ind+batch_size] if ID not in IDs_loaded]
        if not IDs_batch:
            continue
        _inputs = []
        for ID in IDs_batch:
            data = dataset[ID]
            # 'pun_word_ind' ends in a 1-based word position; convert to 0-based.
            pun_ind = int(data['pun_word_ind'].split('_')[-1]) - 1
            punchline = data['human_text'].split(' ')[pun_ind]
            pun_word, pun_sense, alter_word, alter_sense, context_words = get_punchline_elements(data=data)
            _inputs.append(chat_prompt.format_messages(pun_word=pun_word, pun_sense=pun_sense,
                                                       alter_word=alter_word, alter_sense=alter_sense,
                                                       punchline=punchline, context_words=', '.join(context_words)))
        _outputs = [out.content for out in model.batch(_inputs)]
        for ID, out in zip(IDs_batch, _outputs):
            sentence = parse_output(ID, out)
            record_gen.setdefault(ID, dict()).setdefault(key_gen, dict())[method_key] = sentence
        # Persist after every batch so an interrupted run can be resumed.
        if save:
            save_json_file(record_gen, path_gen)
    return record_gen

## Dataset and Examples

In [6]:
# Locations of the two dataset files, relative to the working directory.
hom_path = r'./dataset/hom_dataset.json'
het_path = r'./dataset/het_dataset.json'
# splitDataset keeps only IDs containing the given puntype (default 'hom') and
# separates entries with a non-empty 'pun_word' from those without one.
hom_punDataset, hom_nonpunDataset = splitDataset(hom_path)
het_punDataset, het_nonpunDataset = splitDataset(het_path, puntype='het')

# Optional one-off step: store the human-written puns of both pun datasets in the results file.
# add_human_pun(dict(**hom_punDataset, **het_punDataset), save=True)

In [7]:
# Manually chosen few-shot examples for the 'hom' dataset.
# They are passed as `examples` to call_llm_to_generate, which reads 'pun_word',
# 'pun_sense', 'alter_word', 'alter_sense', 'punchline', 'human_text' and
# 'human_keywords' from every entry.
hom_examples = {
    "hom_705":{
        "pun_word": "toll",
        "pun_sense": "a fee levied for the use of roads or bridges (used for maintenance)",
        "alter_word": "toll",
        "alter_sense": "value measured by what must be given or done or undergone to obtain something",
        "punchline": "toll",
        "human_text":"Driving on so many turnpikes was taking its toll.",
        "human_keywords": ["Driving", "many", "turnpikes", "taking its toll"],
    },
    "hom_488":{
        "pun_word": "bore",
        "pun_sense": "make a hole, especially with a pointed power or hand tool",
        "alter_word": "bore",
        "alter_sense": "cause to be bored",
        "punchline": "bored",
        "human_text":"A carpenter sat on his drill and was bored to tears.",
        "human_keywords": ["carpenter", "sat", "drill", "bored to tears"],
    },
    "hom_1556":{
        "pun_word": "foil",
        "pun_sense": "a piece of thin and flexible sheet metal",
        "alter_word": "foil",
        "alter_sense": "hinder or prevent (the efforts, plans, or desires) of",
        "punchline": "foiled",
        "human_text":"One leftover said to another 'foiled again.'",
        "human_keywords": ["leftover", "foiled", "again"],
    }
}

# Manually chosen few-shot examples for the 'het' dataset; same fields as the
# dataset entries read by call_llm_to_generate, plus an explicit 'punchline'.
# Here 'pun_word' and 'alter_word' may differ (e.g. "c" vs "sea").
het_examples = {
    "het_633": {
        "pun_word": "sagely",
        "pun_sense": "in a wise manner",
        "alter_word": "sage",
        "alter_sense": "aromatic fresh or dried grey-green leaves used widely as seasoning for meats and fowl and game etc",
        "punchline": "sagely",
        "human_text": "This fowl has been stuffed, said Tom sagely.",
        "human_keywords": ["fowl", "stuffed", "sagely"],
    },
    "het_530": {
        "pun_word": "toll",
        "pun_sense": "ring slowly",
        "alter_word": "tell off",
        "alter_sense": "reprimand",
        "punchline": "tolled",
        "human_text": "A tangled bell ringer tolled himself off.",
        "human_keywords": ["tangled", "bell ringer", "tolled himself off"],
    },
    "het_325": {
        "pun_word": "c",
        "pun_sense": "the 3rd letter of the Roman alphabet",
        "alter_word": "sea",
        "alter_sense": "a division of an ocean or a large body of salt water partially enclosed by land",
        "punchline": "c",
        "human_text": "An illiterate fisherman was lost at c.",
        "human_keywords": ["illiterate", "fisherman", "c"],
    }
}

## Generation

### gpt3.5

In [8]:
# Connect gpt-3.5-turbo-1106 through the OpenAI chat API.
# No model_name is passed to call_llm_to_generate below, so the record key is
# derived from the model object (presumably 'gpt-3.5-turbo-1106_text' - verify).
gpt35_name = 'gpt-3.5-turbo-1106'
temperature = 0.7  # sampling temperature handed to the chat model
openai_api_key = get_openai_api_key()  # use your api key
gpt35 = ChatOpenAI(model_name=gpt35_name, temperature=temperature,
                   openai_api_key=openai_api_key, request_timeout=120)

In [9]:
# Generation method 0 (non-pun), 10 entries per batch, saved after every batch.
# NOTE: method 0 builds its prompt without few-shot examples, so the `examples`
# argument has no effect on these two calls.
call_llm_to_generate(model=gpt35, dataset=hom_punDataset, method=0, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=gpt35, dataset=het_punDataset, method=0, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [02:55<00:00,  4.62it/s]
100%|██████████| 647/647 [03:12<00:00,  3.36it/s]


In [10]:
# Generation method 1 (pun from the word/sense pair, with few-shot examples),
# 10 entries per batch, saved after every batch.
call_llm_to_generate(model=gpt35, dataset=hom_punDataset, method=1, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=gpt35, dataset=het_punDataset, method=1, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [03:36<00:00,  3.74it/s]
  8%|▊         | 51/647 [00:15<02:45,  3.61it/s]

het_848 {"Sentence": "I can't put this book down, it's so engaging," said the musician, reedily."}


 16%|█▌        | 101/647 [00:25<01:49,  5.01it/s]

het_708 {"Sentence": "I asked the computer for a snack, and it said, 'I can't eat, but I can byte!'}


 19%|█▊        | 121/647 [00:29<01:45,  4.99it/s]

het_665 {"Sentence": "I can't watch, it's too loud!" the moviegoer screamed at the screen."}


 84%|████████▎ | 541/647 [01:50<00:18,  5.66it/s]

het_1221 {"Sentence": "I can only lift on the weekends," said the weightlifter weakly."}


100%|██████████| 647/647 [02:09<00:00,  4.98it/s]


In [12]:
# Generation method 2 (pun that must also use the contextual words taken from
# 'human_keywords'), 10 entries per batch, saved after every batch.
call_llm_to_generate(model=gpt35, dataset=hom_punDataset, method=2, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=gpt35, dataset=het_punDataset, method=2, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [02:38<00:00,  5.11it/s]
100%|██████████| 647/647 [02:06<00:00,  5.11it/s]


### gpt4

In [14]:
# Connect gpt-4-1106-preview through the OpenAI chat API (same settings as gpt-3.5).
gpt4_name = 'gpt-4-1106-preview'
temperature = 0.7  # sampling temperature handed to the chat model
openai_api_key = get_openai_api_key()  # use your api key
gpt4 = ChatOpenAI(model_name=gpt4_name, temperature=temperature,
                    openai_api_key=openai_api_key, request_timeout=120)

In [15]:
# Generation method 1 (pun from the word/sense pair, with few-shot examples),
# 10 entries per batch, saved after every batch.
call_llm_to_generate(model=gpt4, dataset=hom_punDataset, method=1, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=gpt4, dataset=het_punDataset, method=1, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [04:17<00:00,  3.14it/s]
 30%|██▉       | 191/647 [00:47<01:44,  4.38it/s]

het_473 {"Sentence": "I'll accept your gift of flowers willingly," she said gladly."}


100%|██████████| 647/647 [02:48<00:00,  3.83it/s]


In [17]:
# Generation method 2 (pun that must also use the contextual words),
# 10 entries per batch, saved after every batch.
call_llm_to_generate(model=gpt4, dataset=hom_punDataset, method=2, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=gpt4, dataset=het_punDataset, method=2, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [03:27<00:00,  3.90it/s]
100%|██████████| 647/647 [04:26<00:00,  2.42it/s]


### gemini-pro

In [19]:
# Connect gemini-pro.
# The model is configured via `model=`; call_llm_to_generate falls back to
# `model.model` when the object has no `model_name` attribute and keeps only
# the part after the last '/' for the record key.
gemini_name = 'gemini-pro'
temperature = 0.7  # sampling temperature handed to the chat model
google_api_key = get_google_api_key()  # use your api key
gemini = ChatGoogleGenerativeAI(
        model=gemini_name,
        google_api_key=google_api_key,
        temperature=temperature,
        transport='rest',
        # Blocking is set to BLOCK_NONE for each of the four listed harm categories.
        safety_settings={
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE
            },
)

In [20]:
# Generation method 1 (pun from the word/sense pair, with few-shot examples).
# batch_size=1 sends one entry per model.batch call; results are saved after each one.
call_llm_to_generate(model=gemini, dataset=hom_punDataset, method=1, examples=hom_examples, batch_size=1, save=True)
call_llm_to_generate(model=gemini, dataset=het_punDataset, method=1, examples=het_examples, batch_size=1, save=True)

 80%|████████  | 649/810 [22:01<05:04,  1.89s/it]

hom_1412 {"Sentence": "The falling apple gravitated toward the earth and Newton.}


100%|██████████| 810/810 [27:16<00:00,  2.02s/it]
100%|██████████| 647/647 [21:18<00:00,  1.98s/it]


In [22]:
# Generation method 2 (pun that must also use the contextual words).
# batch_size=1 sends one entry per model.batch call; results are saved after each one.
call_llm_to_generate(model=gemini, dataset=hom_punDataset, method=2, examples=hom_examples, batch_size=1, save=True)
call_llm_to_generate(model=gemini, dataset=het_punDataset, method=2, examples=het_examples, batch_size=1, save=True)

100%|██████████| 810/810 [30:08<00:00,  2.23s/it]


100%|██████████| 647/647 [23:42<00:00,  2.20s/it]


### claude3

In [8]:
# Connect claude-3-opus-20240229 through the Anthropic chat API.
claude3_name = 'claude-3-opus-20240229'
temperature = 0.7  # sampling temperature handed to the chat model
claude_api_key = get_claude_api_key()  # use your api key
claude3 = ChatAnthropic(model_name=claude3_name, temperature=temperature,
                        anthropic_api_key=claude_api_key)

In [9]:
# Generation method 1 (pun from the word/sense pair, with few-shot examples).
# batch_size is not passed, so the default of 1 entry per request is used.
call_llm_to_generate(model=claude3, dataset=hom_punDataset, method=1, examples=hom_examples, save=True)
call_llm_to_generate(model=claude3, dataset=het_punDataset, method=1, examples=het_examples, save=True)

100%|██████████| 810/810 [17:20<00:00,  1.28s/it]
100%|██████████| 647/647 [1:19:20<00:00,  7.36s/it]  


In [11]:
# Generation method 2 (pun that must also use the contextual words).
# batch_size is not passed, so the default of 1 entry per request is used.
call_llm_to_generate(model=claude3, dataset=hom_punDataset, method=2, examples=hom_examples, save=True)
call_llm_to_generate(model=claude3, dataset=het_punDataset, method=2, examples=het_examples, save=True)

100%|██████████| 810/810 [00:00<00:00, 30117.07it/s]
100%|██████████| 647/647 [00:00<00:00, 41475.72it/s]


### vicuna

In [8]:
# Connect to a local server.
# Start FastChat's OpenAI-compatible API first (run these commands in a terminal):
# python3 -m fastchat.serve.controller
# python3 -m fastchat.serve.model_worker --model-names "gpt-3.5-turbo,text-davinci-003,text-embedding-ada-002" --model-path lmsys/vicuna-7b-v1.5
# python3 -m fastchat.serve.openai_api_server --host localhost --port 8000

# Connect vicuna-7b-v1.5
# NOTE(review): these variables are set process-wide and are presumably what
# ChatOpenAI reads for its endpoint and key - confirm; they would also affect
# any later ChatOpenAI client created without explicit credentials.
os.environ['OPENAI_API_BASE'] = 'http://localhost:8000/v1'
os.environ['OPENAI_API_KEY'] = 'EMPTY'

# Name used for the record key; passed explicitly as model_name to call_llm_to_generate.
vicuna_name = 'vicuna-7b-v1.5'
temperature = 0.7
max_tokens = 300
# "gpt-3.5-turbo" is one of the names given to the worker via --model-names above.
vicuna = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens)

In [9]:
# Generation method 1; model_name=vicuna_name makes the results land under
# the 'vicuna-7b-v1.5_text' key instead of the served alias.
call_llm_to_generate(model=vicuna, model_name=vicuna_name, dataset=hom_punDataset, method=1, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=vicuna, model_name=vicuna_name, dataset=het_punDataset, method=1, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [00:00<00:00, 187018.95it/s]
100%|██████████| 647/647 [00:00<00:00, 195161.07it/s]


In [11]:
# Generation method 2 (with contextual words); results are stored under the
# 'vicuna-7b-v1.5_text' key.
call_llm_to_generate(model=vicuna, model_name=vicuna_name, dataset=hom_punDataset, method=2, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=vicuna, model_name=vicuna_name, dataset=het_punDataset, method=2, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [03:48<00:00,  3.55it/s]
100%|██████████| 647/647 [00:00<00:00, 194768.87it/s]


### llama2

In [8]:
# Connect to a local server.
# Start FastChat's OpenAI-compatible API first (run these commands in a terminal):
# python3 -m fastchat.serve.controller
# python3 -m fastchat.serve.model_worker --model-names "gpt-3.5-turbo,text-davinci-003,text-embedding-ada-002" --model-path meta-llama/Llama-2-7b-chat-hf
# python3 -m fastchat.serve.openai_api_server --host localhost --port 8000

# Connect llama-2-7b-chat
# NOTE(review): these variables are set process-wide and are presumably what
# ChatOpenAI reads for its endpoint and key - confirm.
os.environ['OPENAI_API_BASE'] = 'http://localhost:8000/v1'
os.environ['OPENAI_API_KEY'] = 'EMPTY'

# Name used for the record key; passed explicitly as model_name to call_llm_to_generate.
llama2_name = 'llama-2-7b-chat'
temperature = 0.7
max_tokens = 300
# "gpt-3.5-turbo" is one of the names given to the worker via --model-names above.
llama2 = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens)

In [9]:
# Generation method 1; model_name=llama2_name makes the results land under
# the 'llama-2-7b-chat_text' key instead of the served alias.
call_llm_to_generate(model=llama2, model_name=llama2_name, dataset=hom_punDataset, method=1, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=llama2, model_name=llama2_name, dataset=het_punDataset, method=1, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [31:05<00:00,  2.30s/it]
100%|██████████| 647/647 [24:04<00:00,  2.23s/it]


In [11]:
# Generation method 2 (with contextual words); results are stored under the
# 'llama-2-7b-chat_text' key.
call_llm_to_generate(model=llama2, model_name=llama2_name, dataset=hom_punDataset, method=2, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=llama2, model_name=llama2_name, dataset=het_punDataset, method=2, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [00:00<00:00, 202720.11it/s]
100%|██████████| 647/647 [00:00<00:00, 202621.87it/s]


### mistral

In [8]:
# Connect to a local server.
# Start FastChat's OpenAI-compatible API first (run these commands in a terminal):
# python3 -m fastchat.serve.controller
# python3 -m fastchat.serve.model_worker --model-names "gpt-3.5-turbo,text-davinci-003,text-embedding-ada-002" --model-path mistralai/Mistral-7B-Instruct-v0.2
# python3 -m fastchat.serve.openai_api_server --host localhost --port 8000

# Connect mistral-7b-instruct-v0.2
# NOTE(review): these variables are set process-wide and are presumably what
# ChatOpenAI reads for its endpoint and key - confirm.
os.environ['OPENAI_API_BASE'] = 'http://localhost:8000/v1'
os.environ['OPENAI_API_KEY'] = 'EMPTY'

# Name used for the record key; passed explicitly as model_name to call_llm_to_generate.
mistral_name = 'mistral-7b-instruct-v0.2'
temperature = 0.7
max_tokens = 300
# "gpt-3.5-turbo" is one of the names given to the worker via --model-names above.
mistral = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens)

In [9]:
# Generation method 1; model_name=mistral_name makes the results land under
# the 'mistral-7b-instruct-v0.2_text' key instead of the served alias.
call_llm_to_generate(model=mistral, model_name=mistral_name, dataset=hom_punDataset, method=1, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=mistral, model_name=mistral_name, dataset=het_punDataset, method=1, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [12:00<00:00,  1.12it/s]
100%|██████████| 647/647 [11:15<00:00,  1.04s/it]


In [11]:
# Generation method 2 (with contextual words); results are stored under the
# 'mistral-7b-instruct-v0.2_text' key.
call_llm_to_generate(model=mistral, model_name=mistral_name, dataset=hom_punDataset, method=2, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=mistral, model_name=mistral_name, dataset=het_punDataset, method=2, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [22:17<00:00,  1.65s/it]
100%|██████████| 647/647 [19:21<00:00,  1.80s/it]


### openchat

In [8]:
# Connect to a local server.
# Start FastChat's OpenAI-compatible API first (run these commands in a terminal):
# python3 -m fastchat.serve.controller
# python3 -m fastchat.serve.model_worker --model-names "gpt-3.5-turbo,text-davinci-003,text-embedding-ada-002" --model-path openchat/openchat-3.5-0106
# python3 -m fastchat.serve.openai_api_server --host localhost --port 8000

# Connect openchat-3.5-0106
# NOTE(review): these variables are set process-wide and are presumably what
# ChatOpenAI reads for its endpoint and key - confirm.
os.environ['OPENAI_API_BASE'] = 'http://localhost:8000/v1'
os.environ['OPENAI_API_KEY'] = 'EMPTY'

# Name used for the record key; passed explicitly as model_name to call_llm_to_generate.
openchat_name = 'openchat-3.5-0106'
temperature = 0.7
max_tokens = 300
# "gpt-3.5-turbo" is one of the names given to the worker via --model-names above.
openchat = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=temperature, max_tokens=max_tokens)

In [9]:
# Generation method 1; model_name=openchat_name makes the results land under
# the 'openchat-3.5-0106_text' key instead of the served alias.
call_llm_to_generate(model=openchat, model_name=openchat_name, dataset=hom_punDataset, method=1, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=openchat, model_name=openchat_name, dataset=het_punDataset, method=1, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [08:28<00:00,  1.59it/s]
100%|██████████| 647/647 [06:16<00:00,  1.72it/s]


In [11]:
# Generation method 2 (with contextual words); results are stored under the
# 'openchat-3.5-0106_text' key.
call_llm_to_generate(model=openchat, model_name=openchat_name, dataset=hom_punDataset, method=2, examples=hom_examples, batch_size=10, save=True)
call_llm_to_generate(model=openchat, model_name=openchat_name, dataset=het_punDataset, method=2, examples=het_examples, batch_size=10, save=True)

100%|██████████| 810/810 [08:45<00:00,  1.54it/s]
100%|██████████| 647/647 [06:48<00:00,  1.58it/s]
