In [1]:
from gptcache import GPTCache
import pandas as pd
from tqdm import tqdm

In [2]:
prompt = """Can you create three true and three false claims related to this factual statement? 

"{claim}"

Can you provide your answer as Python lists named true_claims and false_claims?"""

In [3]:
model_name = "gpt-4"
cache = GPTCache(
            cache_loc=f"cache/cache_{model_name}.json",
            key_loc="openai_key.txt",
            engine=model_name
        )

In [24]:
def parse_true_false_lists(st):
    try:
        true_start = st.find("rue_claims = [")
        true_end = st.find("]", true_start)
        false_start = st.find("alse_claims = [")
        false_end = st.find("]", false_start)
        true_claims = st[true_start:true_end+1]
        true_claims = true_claims[true_claims.find("["):]
        false_claims = st[false_start:false_end+1]
        false_claims = false_claims[false_claims.find("["):]
        return eval(true_claims), eval(false_claims)
    except:
        print(st)
        print(true_claims)
        print(false_claims)
        print("---")
        return [], []


In [25]:
# Create fine-tuning dataset.
input_prompt = """[INST] <<SYS>>
{system_prompt}
<</SYS>>

{user_msg} [/INST]"""
system_prompt = "Evaluate the following claim and answer only with true or false."

In [26]:
for num in [10, 30, 50, 100]:
    creak_train = pd.read_json("/projectnb/llamagrp/feyzanb/creak/data/creak/train.json", lines=True)
    creak_train = creak_train[['sentence', 'label']][:num]
    creak_train = creak_train.loc[creak_train['label'] == "true"]
    creak_train['prompt'] = creak_train['sentence'].apply(lambda x: prompt.format(claim=x))
    queries = creak_train['prompt'].tolist()
    preds = []
    for q in tqdm(queries, desc="Generating..."):
        preds.append(cache.generate(q, max_length=500))
    preds = [parse_true_false_lists(p) for p in preds]
    true_claims = [p[0] for p in preds]
    false_claims = [p[1] for p in preds]
    true_claims = [item for sublist in true_claims for item in sublist]
    false_claims = [item for sublist in false_claims for item in sublist]

    rel_claims = true_claims + false_claims
    labels = ["True."] * len(true_claims) + ["False."] * len(false_claims)

    rel = pd.DataFrame({"label": labels, "input_prompt": rel_claims})
    rel['input_prompt'] = rel['input_prompt'].apply(lambda x: input_prompt.format(system_prompt=system_prompt, user_msg=x))
    rel.to_csv(f"creak_n{num}/rel_dev.csv", index=False)

Generating...: 100%|██████████| 503/503 [00:00<00:00, 942687.63it/s]

Sorry, but the statement "they are blood related so it is true Twin s." is a bit confusing, and doesn't provide enough clear fact to make true or false statements about. It is not clear whether we're talking about blood relation influencing twinning or something else. Please clarify the context or provide more detailed facts.


---
Sure, here is your requested Python lists:

```python
true_claims = ["Bengal fox and Arctic fox both belong to the family Canidae.", 
               "Bengal fox and Arctic fox are both types of foxes.", 
               "Bengal fox and Arctic fox are both members of the Vulpes genus."]

false_claims  = ["Bengal fox and Arctic fox both live in the same habitat.", 
                 "Bengal fox and Arctic fox originate from the same geographical location.", 
                 "Bengal fox and Arctic fox have the same adaptations for cold environments."]
```
["Bengal fox and Arctic fox both belong to the family Canidae.", 
               "Bengal fox and Arctic fox 




In [30]:
len(rel)

3006