In [1]:
import openai
import hermetic as hm
import ray
import pandas as pd
from hermetic.core.prompt_mgr import PromptMgr
from hermetic.core.environment import Environment, load_environments
import time

In [2]:
df = pd.read_json('resources/evals/val_sentence_pairs.json')

In [3]:
OPENAI_API_BASE = 'https://api.openai.com/v1'
OPENAI_API_KEY = 'xxxx'
AE_API_BASE ='https://api.endpoints.anyscale.com/v1'
AE_API_KEY = 'xxxx'

In [4]:
model_short_names = {'openai://gpt-3.5-turbo' : 'gpt35', 
                    'openai://gpt-4': 'gpt4', 
                    'meta-llama/Llama-2-7b-chat-hf': 'llama7', 
                    'meta-llama/Llama-2-13b-chat-hf': 'llama13',
                    'meta-llama/Llama-2-70b-chat-hf': 'llama70'
                   }

In [5]:
models_to_test = ['openai://gpt-3.5-turbo', 'openai://gpt-4', 'meta-llama/Llama-2-7b-chat-hf', 
                  'meta-llama/Llama-2-13b-chat-hf', 'meta-llama/Llama-2-70b-chat-hf']

In [6]:
def get_api_params(model):
    """ We need to access both Anyscale Endpoints APIs as well as OpenAI APIs.
        This makjes it convenient
    """
    if model.startswith('openai://'): 
        return (OPENAI_API_BASE, OPENAI_API_KEY)
    else: 
        return (AE_API_BASE, AE_API_KEY)

In [7]:
environments_to_test = load_environments()

In [8]:
environments_to_test.keys()

dict_keys(['answer_last', 'answer_first'])

In [9]:
#Let's focus on Answer first. 



In [10]:
@ray.remote(num_cpus=0.25, max_retries=25, retry_exceptions=True) 
def query_model(model, row, prompt_mgr, swap_answers = False): 
    if swap_answers: 
        prompt = prompt_mgr.bind('consistent').render(
                    article_sent=row['article_sent'], 
                    option_a=row['incorrect_sent'],
                    option_b=row['correct_sent'])
    else: 
        prompt = prompt_mgr.bind('consistent').render(
                    article_sent=row['article_sent'], 
                    option_a=row['correct_sent'],
                    option_b=row['incorrect_sent'])
        
    system_prompt = prompt_mgr.bind('system').render()
                
    api_base, api_key = get_api_params(model) 
    model = model.replace('openai://','')
    result = openai.ChatCompletion.create(model=model,
                temperature=0, 
                api_key=api_key, 
                api_base=api_base,                           
                messages = [{
                "role":"system",
                "content": system_prompt},
                {"role":"user",
                "content": prompt
                }])
    time.sleep(1) 
    output = result['choices'][0]['message']['content']
    return output

In [11]:
# Let's test this. 
future = query_model.remote(models_to_test[0], df.loc[0], environments_to_test['answer_first'].prompt_mgr, swap_answers=True)

2023-08-11 10:05:24,231	INFO worker.py:1431 -- Connecting to existing Ray cluster at address: 10.0.50.173:6379...
2023-08-11 10:05:24,242	INFO worker.py:1612 -- Connected to Ray cluster. View the dashboard at [1m[32mhttps://session-6pe7dmsqimf5rsn3as27if1iyp.i.anyscaleuserdata-staging.com [39m[22m
2023-08-11 10:05:24,256	INFO packaging.py:346 -- Pushing file package 'gcs://_ray_pkg_0fbd40116b856f06e76ec625d16bf8ce.zip' (4.43MiB) to Ray cluster...
2023-08-11 10:05:24,269	INFO packaging.py:359 -- Successfully pushed file package 'gcs://_ray_pkg_0fbd40116b856f06e76ec625d16bf8ce.zip'.


In [12]:
result = ray.get(future) 

In [13]:
print(result)

B


In [16]:
@ray.remote(max_retries = 25, retry_exceptions = True) 
def get_results_for_model(model, df, environment, swap_answers = False):
    futures = [query_model.remote(model,row, environment.prompt_mgr, swap_answers = swap_answers) for _, row in df.iterrows()]
    return ray.get(futures)

In [17]:
st = time.time() 
futures = [get_results_for_model.remote(model, df, env) for model in models_to_test]
futures_inv = [get_results_for_model.remote(model, df, env, swap_answers=True) for model in models_to_test]
results = ray.get(futures + futures_inv)
et = time.time()
print(f'Elapsed wall clock time: {et-st}') 

[2m[1m[36m(autoscaler +29s)[0m Tip: use `ray status` to view detailed cluster status. To disable these messages, set RAY_SCHEDULER_EVENTS=0.
[2m[1m[36m(autoscaler +29s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +45s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +55s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +1m10s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +1m19s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +1m29s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +1m44s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +1m54s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +2m5s)[0m Failed to launch 2 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +2m15s)[

KeyboardInterrupt: 

In [18]:
# Now assign the results to the table (we had to put both in the ray.get() call

for i in range(len(models_to_test)):
    df[model_short_names[models_to_test[i]]] = results[i]
for i in range(len(models_to_test), 2*len(models_to_test)):
    df[model_short_names[models_to_test[i-len(models_to_test)]]+'-swap'] = results[i]

In [19]:
df[['article_sent', 'correct_sent', 'incorrect_sent', 'gpt35', 'gpt35-swap', 'gpt4', 'gpt4-swap', 'llama7', 'llama7-swap', 'llama13', 'llama13-swap', 'llama70', 'llama70-swap']]

Unnamed: 0,article_sent,correct_sent,incorrect_sent,gpt35,gpt35-swap,gpt4,gpt4-swap,llama7,llama7-swap,llama13,llama13-swap,llama70,llama70-swap
0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,the abc have reported that those who receive c...,A,B,A,B,"Based on the article sentence provided, Summar...","Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nAccording t...","Sure, I can help you with that!\n\nBased on th...",The correct answer is A: those who receive cen...,The correct answer is (B) those who receive ce...
1,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a bid for t...,A,B,A,B,"Based on the article sentence provided, I woul...","Based on the article sentence provided, I woul...","Sure, I'd be happy to help!\n\nBased on the ar...","Sure, I can help you with that!\n\nAfter caref...",The correct answer is A: five ambitious clubs ...,The correct answer is (B) five ambitious clubs...
2,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,the 25 year old from pennsylvania went viral o...,A,A,A,B,"Based on the article sentence provided, I woul...","Based on the article sentence provided, I woul...","Sure, I'd be happy to help!\n\nAccording to th...","Sure, I can help you with that!\n\nBased on th...",A,The correct answer is (B).\n\nThe article stat...
3,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,seven games involving nimes were arrested last...,A,B,A,B,"Based on the article sentence provided, the mo...","Based on the article sentence provided, the mo...","Sure, I'd be happy to help!\n\nBased on the ar...","Sure, I'd be happy to help!\n\nBased on the in...",The correct answer is A: seven games involving...,The correct answer is (B) seven games involvin...
4,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,driver's side were immediately shatters and fa...,A,B,A,B,"Based on the article sentence provided, the mo...","Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...","Sure, I can help you with that!\n\nBased on th...",The correct answer is A: driver's side of the ...,The correct answer is B.\n\nExplanation:\nSumm...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,former manchester city boss roberto mancini ha...,roberto mancini has turned up the heat on succ...,roberto mancini has turned up the heat for man...,A,B,A,B,"Based on the article sentence provided, I woul...","Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nAccording t...","Sure, I can help you with that!\n\nAccording t...",A,B
369,louis van gaal said that he believed wenger's ...,louis van gaal says he believed wenger is esti...,louis van gaal says wenger's estimation of the...,B,A,B,A,"Based on the article sentence provided, the mo...","Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...","Sure, I can help you with that!\n\nAccording t...",The correct answer is B.\n\nExplanation:\n\nTh...,The correct answer is A.\n\nExplanation:\n\nSu...
370,lloris wearing the scarf after defeat by chels...,lloris wearing the scarf after defeat by chels...,lloris wearing the scarf after defeat by chels...,A,A,A,B,"Based on the article sentence provided, the mo...","Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...","Sure, I can help you with that!\n\nAccording t...",A,A
371,in an anonymous confession to australia's gold...,the woman reveals she once had sex with two gu...,the woman reveals she was supposed to be seein...,A,B,A,B,"I apologize, but I cannot provide an answer to...","I apologize, but I cannot provide an answer to...","Sure, I'd be happy to help!\n\nAccording to th...","Sure, I can help you with that!\n\nAccording t...",A,The correct answer is (B). The article sentenc...


In [28]:
df.to_json('computed_results.json')

In [14]:
df = pd.read_json('computed_results.json')

In [15]:
df

Unnamed: 0,article_id,article_sent,correct_sent,correct_sent_id,incorrect_sent,incorrect_sent_id,gpt35,gpt4,llama7,llama13,llama70,gpt35-swap,gpt4-swap,llama7-swap,llama13-swap,llama70-swap
0,67c3b0a2da09ce32a83c098da1e0aff034e790d0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,84-0,the abc have reported that those who receive c...,40-0,A,A,"Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nAccording t...",The correct answer is A: those who receive cen...,B,B,"Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nBased on th...",The correct answer is (B) those who receive ce...
1,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,44-1,five ambitious clubs are locked in a bid for t...,17-1,A,A,"Based on the article sentence provided, I woul...","Sure, I'd be happy to help!\n\nBased on the ar...",The correct answer is A: five ambitious clubs ...,B,B,"Based on the article sentence provided, I woul...","Sure, I can help you with that!\n\nAfter caref...",The correct answer is (B) five ambitious clubs...
2,7e37b3e3623ae0ab6568e0e73584b16fae0cc00e,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,45-1,the 25 year old from pennsylvania went viral o...,4-1,A,A,"Based on the article sentence provided, I woul...","Sure, I'd be happy to help!\n\nAccording to th...",A,A,B,"Based on the article sentence provided, I woul...","Sure, I can help you with that!\n\nBased on th...",The correct answer is (B).\n\nThe article stat...
3,cdbfba47929b459daf474a1a3dee37d2351729ea,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,93-2,seven games involving nimes were arrested last...,83-2,A,A,"Based on the article sentence provided, the mo...","Sure, I'd be happy to help!\n\nBased on the ar...",The correct answer is A: seven games involving...,B,B,"Based on the article sentence provided, the mo...","Sure, I'd be happy to help!\n\nBased on the in...",The correct answer is (B) seven games involvin...
4,696ac804af5fcab8853c583f5c97e1eda6e2493a,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,61-2,driver's side were immediately shatters and fa...,42-2,A,A,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",The correct answer is A: driver's side of the ...,B,B,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nBased on th...",The correct answer is B.\n\nExplanation:\nSumm...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,7682ecf708029ba1328e6b5b1f8fe841efa92495,former manchester city boss roberto mancini ha...,roberto mancini has turned up the heat on succ...,82-4,roberto mancini has turned up the heat for man...,76-4,A,A,"Based on the article sentence provided, I woul...","Sure, I can help you with that!\n\nAccording t...",A,B,B,"Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nAccording t...",B
369,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,louis van gaal said that he believed wenger's ...,louis van gaal says he believed wenger is esti...,44-2,louis van gaal says wenger's estimation of the...,86-2,B,B,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",The correct answer is B.\n\nExplanation:\n\nTh...,A,A,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",The correct answer is A.\n\nExplanation:\n\nSu...
370,06819c41879edbe99b8612b552afd80b96356ef7,lloris wearing the scarf after defeat by chels...,lloris wearing the scarf after defeat by chels...,19-2,lloris wearing the scarf after defeat by chels...,12-2,A,A,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",A,A,B,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",A
371,85506154c3f291ddd0cc3935dc09215f9c4437ed,in an anonymous confession to australia's gold...,the woman reveals she once had sex with two gu...,22-2,the woman reveals she was supposed to be seein...,60-2,A,A,"I apologize, but I cannot provide an answer to...","Sure, I'd be happy to help!\n\nAccording to th...",A,B,B,"I apologize, but I cannot provide an answer to...","Sure, I can help you with that!\n\nAccording t...",The correct answer is (B). The article sentenc...


In [28]:
from hermetic.agents.openai_chat_agent import Agent
import openai

NAME = 'multichoice'
MODEL = 'meta-llama/Llama-2-70b-chat-hf'
class Multichoice(Agent):
    """
    Multichoice 
    """

    def __init__(self, env):
        # Call the super class constructor
        super().__init__(environment = env, id=NAME)
        env.add_agent(NAME, self)
        self.pm = self.env.prompt_mgr
        openai.api_key ='https://api.endpoints.anyscale.com/v1'
        openai.api_base = 'esecret_8ebagkp835rngrnl2bza3hrnfg'
        self.model = MODEL
        
        
        
    def process_input(self, inp: str):
        sys_msg = self.pm.bind('multichoice_system').render()
        multichoice = self.pm.bind('multichoice_query').render(query=inp)  
        message_history = self.message_history = [{
            'role': 'system',
            'content': sys_msg
        },
        {
            'role': 'user',
            'content': multichoice
        }]
        
        response = openai.ChatCompletion.create(
           model = self.model,
           messages = message_history,
           temperature = 0, 
           api_base ='https://api.endpoints.anyscale.com/v1',
           api_key = 'esecret_8ebagkp835rngrnl2bza3hrnfg',
           stream = True
        )
        words = ''
        for tok in response: 
            delta = tok.choices[0].delta
            if not delta: # End token 
                self.message_history.append({
                    'role': 'assistant',
                    'content': words
                })
                break
            elif 'content' in delta:
                words += delta['content']
                yield delta['content'] 
            else: 
                continue


   

In [29]:
mc = Multichoice(env)
result = mc.process_all(df['llama7-swap'][0])


In [30]:
result

'B'

In [32]:


@ray.remote(num_cpus=1, max_retries=25, retry_exceptions=True)
def clean_input(messy_string): 
    mc = Multichoice(env)
    result = mc.process_all(messy_string).strip()
    #print(f'Result is: {result}')
    #if (result not in ['A','B']):
    #  print(f'----\nWarning: input was {messy_string}.\n\nCleaned up version is {result}\n\n---\n')
    return  result
    



f = clean_input.remote(df['llama7-swap'][0])

In [34]:
ray.get(f)

'B'

In [35]:

@ray.remote(max_retries=15, retry_exceptions=True, num_cpus=2)
def clean_column(col): 
    futures = [clean_input.remote(val) for _,val in col.items()]
    print(f'kicked off {len(futures)} tasks') 
    cleaned = ray.get(futures)
    return cleaned

In [36]:
wtf = clean_column.remote(df['llama70'][:10])

[2m[36m(clean_column pid=571276)[0m kicked off 10 tasks


In [37]:
results = ray.get(wtf)

In [38]:
results

['A', 'A', 'A', 'A', 'A', 'A', 'B', 'other', 'A', 'A']

In [39]:
to_relabel = ['llama7', 'llama7-swap', 'llama13', 'llama13-swap', 'llama70', 'llama70-swap']

In [40]:
futures = [clean_column.remote(df[label]) for label in to_relabel]

[2m[36m(clean_column pid=571276)[0m kicked off 373 tasks


In [41]:
cleaned_cols = ray.get(futures)

[2m[1m[36m(autoscaler +12m36s)[0m Tip: use `ray status` to view detailed cluster status. To disable these messages, set RAY_SCHEDULER_EVENTS=0.
[2m[1m[36m(autoscaler +12m36s)[0m Failed to launch 4 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +12m52s)[0m Failed to launch 4 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +13m1s)[0m Failed to launch 4 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +13m11s)[0m Failed to launch 4 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +13m26s)[0m Failed to launch 4 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +13m37s)[0m Failed to launch 4 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +13m46s)[0m Failed to launch 4 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +13m56s)[0m Failed to launch 4 node(s) of type worker-node-type-0.
[2m[1m[36m(autoscaler +14m6s)[0m Failed to launch 3 node(s) of type worker-node-type-0.
[2m[1m[36m(aut

In [42]:
len(cleaned_cols)

6

In [43]:
for i in range(len(cleaned_cols)):
    df[to_relabel[i] + '-clean'] = cleaned_cols[i]

In [44]:
df

Unnamed: 0,article_id,article_sent,correct_sent,correct_sent_id,incorrect_sent,incorrect_sent_id,gpt35,gpt4,llama7,llama13,...,gpt4-swap,llama7-swap,llama13-swap,llama70-swap,llama7-clean,llama7-swap-clean,llama13-clean,llama13-swap-clean,llama70-clean,llama70-swap-clean
0,67c3b0a2da09ce32a83c098da1e0aff034e790d0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,84-0,the abc have reported that those who receive c...,40-0,A,A,"Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nAccording t...",...,B,"Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nBased on th...",The correct answer is (B) those who receive ce...,B,B,A,B,A,B
1,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,44-1,five ambitious clubs are locked in a bid for t...,17-1,A,A,"Based on the article sentence provided, I woul...","Sure, I'd be happy to help!\n\nBased on the ar...",...,B,"Based on the article sentence provided, I woul...","Sure, I can help you with that!\n\nAfter caref...",The correct answer is (B) five ambitious clubs...,B,B,A,B,A,B
2,7e37b3e3623ae0ab6568e0e73584b16fae0cc00e,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,45-1,the 25 year old from pennsylvania went viral o...,4-1,A,A,"Based on the article sentence provided, I woul...","Sure, I'd be happy to help!\n\nAccording to th...",...,B,"Based on the article sentence provided, I woul...","Sure, I can help you with that!\n\nBased on th...",The correct answer is (B).\n\nThe article stat...,B,B,A,B,A,B
3,cdbfba47929b459daf474a1a3dee37d2351729ea,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,93-2,seven games involving nimes were arrested last...,83-2,A,A,"Based on the article sentence provided, the mo...","Sure, I'd be happy to help!\n\nBased on the ar...",...,B,"Based on the article sentence provided, the mo...","Sure, I'd be happy to help!\n\nBased on the in...",The correct answer is (B) seven games involvin...,B,B,A,B,A,B
4,696ac804af5fcab8853c583f5c97e1eda6e2493a,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,61-2,driver's side were immediately shatters and fa...,42-2,A,A,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",...,B,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nBased on th...",The correct answer is B.\n\nExplanation:\nSumm...,B,B,A,B,A,B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,7682ecf708029ba1328e6b5b1f8fe841efa92495,former manchester city boss roberto mancini ha...,roberto mancini has turned up the heat on succ...,82-4,roberto mancini has turned up the heat for man...,76-4,A,A,"Based on the article sentence provided, I woul...","Sure, I can help you with that!\n\nAccording t...",...,B,"Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nAccording t...",B,B,B,A,B,A,B
369,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,louis van gaal said that he believed wenger's ...,louis van gaal says he believed wenger is esti...,44-2,louis van gaal says wenger's estimation of the...,86-2,B,B,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",...,A,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",The correct answer is A.\n\nExplanation:\n\nSu...,B,B,B,A,B,A
370,06819c41879edbe99b8612b552afd80b96356ef7,lloris wearing the scarf after defeat by chels...,lloris wearing the scarf after defeat by chels...,19-2,lloris wearing the scarf after defeat by chels...,12-2,A,A,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",...,B,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",A,B,B,A,A,A,A
371,85506154c3f291ddd0cc3935dc09215f9c4437ed,in an anonymous confession to australia's gold...,the woman reveals she once had sex with two gu...,22-2,the woman reveals she was supposed to be seein...,60-2,A,A,"I apologize, but I cannot provide an answer to...","Sure, I'd be happy to help!\n\nAccording to th...",...,B,"I apologize, but I cannot provide an answer to...","Sure, I can help you with that!\n\nAccording t...",The correct answer is (B). The article sentenc...,other,other,A,B,A,B


Sure, I can help you with that!

According to the article sentence, those who receive Centrelink payments made up half of Radio Rental's income last year.

Therefore, the more consistent summary with the article is:

Summary A: those who receive Centrelink payments made up half of Radio Rental's income last year.

Summary B is not as consistent with the article because it states that the ABC reported that those who receive Centrelink payments made up Radio Rental's income last year, which is not directly supported by the article sentence.


In [None]:
#Cleaning up the inputs.


In [90]:
#Hm we have a problem here. Only GPT actually reliably follows instructions. 
def is_correct(row): 
    print(f'Row is {row}')
    if (row[0] == 'A' and row[1] == 'B'):
        return 'Y'
    if (row[0] == row[1]):
        print('Same value')
        return row[0]*2
    return 'N'

In [93]:
for sn in ['llama7', 'llama13', 'llama70']:
    df[sn+'-correct'] = df[[sn +'-clean', sn+'-swap-clean']].apply(is_correct, axis=1)


Row is llama7-clean         B
llama7-swap-clean    B
Name: 0, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 1, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 2, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 3, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 4, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 5, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 6, dtype: object
Same value
Row is llama7-clean         other
llama7-swap-clean    other
Name: 7, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 8, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 9, dtype: object
Same value
Row is llama7-clean         B
llama7-swap-clean    B
Name: 10, dtype: object
Same value
Row is llama7-clean         B
llam

In [92]:
df

Unnamed: 0,article_id,article_sent,correct_sent,correct_sent_id,incorrect_sent,incorrect_sent_id,gpt35,gpt4,llama7,llama13,...,llama7-swap-clean,llama13-clean,llama13-swap-clean,llama70-clean,llama70-swap-clean,gpt35-correct,gpt4-correct,llama7-correct,llama13-correct,llama70-correct
0,67c3b0a2da09ce32a83c098da1e0aff034e790d0,the abc have reported that those who receive c...,those who receive centrelink payments made up ...,84-0,the abc have reported that those who receive c...,40-0,A,A,"Based on the article sentence provided, Summar...","Sure, I can help you with that!\n\nAccording t...",...,B,A,B,A,B,Y,Y,N,N,Y
1,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,five ambitious clubs are locked in a scramble ...,five ambitious clubs are locked in a scramble ...,44-1,five ambitious clubs are locked in a bid for t...,17-1,A,A,"Based on the article sentence provided, I woul...","Sure, I'd be happy to help!\n\nBased on the ar...",...,B,A,B,A,B,Y,Y,N,N,Y
2,7e37b3e3623ae0ab6568e0e73584b16fae0cc00e,but it wasn't until last year that the 25 year...,the 25 year old from pennsylvania went on a le...,45-1,the 25 year old from pennsylvania went viral o...,4-1,A,A,"Based on the article sentence provided, I woul...","Sure, I'd be happy to help!\n\nAccording to th...",...,B,A,B,A,B,AA,Y,N,N,Y
3,cdbfba47929b459daf474a1a3dee37d2351729ea,seven games involving nimes were investigated ...,seven games involving nimes were investigated ...,93-2,seven games involving nimes were arrested last...,83-2,A,A,"Based on the article sentence provided, the mo...","Sure, I'd be happy to help!\n\nBased on the ar...",...,B,A,B,A,B,Y,Y,N,N,Y
4,696ac804af5fcab8853c583f5c97e1eda6e2493a,the driver's side of the windscreen immediatel...,driver's side of the windscreen immediately sh...,61-2,driver's side were immediately shatters and fa...,42-2,A,A,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",...,B,A,B,A,B,Y,Y,N,N,Y
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,7682ecf708029ba1328e6b5b1f8fe841efa92495,former manchester city boss roberto mancini ha...,roberto mancini has turned up the heat on succ...,82-4,roberto mancini has turned up the heat for man...,76-4,A,A,"Based on the article sentence provided, I woul...","Sure, I can help you with that!\n\nAccording t...",...,B,A,B,A,B,Y,Y,N,N,Y
369,7c4ae3beb354e0a9f11fa09f8fbb6613a50db5cc,louis van gaal said that he believed wenger's ...,louis van gaal says he believed wenger is esti...,44-2,louis van gaal says wenger's estimation of the...,86-2,B,B,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",...,B,B,A,B,A,N,N,N,N,N
370,06819c41879edbe99b8612b552afd80b96356ef7,lloris wearing the scarf after defeat by chels...,lloris wearing the scarf after defeat by chels...,19-2,lloris wearing the scarf after defeat by chels...,12-2,A,A,"Based on the article sentence provided, the mo...","Sure, I can help you with that!\n\nAccording t...",...,B,A,A,A,A,AA,Y,N,N,AA
371,85506154c3f291ddd0cc3935dc09215f9c4437ed,in an anonymous confession to australia's gold...,the woman reveals she once had sex with two gu...,22-2,the woman reveals she was supposed to be seein...,60-2,A,A,"I apologize, but I cannot provide an answer to...","Sure, I'd be happy to help!\n\nAccording to th...",...,other,A,B,A,B,Y,Y,"I apologize, but I cannot provide an answer to...",N,Y


In [1]:
df[['llama7-clean','llama7-swap-clean']].to_numpy()

NameError: name 'df' is not defined

In [2]:
df['llama70-correct'].value_counts()/len(df)

NameError: name 'df' is not defined

In [66]:
df.to_json('final_data.json')