# Claude Round Table

In [1]:
# Switch the working directory to the project folder so the relative "./Data/..."
# and "./*.csv" paths used in later cells resolve.
# NOTE(review): the path looks like a mounted Google Drive in Colab - confirm the
# drive is mounted before running this cell.
%cd /content/drive/MyDrive/NLP/semevaltask9

/content/drive/MyDrive/NLP/semevaltask9


In [2]:
import torch
import numpy as np
import pandas as pd
import random
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import accuracy_score

In [3]:
# Single seed shared by every random number generator seeded below.
GLOBAL_SEED = 255

# Seed NumPy's global RNG, Python's `random` module and PyTorch.
np.random.seed(GLOBAL_SEED)
random.seed(GLOBAL_SEED)
torch.manual_seed(GLOBAL_SEED)
# Ask PyTorch to use deterministic algorithm implementations (see the PyTorch
# reproducibility notes for what happens to operations that have none).
torch.use_deterministic_algorithms(True)
# cuBLAS workspace setting that the PyTorch reproducibility notes call for when
# deterministic algorithms are enabled on CUDA; exported with the IPython %env magic.
%env CUBLAS_WORKSPACE_CONFIG=:4096:8

# cuDNN: request deterministic kernels and switch off benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


env: CUBLAS_WORKSPACE_CONFIG=:4096:8


### Load datasets

In [4]:
# Import Dataset
def _load_npy_frame(path):
  """Load a pickled ``.npy`` file and return its contents as a DataFrame.

  The array is converted with ``.tolist()`` before being handed to
  ``pd.DataFrame``, exactly as each hand-written load did before.

  NOTE(review): ``allow_pickle=True`` unpickles arbitrary Python objects, so
  only point this at files from a trusted source.
  """
  return pd.DataFrame(np.load(path, allow_pickle=True).tolist())


# Sentence-puzzle (SP) splits and the test answers.
SP_train = _load_npy_frame("./Data/SP_train.npy")
SP_test = _load_npy_frame("./Data/SP_test.npy")
SP_test_answer = _load_npy_frame("./Data/SP_test_answer.npy")

# Word-puzzle (WP) splits and the test answers.
WP_train = _load_npy_frame("./Data/WP_train.npy")
WP_test = _load_npy_frame("./Data/WP_test.npy")
WP_test_answer = _load_npy_frame("./Data/WP_test_answer.npy")

# The two additional puzzle files.
SP = _load_npy_frame("./Data/sentence_puzzle.npy")
WP = _load_npy_frame("./Data/word_puzzle.npy")

In [5]:
# Attach the gold answer (second column of the answer frame) to every
# sentence-puzzle test row as an integer 'label' column.
# Selecting the answer rows by SP_test's own index keeps the lookup of the
# former row-by-row loop (a test row with no answer still raises KeyError),
# while a single vectorised assignment stores the labels as integers instead
# of filling the new column one cell at a time.
SP_test['label'] = SP_test_answer.loc[SP_test.index].iloc[:, 1].astype(int).to_numpy()

In [6]:
# Attach the gold answer (second column of the answer frame) to every
# word-puzzle test row as an integer 'label' column.
# Selecting the answer rows by WP_test's own index keeps the lookup of the
# former row-by-row loop (a test row with no answer still raises KeyError),
# while a single vectorised assignment stores the labels as integers instead
# of filling the new column one cell at a time.
WP_test['label'] = WP_test_answer.loc[WP_test.index].iloc[:, 1].astype(int).to_numpy()

## Initial Round

### Create DataFrame

In [None]:
# Parse Claude's initial-round answers into one record per question.
# Layout the checks below look for, per entry:
#   "<n>. ... Solution: <letter>"   (n is the running question number)
#   "Reasoning: <text>"
#   "Confidence: <value>"           (closes the entry)
claudeResult = []
count = 1
# Fields of the entry being assembled. They are cleared after every append so
# an entry that lacks a field gets None for it instead of silently reusing the
# previous question's value.
Solution = Reasoning = Confidence = None
with open("/content/init_answers.txt", encoding="utf-8") as file_in:
    for line in file_in:
        # Only a numbered line that really carries "Solution:" starts an entry;
        # other lines that merely begin with "<n>." fall through to the checks below.
        if line.split('.')[0] == str(count) and "Solution:" in line:
            # The first non-blank character after the marker is the option letter.
            Solution = line.partition("Solution:")[2].strip()[:1] or None
            count += 1
        elif "Reasoning:" in line:
            # Everything after the first marker, without surrounding whitespace.
            Reasoning = line.partition("Reasoning:")[2].strip()
        elif "Confidence: " in line:
            # Stripped so values such as '0.9  ' are stored as '0.9'.
            Confidence = line.partition("Confidence: ")[2].strip()
            claudeResult.append({"Solution" : Solution, "Reasoning" : Reasoning, "Confidence" : Confidence})
            Solution = Reasoning = Confidence = None


In [None]:
# Inspect the parsed records: a list of dicts with 'Solution', 'Reasoning'
# and 'Confidence' keys.
claudeResult

[{'Solution': 'A',
  'Reasoning': 'Since the hardworking farmer is the son of the lazy farmer, but the lazy farmer is not the father, the only possibility is that the lazy farmer is the mother.',
  'Confidence': '0.95'},
 {'Solution': 'C',
  'Reasoning': 'Romeo and Juliet could not be human since the only pieces of furniture were a bookshelf and bed in a rural area. The train likely caused the fish tank to fall and break.  ',
  'Confidence': '0.8'},
 {'Solution': 'C',
  'Reasoning': 'The day before your birthday, you are one year younger than you will be the next day on your birthday. This happens annually.',
  'Confidence': '1.0'},
 {'Solution': 'B',
  'Reasoning': 'If the assistant captain left, the next in command would be the captain.',
  'Confidence': '0.9  '},
 {'Solution': 'C',
  'Reasoning': '5% of 100 random names picked would be 5 people with unlisted numbers.',
  'Confidence': '1.0'},
 {'Solution': 'A',
  'Reasoning': 'For one guard to see the other smiling, they must have b

In [None]:
# Tabulate the parsed records: one row per record, one column per key
# (Solution / Reasoning / Confidence), then display the frame.
claudeResultDF = pd.DataFrame(claudeResult)
claudeResultDF

Unnamed: 0,Solution,Reasoning,Confidence
0,A,Since the hardworking farmer is the son of the...,0.95
1,C,Romeo and Juliet could not be human since the ...,0.8
2,C,"The day before your birthday, you are one year...",1.0
3,B,"If the assistant captain left, the next in com...",0.9
4,C,5% of 100 random names picked would be 5 peopl...,1.0
...,...,...,...
115,B,"He caught 3 more each day, so if 30 total over...",0.95
116,C,Google requires an internet connection to be a...,0.99
117,B,This explains marrying others without being ma...,0.9
118,B,"The animals listed are imaginary, so only real...",0.95


In [None]:
# Persist the initial-round answers. index=False keeps the row index out of the
# file; otherwise a plain pd.read_csv brings it back as an extra "Unnamed: 0"
# data column, and another one is added on every save/load round trip.
claudeResultDF.to_csv('./Claude_R0.csv', index=False)

# Round 1

In [None]:
# import all round 0 answers:
gpt = pd.read_csv("./GPT3.5_R0.csv")
mixtral = pd.read_csv("./Mixtral_R0.csv")
claude = pd.read_csv("./Claude_R0.csv")

In [None]:
# Peek at Mixtral's round-0 answers.
# NOTE(review): the "Unnamed: 0*" columns in the output appear to be row indices
# written to the CSV on earlier saves and read back as data - confirm.
mixtral

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Solution,Reasoning,Confidence
0,0,0,A,Let us analyze the given information:\n1. Ther...,0.70
1,1,1,C,"At first glance, it appears that Romeo and Jul...",0.90
2,2,2,C,"Before your birthday each year, you are always...",1.00
3,3,3,B,"Generally speaking, in sports organizations, t...",0.90
4,4,4,C,We know that 5% of the population has an unlis...,0.95
...,...,...,...,...,...
115,115,115,C,To calculate the number of fish caught on the ...,0.95
116,116,116,C,"When the Internet connection is disrupted, use...",0.95
117,117,117,B,"Given the statement, the person in considerati...",0.90
118,118,118,D,We can analyze this question by calculating th...,0.95


In [None]:
# Instruction header placed in front of every question. It asks for step-by-step
# reasoning and a self-reported confidence in a Solution / Reasoning / Confidence
# template; the answer-parsing cells look for those same markers.
# The text ends with "Question: " so initial_prompt can append the puzzle directly.
prompt = """Please answer the question with step-by-step reasoning. Also,evaluate your confidence level (between 0.0 and 1.0) to indicate the possibility of your answer being right. Please give the answer in this template:
Solution:
Reasoning:
Confidence:

Question: """

def initial_prompt(item, prompt):
  """Return ``prompt`` extended with the puzzle question and its lettered choices.

  ``item`` is indexed with "question" and "choice_list". The question goes on
  its own line, followed by one "<letter> : <choice>" line per choice, lettered
  A to D in order.
  """
  option_letters = ["A", "B", "C", "D"]
  pieces = [prompt, f'{item["question"]}\n']
  for position, option in enumerate(item["choice_list"]):
    pieces.append(f'{option_letters[position]} : {option}\n')
  return "".join(pieces)

def generate_prompt(gpt, mixtral, llama, prompt, questions=None, num_questions=120):
  """Build one debate prompt per question from the three agents' previous answers.

  Each prompt is ``initial_prompt(question, prompt)`` followed by a request to
  review the other agents' solutions and then, for each agent, its answer,
  reasoning and confidence.

  Args:
    gpt: answers quoted as "Agent three"; rows are read with ``.loc[index]`` and
      must provide 'Solution', 'Reasoning' and 'Confidence'.
    mixtral: answers quoted as "Agent two" (same layout).
    llama: answers quoted as "Agent one" (same layout). This argument is now
      actually used; the body previously ignored it and read the notebook-level
      ``claude`` frame instead.
    prompt: instruction header passed on to ``initial_prompt``.
    questions: frame holding the puzzles, read with ``.loc[index]``. Defaults to
      the notebook-level ``SP_test``, which was previously hard-coded.
    num_questions: prompts are built for row labels 0 .. num_questions - 1.
      Defaults to 120, the previously hard-coded count.

  Returns:
    A list of ``num_questions`` prompt strings, in row order.
  """
  if questions is None:
    questions = SP_test

  # Same text as before, spelled with explicit newlines.
  review_request = (
    "\nCarefully review the following solutions from other agents as additional\n"
    "information, and provide your own answer and step-by-step reasoning to the question.\n"
    "\n"
    "Clearly state which point of view you agree or disagree with and why.\n\n"
  )
  # Order in which the agents are quoted in the prompt.
  agents = (("one", llama), ("two", mixtral), ("three", gpt))

  prompts = []
  for index in range(num_questions):
    basePrompt = initial_prompt(questions.loc[index], prompt)
    basePrompt += review_request
    for ordinal, answers in agents:
      answer = answers.loc[index]
      basePrompt += (
        f"Agent {ordinal} thinks the Answer is {answer['Solution']}.\n"
        f"agent {ordinal} reason: {answer['Reasoning']}\n"
        f"agent {ordinal} confidence: {answer['Confidence']}\n"
      )
    prompts.append(basePrompt)

  return prompts


In [None]:
# Build the round-1 prompts from the previous-round answer frames loaded above
# (GPT-3.5, Mixtral, Claude) and the instruction header.
prompts = generate_prompt(gpt, mixtral, claude, prompt)

In [None]:
# Dump every prompt, numbered from 1, each followed by a ruler line.
for number, text in enumerate(prompts, start=1):
  print(
    number,
    text,
    "##################################################################################",
    sep="\n",
  )

1
Please answer the question with step-by-step reasoning. Also,evaluate your confidence level (between 0.0 and 1.0) to indicate the possibility of your answer being right. Please give the answer in this template:
Solution:
Reasoning:
Confidence:

Question: In a small village, two farmers are working in their fields - a diligent farmer and a lazy farmer. The hardworking farmer is the son of the lazy farmer, but the lazy farmer is not the father of the hardworking farmer. Can you explain this unusual relationship?
A : The lazy farmer is his mother.
B : The lazy farmer is not a responsible father as he is lazy.
C : The diligent farmer devoted himself to the farm and gradually forgot his father.
D : None of above.

Carefully review the following solutions from other agents as additional
information, and provide your own answer and step-by-step reasoning to the question.

Clearly state which point of view you agree or disagree with and why.

Agent one thinks the Answer is A.
agent one reaso

### Make round 1 DataFrame

In [None]:
# Parse Claude's round-1 replies into one record per question.
# A record is closed by its "Confidence: " line; the Solution / Reasoning lines
# seen since the previous record are attached to it.
claudeResult = []
# Fields of the record being assembled. They are cleared after every append so
# a reply that lacks a field gets None for it instead of silently reusing the
# previous question's value.
Solution = Reasoning = Confidence = None
with open("/content/r1_answers.txt", encoding="utf-8") as file_in:
    for line in file_in:
        if "Solution:" in line:
            # Replies use either "Solution: X" or "Solution: The answer is X".
            marker = "The answer is" if "The answer is" in line else "Solution:"
            # The first non-blank character after the marker is the option letter.
            Solution = line.partition(marker)[2].strip()[:1] or None
        elif "Reasoning:" in line:
            # Everything after the first marker, without surrounding whitespace.
            Reasoning = line.partition("Reasoning:")[2].strip()
        elif "Confidence: " in line:
            Confidence = line.partition("Confidence: ")[2].strip()
            claudeResult.append({"Solution" : Solution, "Reasoning" : Reasoning, "Confidence" : Confidence})
            Solution = Reasoning = Confidence = None


In [None]:
# Tabulate the parsed round-1 records: one row per record, one column per key
# (Solution / Reasoning / Confidence), then display the frame.
claudeResultDF = pd.DataFrame(claudeResult)
claudeResultDF

Unnamed: 0,Solution,Reasoning,Confidence
0,A,"I agree with Agents 1, 2 and 3 that the answer...",0.95
1,C,"I agree with Agents 1, 2 and 3 that the answer...",0.9
2,C,I agree with Agents 1 and 2 that the answer is...,1.0
3,B,"I agree with Agents 1, 2 and 3 that the answer...",0.95
4,C,"I agree with Agents 1, 2 and 3 that the answer...",1.0
...,...,...,...
115,B,I agree with Agents One and Three that startin...,1.0
116,C,I concur with all three agents that an interne...,1.0
117,B,I agree with all three agents that a preacher ...,1.0
118,B,I concur with Agents Two and Three that only t...,1.0


In [None]:
# Persist the round-1 answers. index=False keeps the row index out of the file;
# otherwise a plain pd.read_csv brings it back as an extra "Unnamed: 0" data
# column, and another one is added on every save/load round trip.
claudeResultDF.to_csv('./Claude_R1.csv', index=False)

In [None]:
# Option letter -> numeric index, then print the index of every parsed answer,
# one per line.
mapping = dict(zip("ABCD", range(4)))

for letter in claudeResultDF['Solution'].tolist():
  print(mapping[letter])

0
2
2
1
2
0
0
1
0
1
1
3
2
0
1
1
0
2
2
0
0
0
1
0
0
0
0
2
0
2
0
2
2
2
2
2
1
0
2
2
1
1
0
0
1
2
2
2
0
0
0
0
2
1
1
0
0
1
1
3
0
1
1
1
2
2
2
1
2
0
2
0
3
2
1
2
0
1
1
0
3
0
2
0
3
1
2
3
1
2
1
0
3
0
2
1
1
2
0
1
1
2
0
2
2
3
3
1
2
1
1
1
0
2
0
1
2
1
1
2


# Round 2

In [7]:
# import all round 1 answers:
gpt = pd.read_csv("./GPT3.5_R1.csv")
mixtral = pd.read_csv("./Mixtral_R1.csv")
claude = pd.read_csv("./Claude_R1.csv")

In [8]:
# Instruction header placed in front of every question. It asks for step-by-step
# reasoning and a self-reported confidence in a Solution / Reasoning / Confidence
# template; the answer-parsing cells look for those same markers.
# The text ends with "Question: " so initial_prompt can append the puzzle directly.
prompt = """Please answer the question with step-by-step reasoning. Also,evaluate your confidence level (between 0.0 and 1.0) to indicate the possibility of your answer being right. Please give the answer in this template:
Solution: label
Reasoning:
Confidence: confidence value

Question: """

def initial_prompt(item, prompt):
  """Return ``prompt`` followed by the question and one line per answer choice.

  ``item`` is indexed with "question" and "choice_list"; every choice is written
  as "<letter> : <choice>" on its own line, using the letters A, B, C, D in order.
  """
  letters = "ABCD"
  header = f'{prompt}{item["question"]}\n'
  choice_lines = "".join(
    f'{letters[slot]} : {text}\n' for slot, text in enumerate(item["choice_list"])
  )
  return header + choice_lines

def generate_prompt(gpt, mixtral, llama, prompt, questions=None, num_questions=120):
  """Build one debate prompt per question from the three agents' previous answers.

  Each prompt is ``initial_prompt(question, prompt)`` followed by a request to
  review the other agents' solutions and then, for each agent, its answer,
  reasoning and confidence.

  Args:
    gpt: answers quoted as "Agent three"; rows are read with ``.loc[index]`` and
      must provide 'Solution', 'Reasoning' and 'Confidence'.
    mixtral: answers quoted as "Agent two" (same layout).
    llama: answers quoted as "Agent one" (same layout). This argument is now
      actually used; the body previously ignored it and read the notebook-level
      ``claude`` frame instead.
    prompt: instruction header passed on to ``initial_prompt``.
    questions: frame holding the puzzles, read with ``.loc[index]``. Defaults to
      the notebook-level ``SP_test``, which was previously hard-coded.
    num_questions: prompts are built for row labels 0 .. num_questions - 1.
      Defaults to 120, the previously hard-coded count.

  Returns:
    A list of ``num_questions`` prompt strings, in row order.
  """
  if questions is None:
    questions = SP_test

  # Same text as before, spelled with explicit newlines.
  review_request = (
    "\nCarefully review the following solutions from other agents as additional\n"
    "information, and provide your own answer and step-by-step reasoning to the question.\n"
    "\n"
    "Clearly state which point of view you agree or disagree with and why.\n\n"
  )
  # Order in which the agents are quoted in the prompt.
  agents = (("one", llama), ("two", mixtral), ("three", gpt))

  prompts = []
  for index in range(num_questions):
    basePrompt = initial_prompt(questions.loc[index], prompt)
    basePrompt += review_request
    for ordinal, answers in agents:
      answer = answers.loc[index]
      basePrompt += (
        f"Agent {ordinal} thinks the Answer is {answer['Solution']}.\n"
        f"agent {ordinal} reason: {answer['Reasoning']}\n"
        f"agent {ordinal} confidence: {answer['Confidence']}\n"
      )
    prompts.append(basePrompt)

  return prompts


In [9]:
# Build the round-2 prompts from the previous-round answer frames loaded above
# (GPT-3.5, Mixtral, Claude) and the instruction header.
prompts = generate_prompt(gpt, mixtral, claude, prompt)

In [10]:
# Dump every prompt, numbered from 1, each followed by a ruler line.
for number, text in enumerate(prompts, start=1):
  print(
    number,
    text,
    "##################################################################################",
    sep="\n",
  )

1
Please answer the question with step-by-step reasoning. Also,evaluate your confidence level (between 0.0 and 1.0) to indicate the possibility of your answer being right. Please give the answer in this template:
Solution: label
Reasoning:
Confidence: confidence value

Question: In a small village, two farmers are working in their fields - a diligent farmer and a lazy farmer. The hardworking farmer is the son of the lazy farmer, but the lazy farmer is not the father of the hardworking farmer. Can you explain this unusual relationship?
A : The lazy farmer is his mother.
B : The lazy farmer is not a responsible father as he is lazy.
C : The diligent farmer devoted himself to the farm and gradually forgot his father.
D : None of above.

Carefully review the following solutions from other agents as additional
information, and provide your own answer and step-by-step reasoning to the question.

Clearly state which point of view you agree or disagree with and why.

Agent one thinks the Answe

### Make round 2 DataFrame

In [11]:
# Parse Claude's round-2 replies into one record per question.
# A record is closed by its "Confidence: " line; the Solution / Reasoning lines
# seen since the previous record are attached to it.
claudeResult = []
# Fields of the record being assembled. They are cleared after every append so
# a reply that lacks a field gets None for it instead of silently reusing the
# previous question's value.
Solution = Reasoning = Confidence = None
with open("/content/r2_answers.txt", encoding="utf-8") as file_in:
    for line in file_in:
        if "Solution:" in line:
            # Replies use either "Solution: X" or "Solution: The answer is X".
            marker = "The answer is" if "The answer is" in line else "Solution:"
            # The first non-blank character after the marker is the option letter.
            Solution = line.partition(marker)[2].strip()[:1] or None
        elif "Reasoning:" in line:
            # Everything after the first marker, without surrounding whitespace.
            Reasoning = line.partition("Reasoning:")[2].strip()
        elif "Confidence: " in line:
            Confidence = line.partition("Confidence: ")[2].strip()
            claudeResult.append({"Solution" : Solution, "Reasoning" : Reasoning, "Confidence" : Confidence})
            Solution = Reasoning = Confidence = None


In [12]:
# Tabulate the parsed round-2 records: one row per record, one column per key
# (Solution / Reasoning / Confidence), then display the frame.
claudeResultDF = pd.DataFrame(claudeResult)
claudeResultDF

Unnamed: 0,Solution,Reasoning,Confidence
0,A,"I agree with Agents 1, 2 and 3 that Option A i...",0.95
1,C,"I agree with Agents 1, 2 and 3 that Option C i...",0.95
2,C,I agree with Agents 1 and 2 that Option C is t...,1.0
3,B,"I agree with Agents 1, 2 and 3 that Option B i...",0.95
4,C,"I agree with Agents 1, 2 and 3 that Option C i...",1.0
...,...,...,...
115,B,I agree with Agents One and Three that startin...,1.0
116,C,I concur with all three agents that the intern...,1.0
117,B,I agree with all three agents that a preacher ...,1.0
118,B,I concur with Agents Two and Three that only t...,1.0


In [13]:
# Persist the round-2 answers. index=False keeps the row index out of the file;
# otherwise a plain pd.read_csv brings it back as an extra "Unnamed: 0" data
# column, and another one is added on every save/load round trip.
claudeResultDF.to_csv('./Claude_R2.csv', index=False)

In [14]:
# Option letter -> numeric index, then print the index of every parsed answer,
# one per line.
mapping = dict(zip("ABCD", range(4)))

for letter in claudeResultDF['Solution'].tolist():
  print(mapping[letter])

0
2
2
1
2
0
0
1
0
1
1
3
2
0
1
1
0
2
2
0
0
0
1
0
0
0
0
2
0
2
0
2
2
2
2
2
1
0
2
2
1
1
0
0
1
2
2
2
0
0
0
0
2
1
1
0
0
1
1
3
0
1
1
1
2
0
2
1
2
0
2
0
3
2
1
2
0
1
1
0
3
0
1
0
3
1
2
3
1
2
3
2
3
0
2
1
1
2
0
1
1
2
0
2
2
2
3
1
2
1
1
1
0
2
0
1
2
1
1
2
