In [3]:
import ast

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data Processing of GSM8k

In [2]:
# Few-shot exemplars for GSM8k: eight Q/A pairs whose answers include the
# worked reasoning before the final "The answer is N." sentence.
# Line breaks inside a single Q or A are hard wraps only; get_n_shot_prompt
# joins them with spaces and keeps the breaks that precede "Q:" / "A:".
COT_FULL_prompt = """Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there
will be 21 trees. How many trees did the grove workers plant today?
A: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have
been 21 - 15 = 6. The answer is 6.
Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
A: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5. The answer is 5.
Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
A: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74. After eating 35, they
had 74 - 35 = 39. The answer is 39.
Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did
Jason give to Denny?
A: Jason started with 20 lollipops. Then he had 12 after giving some to Denny. So he gave Denny 20 - 12 = 8.
The answer is 8.
Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he
have now?
A: Shawn started with 5 toys. If he got 2 toys each from his mom and dad, then that is 4 more toys. 5 + 4 = 9.
The answer is 9.
Q: There were nine computers in the server room. Five more computers were installed each day, from monday
to thursday. How many computers are now in the server room?
A: There were originally 9 computers. For each of 4 days, 5 more computers were added. So 5 * 4 = 20
computers were added. 9 + 20 is 29. The answer is 29.
Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf
balls did he have at the end of wednesday?
A: Michael started with 58 golf balls. After losing 23 on tuesday, he had 58 - 23 = 35. After losing 2 more, he
had 35 - 2 = 33 golf balls. The answer is 33.
Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
A: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. So she has 23 - 15 dollars left. 23
- 15 is 8. The answer is 8."""

# Same eight questions as COT_FULL_prompt, but every answer is reduced to the
# final "The answer is N." sentence (no reasoning shown).
Standard_FULL_prompt = """Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there
will be 21 trees. How many trees did the grove workers plant today?
A: The answer is 6.
Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
A: The answer is 5.
Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
A: The answer is 39.
Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did
Jason give to Denny?
A: The answer is 8.
Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he
have now?
A: The answer is 9.
Q: There were nine computers in the server room. Five more computers were installed each day, from monday
to thursday. How many computers are now in the server room?
A: The answer is 29.
Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf
balls did he have at the end of wednesday?
A: The answer is 33.
Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
A: The answer is 8."""

In [3]:
# Location of the GSM8k CSV, relative to the notebook's working directory.
GSM8K_path = "./Data/gsm8k_test1.csv"

In [4]:
# Rows of this frame are read via their "question" and "answer" fields by
# get_pair_GSM8K; get_final_prompts accesses the frame as a global.
GSM8K_df = pd.read_csv(GSM8K_path)

In [5]:
def get_pair_GSM8K(s):
    """Build the (question, answer) pair for one GSM8k record.

    Args:
        s: Mapping-like record exposing "question" and "answer" entries
            (a DataFrame row or a plain dict both work).

    Returns:
        Tuple ``(q, a)`` where ``q`` is the question prefixed with "Q: " and
        ``a`` is the last line of the answer text with any leading "#" and
        space characters removed.
    """
    q = f"""Q: {s["question"]}"""
    # Strip surrounding whitespace first: a trailing newline would otherwise
    # make the "last line" an empty string, and trailing spaces / carriage
    # returns would leak into the extracted answer.
    a = s["answer"].strip().split("\n")[-1].lstrip("# ")
    return (q, a)

In [6]:
def get_n_shot_prompt(Prompt, n_shot=8):
    """Split a few-shot prompt into alternating Q/A turns and keep the first n_shot pairs.

    Newlines that directly precede "Q:" or "A:" are treated as turn
    boundaries; every other newline is replaced by a single space.

    Args:
        Prompt: Full few-shot prompt text.
        n_shot: Number of question/answer pairs to keep.

    Returns:
        List with at most ``2 * n_shot`` strings (question, answer, question, ...).
    """
    boundary = "<new>"
    # Tag the real turn boundaries before flattening the remaining newlines.
    tagged = Prompt.replace("\nQ:", boundary + "Q:").replace("\nA:", boundary + "A:")
    flattened = tagged.replace("\n", " ")
    turns = flattened.split(boundary)
    return turns[: 2 * n_shot]

In [7]:
def get_llama_structure(n_shot_prompt, current_question):
    """Render few-shot turns plus the current question with <s>/[INST] markers.

    Args:
        n_shot_prompt: Even-length list alternating question, answer, ...
        current_question: Question appended as the final, open [INST] turn.

    Returns:
        One string: a closed "<s>[INST]q[/INST]a</s>" segment per pair,
        followed by "<s>[INST]current_question[/INST]".

    Raises:
        AssertionError: If ``n_shot_prompt`` has an odd number of entries.
    """
    assert len(n_shot_prompt)%2==0
    segments = []
    for question, answer in zip(n_shot_prompt[0::2], n_shot_prompt[1::2]):
        segments.append(f"<s>[INST]{question}[/INST]{answer}</s>")
    history = "".join(segments)
    return f"{history}<s>[INST]{current_question}[/INST]"

In [8]:
def get_final_prompts(full_prompt,n_shot,n_examples):
    """Build model-ready prompts and reference answers for the first n_examples GSM8k rows.

    Reads the module-level ``GSM8K_df`` and relies on ``get_pair_GSM8K``,
    ``get_n_shot_prompt`` and ``get_llama_structure``.
    NOTE(review): the latter two names are redefined further down the
    notebook for AQuA; this function uses whichever definition is current
    in the kernel when it is called.

    Args:
        full_prompt: Few-shot prompt text to draw exemplars from.
        n_shot: Number of exemplar Q/A pairs to prepend to each question.
        n_examples: Number of leading rows of ``GSM8K_df`` to convert.

    Returns:
        Tuple ``(final_prompts, final_answers)`` of two equally long lists.
    """
    # The few-shot prefix depends only on (full_prompt, n_shot), so compute
    # it once instead of re-splitting the prompt for every example.
    n_shot_prompt = get_n_shot_prompt(full_prompt, n_shot)
    final_prompts = []
    final_answers = []
    for idx in range(n_examples):
        q, a = get_pair_GSM8K(GSM8K_df.iloc[idx])
        final_prompts.append(get_llama_structure(n_shot_prompt, q))
        final_answers.append(a)
    return final_prompts, final_answers

In [25]:
# Write one CSV per (prompt style, shot count) pair: both styles, 0 to 8 shots.
# Column 0 holds the prompts and column 1 the reference answers.
n_examples = len(GSM8K_df)
for full_prompt in ((COT_FULL_prompt, "cot"), (Standard_FULL_prompt, "std")):
    for n_shot in range(9):
        prompts, ans = get_final_prompts(full_prompt[0], n_shot, n_examples)
        file_name = f"gcp_data_inp_gsm8k/{full_prompt[1]}-{n_shot}-{n_examples}.csv"
        pd.DataFrame({0: prompts, 1: ans}).to_csv(file_name)

In [22]:
def get_prompts_and_answers(file_path):
    """Load a prompt/answer CSV and return its two columns as lists.

    Args:
        file_path: CSV whose first column is the index and whose remaining
            columns are headed "0" (prompts) and "1" (answers).

    Returns:
        Tuple ``(prompts, answers)`` of two lists.
    """
    frame = pd.read_csv(file_path, index_col=0)
    prompt_column, answer_column = (list(frame[name]) for name in ("0", "1"))
    return prompt_column, answer_column

# Evaluation of GSM8k

In [214]:
# Prompt file that was sent to the model and the matching file of model
# outputs for one configuration (file name pattern: <style>-<n_shot>-<n_examples>).
data_inp = "./gcp_data_inp_gsm8k/std-6-1319.csv"
data_out = "./gcp_data_out_gsm8k/std-6-1319.csv"

# Both frames are read as module-level globals by extract_answers.
in_df = pd.read_csv(data_inp,index_col=0)
out_df = pd.read_csv(data_out,index_col=0)

In [215]:
def extract_answers(idx):
    """Return the model's final numeric answer and the reference answer for one row.

    Reads the module-level ``in_df`` (prompts) and ``out_df`` (model outputs
    in column "0", reference answers in column "1").

    Args:
        idx: Row label to look up in both frames.

    Returns:
        Tuple ``(model_answer, gt)``. ``model_answer`` is the last
        whitespace-separated token of the model's reply that parses as a
        number (truncated to ``int``), or ``None`` when no token parses.
        ``gt`` is the value stored in ``out_df["1"]`` for that row, unchanged.
    """
    # Drop the first three characters of the stored prompt before measuring
    # its length (presumably the leading "<s>", which the saved model output
    # does not seem to repeat; confirm against the generation script).
    prompt_text = in_df["0"][idx][3:]
    completion = out_df["0"][idx]
    # The reply is whatever follows the echoed prompt, up to the first "</s>".
    model_reply = completion[len(prompt_text):].split("</s>")[0]
    gt = out_df["1"][idx]

    model_answer = None
    # Scan from the end so the last number in the reply wins. split() with no
    # argument also separates on newlines and tabs, not just single spaces.
    for token in reversed(model_reply.split()):
        cleaned = token.strip("$%. ").replace(",", "")
        try:
            model_answer = int(float(cleaned))
        except (ValueError, OverflowError):
            # Not a number (ValueError) or infinite (OverflowError): keep looking.
            continue
        break
    return model_answer, gt

In [216]:
# Spot-check the extraction on the first rows.
# NOTE(review): the saved output below lists many more indices than range(2)
# yields, so it appears to come from an earlier run with a larger range.
for idx in range(2):
    print("index:",idx,"answers(model,gt)",extract_answers(idx))

index: 0 answers(model,gt) (32, '18')
index: 1 answers(model,gt) (4, '3')
index: 2 answers(model,gt) (30000, '70000')
index: 3 answers(model,gt) (180, '540')
index: 4 answers(model,gt) (30, '20')
index: 5 answers(model,gt) (80, '64')
index: 6 answers(model,gt) (140, '260')
index: 7 answers(model,gt) (240, '160')
index: 8 answers(model,gt) (2, '45')
index: 9 answers(model,gt) (540, '460')
index: 10 answers(model,gt) (180, '366')
index: 11 answers(model,gt) (282, '694')
index: 12 answers(model,gt) (6, '13')
index: 13 answers(model,gt) (15, '18')
index: 14 answers(model,gt) (40, '60')
index: 15 answers(model,gt) (1, '125')
index: 16 answers(model,gt) (230, '230')
index: 17 answers(model,gt) (45000, '57500')
index: 18 answers(model,gt) (12, '7')
index: 19 answers(model,gt) (6, '6')
index: 20 answers(model,gt) (23, '15')
index: 21 answers(model,gt) (3, '14')
index: 22 answers(model,gt) (10, '7')
index: 23 answers(model,gt) (2, '8')
index: 24 answers(model,gt) (25, '26')
index: 25 answers(mo

In [217]:
# Manual inspection of a single row: print the model's raw reply and the
# reference answer. The slicing mirrors the first lines of extract_answers.
idx=10
s1 = in_df["0"][idx][3:]  # stored prompt without its first three characters
s2 = out_df["0"][idx]  # full text saved for this row in the output file
ma = s2[len(s1):].split("</s>")[0]  # text after the prompt, up to the first "</s>"
print(f"START-idx{idx}******************")
# Uncomment to also dump the prompt and the full output text:
# print(s1)
# print("*"*10)
# print(s2)
# print("*"*10)
print(ma)
print("*"*10)
print(out_df["1"][idx])
print(f"END-idx{idx}******************\n\n")

START-idx10******************
A: The answer is 180.
**********
366
END-idx10******************




In [299]:
# NOTE(review): this cell appears to repeat the GSM8k prompt definitions from
# the first cell of the notebook. The next code cell rebinds both names to the
# AQuA prompts, so which text these names hold depends on cell execution order.
COT_FULL_prompt = """Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there
will be 21 trees. How many trees did the grove workers plant today?
A: There are 15 trees originally. Then there were 21 trees after some more were planted. So there must have
been 21 - 15 = 6. The answer is 6.
Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
A: There are originally 3 cars. 2 more cars arrive. 3 + 2 = 5. The answer is 5.
Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
A: Originally, Leah had 32 chocolates. Her sister had 42. So in total they had 32 + 42 = 74. After eating 35, they
had 74 - 35 = 39. The answer is 39.
Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did
Jason give to Denny?
A: Jason started with 20 lollipops. Then he had 12 after giving some to Denny. So he gave Denny 20 - 12 = 8.
The answer is 8.
Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he
have now?
A: Shawn started with 5 toys. If he got 2 toys each from his mom and dad, then that is 4 more toys. 5 + 4 = 9.
The answer is 9.
Q: There were nine computers in the server room. Five more computers were installed each day, from monday
to thursday. How many computers are now in the server room?
A: There were originally 9 computers. For each of 4 days, 5 more computers were added. So 5 * 4 = 20
computers were added. 9 + 20 is 29. The answer is 29.
Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf
balls did he have at the end of wednesday?
A: Michael started with 58 golf balls. After losing 23 on tuesday, he had 58 - 23 = 35. After losing 2 more, he
had 35 - 2 = 33 golf balls. The answer is 33.
Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
A: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. So she has 23 - 15 dollars left. 23
- 15 is 8. The answer is 8."""

# Answer-only counterpart of the prompt above (same eight questions).
Standard_FULL_prompt = """Q: There are 15 trees in the grove. Grove workers will plant trees in the grove today. After they are done, there
will be 21 trees. How many trees did the grove workers plant today?
A: The answer is 6.
Q: If there are 3 cars in the parking lot and 2 more cars arrive, how many cars are in the parking lot?
A: The answer is 5.
Q: Leah had 32 chocolates and her sister had 42. If they ate 35, how many pieces do they have left in total?
A: The answer is 39.
Q: Jason had 20 lollipops. He gave Denny some lollipops. Now Jason has 12 lollipops. How many lollipops did
Jason give to Denny?
A: The answer is 8.
Q: Shawn has five toys. For Christmas, he got two toys each from his mom and dad. How many toys does he
have now?
A: The answer is 9.
Q: There were nine computers in the server room. Five more computers were installed each day, from monday
to thursday. How many computers are now in the server room?
A: The answer is 29.
Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf
balls did he have at the end of wednesday?
A: The answer is 33.
Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?
A: The answer is 8."""

# Data Processing of Aqua

In [287]:
# Few-shot exemplars for AQuA: four multiple-choice questions with worked
# answers. Turns are delimited by the literal "<split>" marker, which
# get_n_shot_prompt splits on; newlines inside a turn are kept as written.
# NOTE(review): this rebinds the same names used for the GSM8k prompts earlier
# in the notebook, so the GSM8k cells must not be re-run after this one
# without re-running their own prompt cell first.
COT_FULL_prompt = '''Q: John found that the average of 15 numbers is 40. If 10 is added to each number then the mean of the numbers is?
Answer Choices:
A) 50
B) 45
C) 65
D) 78 
E) 64
<split>A: If 10 is added to each number, then the mean of the numbers also increases by 10. So the new mean would be 50. The answer is A.
<split>Q: If a / b = 3/4 and 8a + 5b = 22,then find the value of a.
Answer Choices:
A) 1/2
B) 3/2 
C) 5/2 
D) 4/2 
E) 7/2
<split>A: If a / b = 3/4, then b = 4a / 3. So 8a + 5(4a / 3) = 22. This simplifies to 8a + 20a / 3 = 22, which means 44a / 3 = 22. So a is equal to 3/2. The answer is B.
<split>Q: A person is traveling at 20 km/hr and reached his destiny in 2.5 hr then find the distance?
Answer Choices:
A) 53 km
B) 55 km 
C) 52 km 
D) 60 km 
E) 50 km
<split>A: The distance that the person traveled would have been 20 km/hr * 2.5 hrs = 50 km. The answer is E.
<split>Q: How many keystrokes are needed to type the numbers from 1 to 500?
Answer Choices:
A) 1156
B) 1392 
C) 1480 
D) 1562 
E) 1788
<split>A: There are 9 one-digit numbers from 1 to 9. There are 90 two-digit numbers from 10 to 99. There are 401 three-digit numbers from 100 to 500. 9 + 90(2) + 401(3) = 1392. The answer is B.'''

# Answer-only counterpart: the same four questions, each answered with just
# "The answer is <letter>.".
Standard_FULL_prompt = '''Q: John found that the average of 15 numbers is 40. If 10 is added to each number then the mean of the numbers is?
Answer Choices:
A) 50
B) 45
C) 65
D) 78 
E) 64
<split>A: The answer is A.
<split>Q: If a / b = 3/4 and 8a + 5b = 22,then find the value of a.
Answer Choices:
A) 1/2
B) 3/2 
C) 5/2 
D) 4/2 
E) 7/2
<split>A: The answer is B.
<split>Q: A person is traveling at 20 km/hr and reached his destiny in 2.5 hr then find the distance?
Answer Choices:
A) 53 km
B) 55 km 
C) 52 km 
D) 60 km 
E) 50 km
<split>A: The answer is E.
<split>Q: How many keystrokes are needed to type the numbers from 1 to 500?
Answer Choices:
A) 1156
B) 1392 
C) 1480 
D) 1562 
E) 1788
<split>A: The answer is B.'''

In [288]:
# Location of the AQuA CSV, relative to the notebook's working directory.
AQUA_path = "./Data/AQUA1.csv"
# get_qa_pair reads the "question", "options" and "correct" fields of each row.
AQUA_df = pd.read_csv(AQUA_path)

In [289]:
def get_qa_pair(idx):
    """Build the question text (with its answer choices) and the correct label for one AQuA row.

    Reads the module-level ``AQUA_df``.

    Args:
        idx: Positional row number in ``AQUA_df``.

    Returns:
        Tuple ``(q, a)``: ``q`` is the question followed by an
        "Answer Choices:" line and one choice per line; ``a`` is the row's
        "correct" value.
    """
    s = AQUA_df.iloc[idx]
    raw_options = s["options"]
    # The options cell presumably holds the repr of a Python list of strings
    # (the legacy code stripped "[", "]" and "'" from each piece). Parse it as
    # a literal so that commas inside a choice, e.g. "A)1,000", do not split
    # that choice in two.
    try:
        parsed = ast.literal_eval(raw_options)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, (list, tuple)):
        choices = [str(choice).strip() for choice in parsed]
    else:
        # Not a list literal: fall back to the original comma split.
        choices = [piece.strip("['] ") for piece in raw_options.split(",")]
    op = "\n".join(choice.replace(" )", ")") for choice in choices)
    q = f"{s['question']}\nAnswer Choices:\n{op}"
    a = s["correct"]
    return q, a

def get_n_shot_prompt(prompt, n_shot=2):
    """Return the first n_shot question/answer turns of a "<split>"-delimited prompt.

    Args:
        prompt: Few-shot prompt whose turns are separated by "<split>".
        n_shot: Number of question/answer pairs to keep.

    Returns:
        List with at most ``2 * n_shot`` strings; surrounding whitespace of
        each turn is left as it appears in ``prompt``.
    """
    turns = prompt.split("<split>")
    wanted = 2 * n_shot
    return turns[:wanted]

In [290]:
def get_llama_structure(n_shot_prompt, current_question):
    """Render few-shot turns plus the current question with <s>/[INST] markers.

    Each exemplar turn is stripped of surrounding whitespace before it is
    inserted; ``current_question`` is inserted exactly as given.

    Args:
        n_shot_prompt: Even-length list alternating question, answer, ...
        current_question: Question appended as the final, open [INST] turn.

    Returns:
        One string: a closed "<s>[INST]q[/INST]a</s>" segment per pair,
        followed by "<s>[INST]current_question[/INST]".

    Raises:
        AssertionError: If ``n_shot_prompt`` has an odd number of entries.
    """
    assert len(n_shot_prompt)%2==0
    segments = []
    for question, answer in zip(n_shot_prompt[0::2], n_shot_prompt[1::2]):
        segments.append(f"<s>[INST]{question.strip()}[/INST]{answer.strip()}</s>")
    history = "".join(segments)
    return f"{history}<s>[INST]{current_question}[/INST]"

In [291]:
def get_final_prompts(full_prompt,n_shot,n_examples):
    """Build model-ready prompts and correct labels for the first n_examples AQuA rows.

    Relies on ``get_qa_pair``, ``get_n_shot_prompt`` and
    ``get_llama_structure`` as currently defined in the kernel.

    Args:
        full_prompt: Few-shot prompt text to draw exemplars from.
        n_shot: Number of exemplar Q/A pairs to prepend to each question.
        n_examples: Number of leading rows to convert (passed one by one to
            ``get_qa_pair`` as 0 .. n_examples - 1).

    Returns:
        Tuple ``(final_prompts, final_answers)`` of two equally long lists.
    """
    # The few-shot prefix depends only on (full_prompt, n_shot), so compute
    # it once instead of re-splitting the prompt for every example.
    n_shot_prompt = get_n_shot_prompt(full_prompt, n_shot)
    final_prompts = []
    final_answers = []
    for idx in range(n_examples):
        q, a = get_qa_pair(idx)
        final_prompts.append(get_llama_structure(n_shot_prompt, q))
        final_answers.append(a)
    return final_prompts, final_answers

In [292]:
# Write one CSV per (prompt style, shot count) pair: both styles, 0 to 4 shots.
# Column 0 holds the prompts and column 1 the correct labels.
n_examples = len(AQUA_df)
for full_prompt in ((COT_FULL_prompt, "cot"), (Standard_FULL_prompt, "std")):
    for n_shot in range(5):
        prompts, ans = get_final_prompts(full_prompt[0], n_shot, n_examples)
        file_name = f"gcp_data_inp_aqua/{full_prompt[1]}-{n_shot}-{n_examples}.csv"
        pd.DataFrame({0: prompts, 1: ans}).to_csv(file_name)

In [293]:
def get_prompts_and_answers(file_path):
    """Read a saved prompt/answer CSV back into two Python lists.

    Args:
        file_path: CSV whose first column is the index and whose remaining
            columns are headed "0" (prompts) and "1" (answers).

    Returns:
        Tuple ``(prompts, answers)`` of two lists.
    """
    table = pd.read_csv(file_path, index_col=0)
    loaded_prompts = list(table["0"])
    loaded_answers = list(table["1"])
    return loaded_prompts, loaded_answers

In [295]:
# Reload one generated file (CoT prompts, 3 shots, 254 examples) as lists.
prompts,ans = get_prompts_and_answers("./gcp_data_inp_aqua/cot-3-254.csv")

# Evaluation of Aqua

In [229]:
# Prompt file that was sent to the model and the matching file of model
# outputs for one AQuA configuration (CoT, 4 shots, 254 examples).
# NOTE(review): these rebind data_inp / data_out / in_df / out_df, which the
# GSM8k evaluation cells above also use.
data_inp = "./gcp_data_inp_aqua/cot-4-254.csv"
data_out = "./gcp_data_out_aqua/cot-4-254.csv"

in_df = pd.read_csv(data_inp,index_col=0)
out_df = pd.read_csv(data_out,index_col=0)

In [233]:
# Manual inspection of a single AQuA row: print the model's raw reply and the
# correct label stored next to it.
idx=0
s1 = in_df["0"][idx][3:]  # stored prompt without its first three characters
s2 = out_df["0"][idx]  # full text saved for this row in the output file
ma = s2[len(s1):].split("</s>")[0]  # text after the prompt, up to the first "</s>"
print(f"START-idx{idx}******************")
# Uncomment to also dump the prompt and the full output text:
# print(s1)
# print("*"*10)
# print(s2)
# print("*"*10)
print(ma)
print("*"*10)
print(out_df["1"][idx])
print(f"END-idx{idx}******************\n\n")

START-idx0******************
A: The angle of elevation changes from 45° to 60° in 10 minutes, which means the car is moving at a uniform speed. The speed of the car can be calculated as follows:
Speed = Distance / Time
= 100 m / 10 min
= 10 m/min
Now, we need to find the time it takes for the car to reach the base of the tower. We can use the formula:
Time = Distance / Speed
= 100 m / 10 m/min
= 10 min
Therefore, the car will reach the base of the tower in 10 minutes after it passes the top of the tower. The answer is A.
**********
A
END-idx0******************


