# Generating results

## Set model

In [1]:
!pip install -q -U google-generativeai

In [2]:
import pathlib
import google.generativeai as genai

In [3]:
from google.colab import userdata
# Fetch the value stored under 'GOOGLE_API_KEY' through Colab's `userdata` helper
# (so the key is never written into the notebook) and hand it to the SDK.
GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')
genai.configure(api_key=GOOGLE_API_KEY)

In [4]:
# Print the name of every model that advertises 'generateContent'
# among its supported generation methods.
content_model_names = (
    candidate.name
    for candidate in genai.list_models()
    if 'generateContent' in candidate.supported_generation_methods
)
for model_name in content_model_names:
    print(model_name)

models/gemini-1.0-pro
models/gemini-1.0-pro-001
models/gemini-1.0-pro-latest
models/gemini-1.0-pro-vision-latest
models/gemini-pro
models/gemini-pro-vision


In [5]:
# Notebook-wide model handle: read as a global by getGemini10Pro001Response and
# passed explicitly to the chat helpers from getChatResponse.
model = genai.GenerativeModel('gemini-1.0-pro-001')

## Generate text/chat responses and write them to CSV

In [6]:
import os
import pandas as pd
from tqdm import tqdm
import time
from vertexai.preview.generative_models import (
    HarmCategory,
    HarmBlockThreshold )
from google.cloud.aiplatform_v1beta1.types.content import SafetySetting

def getGemini10Pro001Response(prompt):
    """Send ``prompt`` to the notebook-level ``model`` and return the reply text.

    No throttling is applied: the one-second pause meant to stay under the
    API limits (about 15 minutes for a full run) is currently disabled.
    """
    # time.sleep(1)  # re-enable to avoid api limits
    reply = model.generate_content(prompt)
    return reply.text

def getLLMResponse(prompt):
    """Return the LLM answer for ``prompt``.

    Single indirection point for the backend; it currently delegates to
    ``getGemini10Pro001Response``.
    """
    answer = getGemini10Pro001Response(prompt)
    return answer

def splitQuestion(question):
    """Separate the final sentence of ``question`` from everything before it.

    Sentences are delimited by the two-character sequence '. '.

    Returns:
        ``(last_sentence, new_question)`` where ``new_question`` is the text
        preceding the last sentence (an empty string when there is only one
        sentence).
    """
    *leading_sentences, last_sentence = question.split('. ')
    return last_sentence, '. '.join(leading_sentences)
def getGeminiChatResponseNoSub(model,context,question):
    """Ask the final question in a single chat turn, without subproblems.

    Args:
        model: object exposing ``start_chat(history=...)``; the chat it returns
            must offer ``send_message(text, safety_settings=...)`` and a
            ``history`` list of messages with ``role`` and ``parts``.
        context: scenario and rules text.
        question: the final question text.

    Returns:
        ``(history, answer)``: the transcript with one newline-terminated
        "role: text" entry per message (first part only), and the text of
        the model's reply.
    """
    # Every harm category is set to BLOCK_NONE so replies are not filtered.
    harm_categories = (
        "HARM_CATEGORY_HARASSMENT",
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
    )
    safety_settings = [
        {"category": category, "threshold": "BLOCK_NONE"}
        for category in harm_categories
    ]

    # Scenario first, then the question, all in one message.
    prompt = (
        ''' Consider this fictional board game scenario and the rules defined for the scenario: '''
        + context
        + ''' Given this, answer the final question: '''
        + question
    )

    chat = model.start_chat(history=[])
    reply = chat.send_message(prompt, safety_settings=safety_settings)

    transcript = "".join(
        turn.role + ": " + turn.parts[0].text + "\n" for turn in chat.history
    )
    return transcript, reply.text

def getGeminiChatResponse(model,context,question,subquestions):
    """Walk the model through the subproblems, then ask the final question.

    The chat runs as: (1) scenario and rules plus an instruction to wait for
    subproblems, (2) one turn per subproblem, (3) the final question.

    Args:
        model: object exposing ``start_chat(history=...)``; the chat it returns
            must offer ``send_message(text, safety_settings=...)`` and a
            ``history`` list of messages with ``role`` and ``parts``.
        context: scenario and rules text.
        question: the final question text.
        subquestions: iterable of subproblem prompts. Blank strings are
            skipped and never sent.

    Returns:
        ``(history, answer)``: the transcript with one newline-terminated
        "role: text" entry per message (first part only), and the text of
        the reply to the final question.
    """
    header_instruction = ''' Consider this fictional board game scenario and the rules defined for the scenario: '''
    header_last_instruction = ''' Given this, wait for the subproblems I give you and answer them accordingly ok?  '''
    final_instruction = ''' Given the answers you have generated for the subproblems, answer the final question: '''

    # Every harm category is set to BLOCK_NONE so replies are not filtered.
    safety_settings: list[dict[str, str]] = [
        {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
        {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
    ]

    chat = model.start_chat(history=[])
    chat.send_message(header_instruction + context + header_last_instruction,
                      safety_settings=safety_settings)

    for subs in subquestions:
        # Splitting on "||" can leave empty fragments (e.g. a trailing
        # separator). They carry no subproblem, and the SDK is expected to
        # reject empty content, so skip them instead of spending a turn.
        if isinstance(subs, str) and not subs.strip():
            continue
        chat.send_message(subs, safety_settings=safety_settings)

    response = chat.send_message(final_instruction + question,
                                 safety_settings=safety_settings)

    history = "".join(
        message.role + ": " + message.parts[0].text + "\n"
        for message in chat.history
    )
    return history, response.text

In [39]:
import csv
import json
def getBaseAnswers(dataDf):
    """Query the LLM once per row of ``dataDf`` and save the answers to CSV.

    For every row, the 'example' text is extended with a request for the
    label and sent through ``getLLMResponse``. The output file is overwritten
    and holds an 'Example'/'Response' header followed by one line per row.

    Args:
        dataDf: DataFrame with an 'example' column of prompt text.
    """
    output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'
    label_request = "\n The label is what (proved, disproved, unknown)?"

    with open(output_csv_path, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Example', 'Response'])

        progress = tqdm(dataDf.iterrows(), total=dataDf.shape[0])
        # `count` is the 1-based position of the row being processed.
        for count, (_, row) in enumerate(progress, start=1):
            example = row['example']
            response = getLLMResponse(example + label_request)
            writer.writerow([example, response])
            print("The response for " + str(count) + "is: ",response)

def getChatResponse(dataDf, start_index=993):
    """Run the chat-based evaluation and append the results to the final CSV.

    For each processed row the model is asked the row's question, going
    through the row's subproblems first when there are any. One CSV line
    (context, question, answer, chat transcript) is appended per row.

    Args:
        dataDf: DataFrame with 'context', 'question' and 'llama-subproblems'
            columns; subproblems are separated by '||'.
        start_index: positional index of the first row to process. The
            default keeps the offset that used to be hard-coded (presumably
            resuming an interrupted run); pass 0 for a full run.

    Notes:
        The file is opened in append mode so earlier results are kept. The
        header row is written only when the file is new or empty, which keeps
        the CSV readable by ``csv.DictReader`` even on a fresh run.
    """
    import os  # local import: this cell must not depend on another cell's imports

    output_csv_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'

    # Decide before opening: mode 'a' creates the file, which would hide
    # the fact that it did not exist.
    needs_header = (not os.path.exists(output_csv_path)
                    or os.path.getsize(output_csv_path) == 0)
    pending = dataDf.iloc[start_index:]

    with open(output_csv_path, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        if needs_header:
            writer.writerow(['context', 'question', 'answer', 'chat'])

        for _, row in tqdm(pending.iterrows(), total=len(pending)):
            context = row['context']
            question = row['question'] + "\n Give me a one word answer (proved, disproved, unknown)"
            subquestion = row["llama-subproblems"]
            if not subquestion:
                print("no subs")
                history,response = getGeminiChatResponseNoSub(model,context,question)
            else:
                subquestions = subquestion.split("||")
                print("have subs")
                history,response = getGeminiChatResponse(model,context,question,subquestions)
            writer.writerow([context, question,response,history])
            # Each row costs several API calls; flush so finished rows survive
            # a kernel crash or disconnect.
            file.flush()

In [43]:
def main():
    """Load the subproblem-annotated dataset and run the chat-based evaluation.

    The baseline (no-subproblem) run is kept below as a commented-out
    alternative.
    """
    # Baseline run (disabled):
    # df = pd.read_json("/content/drive/MyDrive/gemini/data/test.json", dtype=str)
    # getBaseAnswers(df)
    subproblem_frame = pd.read_json(
        "/content/drive/MyDrive/gemini/data/output-with-subproblems.json",
        dtype=str,
    )
    getChatResponse(subproblem_frame)


if __name__ == "__main__":
    main()


  0%|          | 0/7 [00:00<?, ?it/s]

have subs


 14%|█▍        | 1/7 [00:19<01:59, 19.89s/it]

have subs


ERROR:tornado.access:503 POST /v1beta/models/gemini-1.0-pro-001:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 1369.91ms
 29%|██▊       | 2/7 [00:35<01:26, 17.25s/it]

no subs


 43%|████▎     | 3/7 [00:36<00:39,  9.99s/it]

have subs


 57%|█████▋    | 4/7 [00:52<00:36, 12.13s/it]

have subs


 71%|███████▏  | 5/7 [01:04<00:24, 12.29s/it]

have subs


 86%|████████▌ | 6/7 [01:18<00:12, 12.69s/it]

have subs


100%|██████████| 7/7 [01:31<00:00, 13.14s/it]


# Process answers

## Check Header

### subs

In [45]:
import csv

# Peek at the first CSV row of the subproblem results to confirm the column names.
file_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'

with open(file_path, mode='r', encoding='utf-8') as file:
    header = next(csv.reader(file))
print("Header: {}".format(header))


Header: ['context', 'question', 'answer', 'chat']


### base

In [109]:
# Same header peek for the baseline (no-subproblem) results file.
file_path2 = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'

with open(file_path2, mode='r', encoding='utf-8') as file:
    first_row = next(csv.reader(file))
header = first_row
print("Header: " + str(header))

Header: ['Example', 'Response']


## Convert all answers to lowercase

### subs

In [49]:
import csv

# Lower-case the 'answer' column of the subproblem results, rewriting the CSV in place.
file_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'
column_name = 'answer'

# Pass 1: load every row, lower-casing the target column on the way in.
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    headers = reader.fieldnames
    modified_rows = [
        {**row, column_name: row[column_name].lower()}
        for row in reader
    ]

# Pass 2: overwrite the same file with the normalised rows.
with open(file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=headers)
    writer.writeheader()
    for normalised_row in modified_rows:
        writer.writerow(normalised_row)

### base

In [111]:
import csv

# Lower-case the 'Response' column of the baseline results, rewriting the CSV in place.
file_path = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'
column_name = 'Response'

# Load all rows first; the same file is overwritten afterwards.
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    headers = reader.fieldnames
    modified_rows = list(reader)

for entry in modified_rows:
    entry[column_name] = entry[column_name].lower()

# Write the normalised content back over the original file.
with open(file_path, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=headers)
    writer.writeheader()
    writer.writerows(modified_rows)

## Check the answers and update the CSV

### subs

In [99]:
import re
# Input of this cell: the subproblem results CSV. 'answer' is the column that is
# scanned for labels and rewritten by the loop further down.
file_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'
column_name = 'answer'
def contains_all_three(text):
    """Return True when 'proved', 'disproved' and 'unknown' all occur in ``text`` as whole words."""
    label_patterns = (r'\bproved\b', r'\bdisproved\b', r'\bunknown\b')
    return all(re.search(pattern, text) is not None for pattern in label_patterns)
def contains_both_proved_disproved(text):
    """Return True when both 'proved' and 'disproved' occur in ``text`` as whole words."""
    # 'disproved' alone does not count as 'proved': \b needs a word boundary before the 'p'.
    return all(
        re.search(pattern, text) is not None
        for pattern in (r'\bproved\b', r'\bdisproved\b')
    )
def contains_proved(text):
    """Return True when 'proved' occurs in ``text`` as a whole word."""
    match = re.search(r'\bproved\b', text)
    return match is not None
def contains_disproved(text):
    """Return True when 'disproved' occurs in ``text`` as a whole word."""
    match = re.search(r'\bdisproved\b', text)
    return match is not None
def contains_unknown(text):
    """Return True when 'unknown' occurs in ``text`` as a whole word."""
    match = re.search(r'\bunknown\b', text)
    return match is not None
def contains_none(text):
    """Return True when none of 'proved', 'disproved', 'unknown' occurs in ``text`` as a whole word."""
    label_patterns = (r'\bproved\b', r'\bdisproved\b', r'\bunknown\b')
    return not any(re.search(pattern, text) is not None for pattern in label_patterns)
# Collapse each free-text answer to a single label and tally what was found.
# When several labels occur in one answer, the later assignment below wins:
# unknown > disproved > proved.
modified_rows = []
countb = countp = countd = countu = counta = countn = 0
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    fieldnames = reader.fieldnames
    for index, row in enumerate(reader, start=1):
        column_value = row[column_name]
        if contains_all_three(column_value):
            counta +=1
        if contains_none(column_value):
            print("the index for none: ",index)
            countn +=1
            # Only the known variant "proven" is mapped to "proved". Any other
            # unparseable answer keeps its raw text, so it is scored as wrong
            # instead of silently being counted as "proved".
            if re.search(r'\bproven\b', column_value) is not None:
                row[column_name] = "proved"
        if contains_both_proved_disproved(column_value):
            countb +=1
        if contains_proved(column_value):
            countp +=1
            row[column_name] = "proved"
        if contains_disproved(column_value):
            countd +=1
            row[column_name] = "disproved"
        if contains_unknown(column_value):
            countu +=1
            row[column_name] = "unknown"
        modified_rows.append(row)

print("all three: ",counta)
print("none: ",countn)
print("both p and d: ",countb)
print("proved: ",countp)
print("disproved: ",countd)
print("unknown: ",countu)
print("total: ",len(modified_rows))

# The normalised rows go to a separate file; the input CSV is left untouched.
with open("/content/drive/MyDrive/gemini/data/updated_final_results.csv", mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(modified_rows)

the index for none:  153
all three:  0
none:  1
both p and d:  0
proved:  286
disproved:  411
unknown:  302
total:  1000


### base

In [113]:
import re
# Input of this cell: the baseline results CSV. 'Response' is the column that is
# scanned for labels and rewritten by the loop further down.
file_path = '/content/drive/MyDrive/gemini/data/Gemini_results.csv'
column_name = 'Response'
def contains_all_three(text):
    """Return True when 'proved', 'disproved' and 'unknown' all occur in ``text`` as whole words."""
    label_patterns = (r'\bproved\b', r'\bdisproved\b', r'\bunknown\b')
    return all(re.search(pattern, text) is not None for pattern in label_patterns)
def contains_both_proved_disproved(text):
    """Return True when both 'proved' and 'disproved' occur in ``text`` as whole words."""
    # 'disproved' alone does not count as 'proved': \b needs a word boundary before the 'p'.
    return all(
        re.search(pattern, text) is not None
        for pattern in (r'\bproved\b', r'\bdisproved\b')
    )
def contains_proved(text):
    """Return True when 'proved' occurs in ``text`` as a whole word."""
    match = re.search(r'\bproved\b', text)
    return match is not None
def contains_disproved(text):
    """Return True when 'disproved' occurs in ``text`` as a whole word."""
    match = re.search(r'\bdisproved\b', text)
    return match is not None
def contains_unknown(text):
    """Return True when 'unknown' occurs in ``text`` as a whole word."""
    match = re.search(r'\bunknown\b', text)
    return match is not None
def contains_none(text):
    """Return True when none of 'proved', 'disproved', 'unknown' occurs in ``text`` as a whole word."""
    label_patterns = (r'\bproved\b', r'\bdisproved\b', r'\bunknown\b')
    return not any(re.search(pattern, text) is not None for pattern in label_patterns)
# Collapse each free-text baseline response to a single label and tally what was found.
# When several labels occur in one response, the later assignment below wins:
# unknown > disproved > proved.
modified_rows = []
countb = countp = countd = countu = counta = countn = 0
with open(file_path, mode='r', encoding='utf-8') as file:
    reader = csv.DictReader(file)
    fieldnames = reader.fieldnames
    for index, row in enumerate(reader, start=1):
        column_value = row[column_name]
        if contains_all_three(column_value):
            counta +=1
        if contains_none(column_value):
            countn +=1
            # Map the variant "proven" to "proved" so both evaluation arms are
            # normalised by the same rule. Any other unparseable response keeps
            # its raw text and is therefore scored as wrong.
            if re.search(r'\bproven\b', column_value) is not None:
                row[column_name] = "proved"
        if contains_both_proved_disproved(column_value):
            countb +=1
        if contains_proved(column_value):
            countp +=1
            row[column_name] = "proved"
        if contains_disproved(column_value):
            countd +=1
            row[column_name] = "disproved"
        if contains_unknown(column_value):
            countu +=1
            row[column_name] = "unknown"
        modified_rows.append(row)

print("all three: ",counta)
print("none: ",countn)
print("both p and d: ",countb)
print("proved: ",countp)
print("disproved: ",countd)
print("unknown: ",countu)
print("total: ",len(modified_rows))

# The normalised rows go to a separate file; the input CSV is left untouched.
with open("/content/drive/MyDrive/gemini/data/updated_gemini_results.csv", mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(modified_rows)

all three:  0
none:  0
both p and d:  0
proved:  357
disproved:  443
unknown:  200
total:  1000


## Check the one answer that did not contain any of the three labels (subs)

In [98]:
# Print the raw answer of the data row at 0-based position 152, i.e. the row
# reported as index 153 by the 1-based "none" check.
file_path = '/content/drive/MyDrive/gemini/data/Gemini_final_results.csv'
column_name = 'answer'

with open(file_path, mode='r', encoding='utf-8') as file:
    for index, row in enumerate(csv.DictReader(file)):
        if index != 152:
            continue
        print(row[column_name])


proven


## Recheck the answer for subs

In [None]:
import csv

# List every normalised answer with its 1-based row number for a manual check.
file_path = '/content/drive/MyDrive/gemini/data/updated_final_results.csv'
column_name = 'answer'

with open(file_path, mode='r', encoding='utf-8') as file:
    for count, row in enumerate(csv.DictReader(file), start=1):
        print("{}: ".format(count) + row[column_name])

# Accuracy check

## Using subs

In [107]:
# Load the gold labels and the normalised subproblem predictions.
# Both lists are compared position by position in the accuracy cells below.
dataPath = "/content/drive/MyDrive/gemini/data/output-with-subproblems.json"
dataDf = pd.read_json(dataPath, dtype=str)
file_path = '/content/drive/MyDrive/gemini/data/updated_final_results.csv'
column_name = 'answer'

# Take the label column directly; a row-by-row iterrows loop with a progress
# bar is unnecessary for copying a single column.
label = dataDf['label'].tolist()

with open(file_path, mode='r', encoding='utf-8') as file:
    predicted = [row[column_name] for row in csv.DictReader(file)]
print(len(predicted))

100%|██████████| 1000/1000 [00:00<00:00, 21595.63it/s]

1000





In [115]:
# Accuracy of the subproblem run: share of positions where prediction equals label.
if len(label) != len(predicted):
    print("Error: The lists 'label' and 'predicted' have different lengths.")
else:
    matches = sum(
        label_value == predicted_value
        for label_value, predicted_value in zip(label, predicted)
    )
    accuracy = matches / len(label)
    print(f"Accuracy using subproblems: {accuracy:.2%}")

Accuracy using subproblems: 51.80%


## The base answer

In [114]:
# Load the normalised baseline predictions, one per CSV data row.
file_path2 = '/content/drive/MyDrive/gemini/data/updated_gemini_results.csv'
column_name = 'Response'

with open(file_path2, mode='r', encoding='utf-8') as file:
    predicted2 = [row[column_name] for row in csv.DictReader(file)]
print(len(predicted2))

1000


In [116]:
# Accuracy of the baseline run: share of positions where prediction equals label.
if len(label) == len(predicted2):
    matches = 0
    for label_value, predicted_value in zip(label, predicted2):
        if label_value == predicted_value:
            matches += 1
    accuracy = matches / len(label)
    print(f"Accuracy without using subproblems: {accuracy:.2%}")
else:
    print("Error: The lists 'label' and 'predicted' have different lengths.")

Accuracy without using subproblems: 45.20%
