In [1]:
from openai import OpenAI
# Uncomment the following line to get the API key from the user data
# if you run this in colab
# from google.colab import userdata
# OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')

# When running locally, read the API key from the OPENAI_API_KEY environment
# variable. os.getenv returns None if the variable is not set.
import os
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')


In [16]:
import requests
import json

# Raw GitHub URL of the BIG-bench "causal_judgment" task definition (JSON).
github_url = "https://raw.githubusercontent.com/google/BIG-bench/refs/heads/main/bigbench/benchmark_tasks/causal_judgment/task.json"

try:
  # Without a timeout requests waits indefinitely on an unresponsive server,
  # which would hang the notebook cell.
  response = requests.get(github_url, timeout=30)
  response.raise_for_status()  # Raise an exception for bad status codes

  # Parse the body; `data` is the task dictionary used by the following cells.
  data = json.loads(response.text)

except (requests.exceptions.RequestException, ValueError) as e:
  # json.loads raises json.JSONDecodeError, a ValueError subclass that is NOT
  # a RequestException, so it has to be listed explicitly for the message
  # below to be truthful. Note that `data` is left undefined on failure.
  print(f"Error downloading or parsing JSON: {e}")
  

In [13]:
# Uncomment the following line if you are using google colab
# from google.colab import drive

import json

def save_dict_to_drive(data, filename):
  """Saves a dictionary as pretty-printed JSON under ./output.

  The output directory is created if it does not exist yet. When running on
  Google Colab, the commented-out lines below can be enabled (and the local
  `filepath` assignment disabled) to write to Google Drive instead.

  Args:
    data: The dictionary to save; it must be JSON-serializable.
    filename: The name of the file to save to (e.g., 'my_data.json').
  """
  import os  # local import so this cell does not depend on another cell's imports

  # Uncomment the following lines if you are using google colab
  # drive.mount('/content/drive')
  # filepath = f'/content/drive/MyDrive/Colab/{filename}'

  # If you are not using google colab, the file is saved under ./output
  filepath = f'./output/{filename}'

  # open(..., 'w') does not create missing parent directories, so make sure
  # the target directory exists before writing.
  os.makedirs(os.path.dirname(filepath) or '.', exist_ok=True)

  with open(filepath, 'w', encoding='utf-8') as f:
    json.dump(data, f, indent=4)
  print(f"Dictionary saved to: {filepath}")

def load_dict_from_drive(filename):
  """Reads a JSON file from ./output and returns its parsed content.

  Args:
    filename: The name of the file to load from (e.g., 'my_data.json').

  Returns:
    The parsed JSON content, or None if the file does not exist (a message
    is printed in either case).
  """
  # Uncomment the following lines if you are using google colab
  # drive.mount('/content/drive')
  # filepath = f'/content/drive/MyDrive/Colab/{filename}'

  # If you are not using google colab, the file is read from ./output
  filepath = f'./output/{filename}'

  # Only opening the file can raise FileNotFoundError, so guard just that step.
  try:
    handle = open(filepath, 'r')
  except FileNotFoundError:
    print(f"File not found: {filepath}")
    return None

  with handle:
    loaded = json.load(handle)
  print(f"Dictionary loaded from: {filepath}")
  return loaded


# Example usage:
# my_dict = {'key1': 'value1', 'key2': 'value2'}
# save_dict_to_drive(my_dict, 'my_dictionary.json')

# loaded_dict = load_dict_from_drive('my_dictionary.json')
# if loaded_dict:
#   print(loaded_dict)


In [18]:
# Persist the downloaded task to task_patched.json, then rebind `data` to the
# copy read back from disk. load_dict_from_drive returns None if the file is
# not found, in which case `data` becomes None.
save_dict_to_drive(data, 'task_patched.json')
data = load_dict_from_drive('task_patched.json')

Dictionary saved to: ./output/task_patched.json
Dictionary loaded from: ./output/task_patched.json


In [14]:
# System instruction: ask the model to locate the question inside the given
# text and answer in the fixed form "The question is: <question>".
system_prompt = """
You will be presented with a text, the text includes a question,
your task is to identify the question and extract it. Your output should be:

The question is: <question>
where <question> is the extracted question from the text.
"""

# Request template: the model name plus a two-message conversation.
model_params = dict(
    model="gpt-4o-mini",
    messages=[
        dict(role="system", content=system_prompt),
        # NOTE: the user content is the literal placeholder text
        # "example['input']", not the value of an example.
        dict(role="user", content="example['input']"),
    ],
)

# Show the system message for a quick visual check.
print(model_params['messages'][0])

{'role': 'system', 'content': '\nYou will be presented with a text, the text includes a question,\nyour task is to identify the question and extract it. Your output should be:\n\nThe question is: <question>\nwhere <question> is the extracted question from the text.\n'}


In [19]:
import re
import copy
# OpenAI API client, authenticated with the key loaded in the first cell.
client = OpenAI(
    api_key = OPENAI_API_KEY,
)

# Set to a small integer (e.g. 3) to smoke-test on the first few examples
# before paying for a full run; None processes every example.
MAX_EXAMPLES = None

# Slicing makes a shallow copy of the list: the example dicts are shared, so
# the 'input_question' field written below still lands in `data`.
examples_to_process = data['examples'][:MAX_EXAMPLES]

for idx, example in enumerate(examples_to_process):
  print(f"Processing example {idx+1}/{len(examples_to_process)}")
  completion = client.chat.completions.create(
      model=model_params['model'],
      messages=[
          # Only the system message is taken from the template; the user
          # message is built from the current example.
          model_params['messages'][0],
          {
              "role": "user",
              "content": example['input']
          }
      ]
  )
  # message.content is optional in the API response and may be None; fall
  # back to an empty string so the regex below cannot raise TypeError.
  answer = (completion.choices[0].message.content or '').strip()
  # Remove the leading "The question is:" label. Stripping first makes the
  # '^' anchor match even when the reply starts with whitespace, and the
  # final strip drops trailing whitespace from the extracted question.
  question = re.sub(r'^The question is:\s*', '', answer, flags=re.IGNORECASE).strip()
  print(question)
  example['input_question'] = question


Processing example 1/190
Did the CEO intentionally harm the environment?
Processing example 2/190
Did the CEO intentionally help the environment?
Processing example 3/190
Did Jane cause the computer to crash?
Processing example 4/190
Did Jane cause the computer to crash?
Processing example 5/190
Did Professor Smith cause the problem?
Processing example 6/190
Did the administrative assistant cause the problem?
Processing example 7/190
Did Daniel cause the computer crash?
Processing example 8/190
Did Claire cause the computer crash?
Processing example 9/190
Did the lieutenant intentionally put the soldiers in the line of fire?
Processing example 10/190
Did the lieutenant intentionally take the soldiers out of the line of fire?
Processing example 11/190
Did Jake intentionally hit the bull's-eye?
Processing example 12/190
Did Jake intentionally hit the bull's-eye?
Processing example 13/190
Did the pharmacist's decision cause the patient's recovery?
Processing example 14/190
Did the attendi

In [30]:
# Persist the current contents of `data` to task_patched.json.
# NOTE(review): the execution counts suggest this cell was last run after the
# cell below it — confirm the intended order before a Restart & Run All.
save_dict_to_drive(data, 'task_patched.json')

Dictionary saved to: ./output/task_patched.json


In [29]:
# Split each example's input into the previously extracted question and the
# remaining story text, stored as example['input_text'].
for idx, example in enumerate(data['examples']):
  print(f"Processing example {idx+1}/{len(data['examples'])}")
  q = example.get('input_question')
  if q is None:
    # Happens when the extraction cell was only run on a subset of examples.
    print(f"WARNING: example {idx+1} has no 'input_question'; skipping")
    continue
  print(example['input'])
  print(q)
  if not q or q not in example['input']:
    # The question comes from a language model and is not guaranteed to be a
    # verbatim substring of the input. In that case replace() below changes
    # nothing and input_text would silently still contain the question.
    print(f"WARNING: extracted question not found verbatim in example {idx+1}; "
          "input_text still contains the question")
  text = example['input'].replace(q, '').strip()
  print(text)
  # str is immutable, so no defensive copy is needed.
  example['input_text'] = text


Processing example 1/190
The CEO of a company is sitting in his office when his Vice President of R&D comes in and says, 'We are thinking of starting a new programme. It will help us increase profits, but it will also harm the environment.' The CEO responds that he doesn't care about harming the environment and just wants to make as much profit as possible. The programme is carried out, profits are made and the environment is harmed. Did the CEO intentionally harm the environment?
Did the CEO intentionally harm the environment?
The CEO of a company is sitting in his office when his Vice President of R&D comes in and says, 'We are thinking of starting a new programme. It will help us increase profits, but it will also harm the environment.' The CEO responds that he doesn't care about harming the environment and just wants to make as much profit as possible. The programme is carried out, profits are made and the environment is harmed.
Processing example 2/190
The CEO of a company is sitt