In [21]:
import os 
import json
import pandas as pd

import concurrent.futures
import re
import ast


def extract_matrix(text):
    # 使用正则提取最外层矩阵
    match = re.search(r'\[\[.*?\]\]', text, re.DOTALL)
    if match:
        matrix_str = match.group(0)  # 获取矩阵部分
        # 去除注释部分
        matrix_str_no_comments = re.sub(r'#.*', '', matrix_str)  # 删除 # 及之后的内容
        matrix_str_cleaned = re.sub(r'\s+', ' ', matrix_str_no_comments)  # 删除多余的空白字符
        return ast.literal_eval(matrix_str_cleaned)  # 转为 Python 列表
    return None 

def get_causal(text):
    # 确保输入是字符串
    if not isinstance(text, str):
        return None  # 如果不是字符串，返回 None

    # 使用正则表达式获取第一个 ``` 和最后一个 ``` 之间的子字符串
    causal_graph = re.search(r"```(.*?)```", text, re.DOTALL)
    
    if causal_graph:
        causal_graph = extract_matrix(causal_graph.group(1))
        return causal_graph # 返回匹配的内容
    return None  # 如果没有匹配到，返回 None



#merge json files
def merge_json_files(json_files):
    result = []
    for file in json_files:
        with open(file, 'r') as infile:
            data = json.load(infile)
            result.append(data)
    return result

def extract_answer(text):
    # 使用正则表达式匹配 "The answer is: ..." 的内容
    match = re.search(r"The answer is:\s*(.*)", text)
    if match:
        return match.group(1).strip()[0]  # 返回匹配的答案并去除多余空格
    return None  # 如果没有匹配到，返回 None


In [8]:
#dir_path = 'intevention_results'
dir_path = 'eval_results'

#get causal 
prompt_type=['basic','explicit','few_shot','zero_cot']

for prompt in prompt_type:
    json_files= [os.path.join(dir_path, prompt, f) for f in os.listdir(os.path.join(dir_path, prompt)) if f.endswith('.json')]
    result = merge_json_files(json_files)
    print(len(result))

    #save the result
    with open(prompt+'.json', 'w') as outfile:
        json.dump(result, outfile, indent=4)
    
    #get the causal graph
    causal_graph = []
    for r in result:
        temp_dict = {}
        for key, value in r.items():
            temp_dict[key] = get_causal(value)
        
        causal_graph.append(temp_dict)

    #save the causal graph
    with open(prompt+'_causal.json', 'w') as outfile:
        json.dump(causal_graph, outfile, indent=4)


10
10
10
10


In [22]:
dir_path = 'intevention_results'
csv_dir ='intevention'

#get answer 
for file,csv_file in zip(os.listdir(dir_path), os.listdir(csv_dir)):
    save_file = pd.read_csv(os.path.join(csv_dir, csv_file))

    data= json.load(open(os.path.join(dir_path, file)))
    

    #save the result into csv, add column "claud_answers"
    answer = []
    for d in data:
        answer.append(extract_answer(d))
    print(answer)
    save_file['claud_answers'] = answer
    save_file.to_csv(csv_file, index=False)


    

['C', 'C', 'C', 'B', 'C', 'B', 'C', 'C', 'A', 'A', 'A', 'C', 'A', 'B', 'C', 'B', 'B', 'B', 'A', 'C', 'A', 'C', 'B', 'A', 'C', 'A', 'A', 'A', 'C', 'A']
['C', 'C', 'B', 'C', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'A', 'B', 'A', 'A', 'A', 'B', 'C', 'B', 'B', 'A', 'B', 'B', 'B', 'A', 'B', 'C', 'C', 'C', 'A']
['A', 'A', 'C', 'B', 'B', 'B', 'C', 'C', 'A', 'B', 'C', 'C', 'A', 'C', 'C', 'A', 'C', 'B', 'C', 'B', 'C', 'B', 'B', 'C', 'A', 'B', 'A', 'A', 'B', 'C']


In [1]:

import openai
from openai import api_key
from openai import OpenAI
import base64
from mimetypes import guess_type


# Function to encode a local image into data URL
def local_image_to_data_url(image_path):
    # Guess the MIME type of the image based on the file extension
    mime_type, _ = guess_type(image_path)
    if mime_type is None:
        mime_type = 'application/octet-stream'  # Default MIME type if none is found

    # Read and encode the image file
    with open(image_path, "rb") as image_file:
        base64_encoded_data = base64.b64encode(image_file.read()).decode('utf-8')

    # Construct the data URL
    return f"data:{mime_type};base64,{base64_encoded_data}"

def gpt4_vision_generation(input_prompt, image_paths, model="gpt-4o-mini", temperature=0.5):
    """
    Generate a response from the GPT-4 model using a text prompt and up to 10 images.

    Args:
        input_prompt (str): The text prompt to provide to the model.
        image_paths (list): A list of up to 10 image paths.
        model (str): The model name to use for generation (default: "gpt-4o").
        temperature (float): The temperature setting for response randomness (default: 0.5).

    Returns:
        str: The response content generated by the model.
    """
    if not (1 <= len(image_paths) <= 10):
        raise ValueError("The number of image paths must be between 1 and 10.")

    # Load the image data for each image
    image_data_urls = [
        {"type": "image_url", "image_url": {"url": local_image_to_data_url(image_path)}}
        for image_path in image_paths
    ]

    # Initialize the OpenAI API client
    client = OpenAI(api_key="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")

    # Construct the request with the input prompt and all images
    messages = [{"role": "user", "content": {"type": "text", "text": input_prompt}}] + image_data_urls

    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )

    # Return the response content
    return response.choices[0].message.content

In [2]:
response = gpt4_vision_generation('heella', ['Dataset/Hypo_v3_v_structure_256/Hypo_v3_v_structure_256/1.png'])

BadRequestError: Error code: 400 - {'error': {'message': "Invalid type for 'messages[0].content[0]': expected an object, but got a string instead.", 'type': 'invalid_request_error', 'param': 'messages[0].content[0]', 'code': 'invalid_type'}}

In [None]:
client = OpenAI(api_key="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")

    # Construct the request with the input prompt
    message = [{"role": "user", "content": {"type": "text", "text": "What is the capital of France?"}}]

    response = client.chat.completions.create(
        model='gpt-4o',
        messages=messages,
        temperature=1,
    )