#### Result analysis: score of each model for each task type

In [12]:
from collections import defaultdict
import sys
import os
import os
import json

def stream_jsonl(file_path):
    """Lazily yield one parsed JSON value per non-blank line of a JSONL file.

    Args:
        file_path: Path of the JSON Lines file to read.

    Yields:
        The object decoded by ``json.loads`` for each non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Decode as UTF-8 explicitly so non-ASCII text in the records does not
    # depend on the platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            yield json.loads(line)

models_names = ['qwen-vl-max', 'claude-3-5-sonnet-20240620']
# NOTE(review): 'ouput_results.jsonl' looks like a typo but presumably matches
# the file name on disk -- confirm before correcting it.
paths = ['not_refine/ouput_results.jsonl', 'refine/output_1_results.jsonl', 'refine/output_2_results.jsonl']
tasks = ['Webpage', 'Matplotlib', 'HumanEval-V', 'MBPP-V', 'GSM8K-V', 'MATH-V', 'SVG', 'TikZ', 'VP']
# Task types whose result records carry a 'passed' flag; records of every
# other type carry a numeric 'score'.
PASS_FAIL_TASKS = ('HumanEval-V', 'MBPP-V', 'GSM8K-V', 'MATH-V', 'VP')

# Not read anywhere in this cell; kept in case other cells rely on it.
data = defaultdict(lambda: defaultdict(dict))


def recursive_defaultdict():
    """Return a defaultdict whose missing keys are created as nested recursive defaultdicts."""
    return defaultdict(recursive_defaultdict)


# Not read anywhere in this cell; kept in case other cells rely on it.
error_data = recursive_defaultdict()


def summarize_results(results):
    """Average the per-record outcomes of one result file by task type.

    Args:
        results: Iterable of dicts. Each has a 'type' key; records whose type is
            in PASS_FAIL_TASKS also have 'passed', all others have 'score'.

    Returns:
        dict mapping task type to the mean of its 'passed' or 'score' values.
        Pass/fail task types come first, then scored task types, each group in
        order of first appearance.
    """
    correct = defaultdict(list)
    score = defaultdict(list)
    for result in results:
        task_type = result['type']
        if task_type in PASS_FAIL_TASKS:
            correct[task_type].append(result['passed'])
        else:
            score[task_type].append(result['score'])

    summary = {}
    # A key only exists once a value was appended, so no list here is empty.
    for task_type, values in correct.items():
        summary[task_type] = sum(values) / len(values)
    for task_type, values in score.items():
        summary[task_type] = sum(values) / len(values)
    return summary


for model_name in models_names:
    print(f'===================={model_name}====================')
    for path in paths:
        file_path = f"output/{model_name}/{path}"
        # Runs that were never produced for this model are skipped silently.
        if not os.path.exists(file_path):
            continue
        if "not_refine" in file_path:
            file_name = "not_refine"
        elif "output_1" in file_path:
            file_name = "first_refine"
        else:
            file_name = "second_refine"
        print("*" * 10 + f'{file_name}' + "*" * 10)
        evaluate_result = summarize_results(stream_jsonl(file_path))
        for task_type, value in evaluate_result.items():
            label = 'correctness' if task_type in PASS_FAIL_TASKS else 'score'
            print(f'{task_type} {label}: {value}')

**********second_refine**********
HumanEval-V correctness: 1.0
MBPP-V correctness: 0.5
GSM8K-V correctness: 0.5
MATH-V correctness: 0.5
VP correctness: 0.0
Webpage score: 4.0
Matplotlib score: 4.0
SVG score: 2.0
TikZ score: 3.5


In [10]:
import os
import re

def contains_chinese_characters(string):
    """Return True if `string` has at least one character in U+4E00..U+9FFF."""
    match = re.search(r'[\u4e00-\u9fff]', string)
    if match is None:
        return False
    return True

def check_file_for_chinese_characters(file_path):
    """Report whether any line of a UTF-8 text file contains a Chinese character.

    Args:
        file_path: Path of the file to scan.

    Returns:
        True as soon as a line satisfies ``contains_chinese_characters``.
        False if no line does, or if the file cannot be opened or is not valid
        UTF-8 up to the point scanned.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            for line in file:
                if contains_chinese_characters(line):
                    return True
    except (OSError, UnicodeError):
        # Best-effort scan: unreadable or non-UTF-8 (e.g. binary) files are
        # treated as "no match". Other exceptions are programming errors and
        # are allowed to surface instead of being swallowed.
        pass
    return False

def recursive_search(directory):
    """Walk `directory` and collect the files flagged by check_file_for_chinese_characters.

    Any path containing the substring "tmp_file" is ignored. Returns the list
    of matching full paths in the order os.walk produces them.
    """
    candidates = (
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
    )
    return [
        candidate
        for candidate in candidates
        if "tmp_file" not in candidate
        and os.path.isfile(candidate)
        and check_file_for_chinese_characters(candidate)
    ]


# Root directory to scan. The default is the original hard-coded location; set
# the MLLM_DATASET_DIR environment variable to scan a different checkout
# without editing the notebook.
directory_to_search = os.environ.get(
    'MLLM_DATASET_DIR', '/home/huangyajun/data/dataset_MLLM_code_ability'
)

files_with_chinese = recursive_search(directory_to_search)

if not os.path.isdir(directory_to_search):
    # os.walk yields nothing for a missing directory, which would otherwise be
    # reported as a clean scan.
    print(f"Directory not found: {directory_to_search}")
elif files_with_chinese:
    print("Files with Chinese characters found:")
    for file in files_with_chinese:
        print(file)
else:
    print("No files with Chinese characters found.")


Files with Chinese characters found:
/home/huangyajun/data/dataset_MLLM_code_ability/README.md
/home/huangyajun/data/dataset_MLLM_code_ability/output/qwen-vl-max/refine/output_1.jsonl
/home/huangyajun/data/dataset_MLLM_code_ability/output/qwen-vl-max/refine/output_2.jsonl
/home/huangyajun/data/dataset_MLLM_code_ability/output/qwen-vl-max/refine_/output_1.jsonl
/home/huangyajun/data/dataset_MLLM_code_ability/output/qwen-vl-max/refine_/output_2.jsonl
/home/huangyajun/data/dataset_MLLM_code_ability/output/qwen-vl-max/not_refine/output.jsonl
/home/huangyajun/data/dataset_MLLM_code_ability/output/claude-3-5-sonnet-20240620/refine/output_1.jsonl
/home/huangyajun/data/dataset_MLLM_code_ability/output/claude-3-5-sonnet-20240620/refine/output_2.jsonl
/home/huangyajun/data/dataset_MLLM_code_ability/output/claude-3-5-sonnet-20240620/not_refine/output.jsonl
/home/huangyajun/data/dataset_MLLM_code_ability/logs/refine_ouput/claude-3-5-sonnet-20240620.txt
/home/huangyajun/data/dataset_MLLM_code_abili

In [45]:
from collections import defaultdict
import json 
import shutil
from utils import get_code
def read_map_jsonl(file_path):
    """Load a JSONL file and group its records by their 'type' field.

    Args:
        file_path: Path of the JSON Lines file to read.

    Returns:
        defaultdict(list) mapping each record's 'type' value to the list of
        records with that type, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no 'type' key.
    """
    map_data = defaultdict(list)
    # Decode as UTF-8 explicitly so non-ASCII text in the records does not
    # depend on the platform's locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            record = json.loads(line)
            map_data[record['type']].append(record)
    return map_data

def add_write_jsonl(file_path, data):
    """Append `data` to a JSONL file as a single JSON-encoded line.

    Args:
        file_path: Path of the file to append to (created if missing).
        data: JSON-serializable value to write.

    Raises:
        TypeError: If `data` contains a value json cannot serialize.
        OSError: If the file cannot be opened or written.
    """
    # Serialize before touching the file: streaming json.dump straight into the
    # handle would leave a truncated record behind if encoding failed part-way.
    line = json.dumps(data) + '\n'
    with open(file_path, 'a') as file:
        file.write(line)
# Convert data/full_data.jsonl into per-task image folders plus one flat
# data/dataset.jsonl index. Every index record has id, path and type; code
# tasks add function_name / evaluation_function, VP adds ocr_result / query /
# reference_answer.
data = read_map_jsonl('data/full_data.jsonl')
tasks = ['Webpage', 'Matplotlib', 'HumanEval-V', 'MBPP-V', 'GSM8K-V', 'MATH-V', 'SVG', 'TikZ', 'VP']
# Task types whose source records provide an entry point and a test.
CODE_TASKS = ('HumanEval-V', 'GSM8K-V', 'MATH-V', 'MBPP-V')
# Alternative layout previously tried: data/full_data/images and
# data/full_data/dataset.jsonl.
DATASET_FILE = 'data/dataset.jsonl'

# The prompt of the first record of each task type serves as that type's base
# prompt. This definition was commented out, so on a fresh kernel the dump
# below truncated the file and then failed with NameError.
# NOTE(review): if base_prompt was meant to come from another cell, restore
# that dependency explicitly instead.
base_prompt = {task_type: records[0]['prompt'] for task_type, records in data.items()}
with open('prompt/base_prompt.json', 'w') as file:
    json.dump(base_prompt, file, indent=4)


def build_record(task_type, record_id, source):
    """Build the dataset.jsonl entry for one source record.

    Args:
        task_type: Task type the record belongs to (a key of `data`).
        record_id: Zero-based position of the record within its task type.
        source: The raw record dict read from full_data.jsonl.

    Returns:
        dict with 'id', 'path', the task-specific fields, then 'type'.
    """
    record = {
        'id': record_id,
        # Ids are zero-padded to at least three digits in the image file name.
        'path': f"{task_type}/images/{record_id:03d}.png",
    }
    if task_type in CODE_TASKS:
        if task_type == 'MBPP-V':
            # get_code is imported from utils; presumably it returns the text
            # between the two markers, i.e. the signature inside the prompt's
            # python fence -- confirm against utils.
            record['function_name'] = get_code(source['prompt'], '```python\ndef ', ":\n```")
        else:
            record['function_name'] = source['entry_point'].strip()
        record['evaluation_function'] = source['test']
    elif task_type == "VP":
        record['ocr_result'] = get_code(source['prompt'], "####################OCR result####################\n", "\n####################OCR result####################")
        record['query'] = get_code(source['prompt'], "######OCR result####################\n\nQuery: ", "\n\nIf you think you can answer the question")
        record['reference_answer'] = source['test']
    record['type'] = task_type
    return record


# `task_type` rather than `type`: a module-level loop variable named `type`
# would shadow the builtin for every later cell.
for task_type, records in data.items():
    os.makedirs(f'data/{task_type}/images', exist_ok=True)
    # Ids restart at 0 for every task type.
    for record_id, source in enumerate(records):
        record = build_record(task_type, record_id, source)
        # The source image sits next to its task_id; copy it before indexing so
        # a failed copy never leaves an index entry without an image.
        shutil.copy(source['task_id'] + ".png", f"data/{record['path']}")
        # NOTE(review): the index is opened in append mode, so re-running this
        # cell adds the same records again.
        add_write_jsonl(DATASET_FILE, record)