In [16]:
import os
import json
from jsonschema import validate, ValidationError

In [17]:
standard_schema = {
    "type": "object",
    "properties": {
        "feature1": {
            "type": "object",
            "properties": {
                "explanation": {"type": "string"},
                "score": {"type": "number"}
            },
            "required": ["explanation", "score"]
        },
        "feature2": {
            "type": "object",
            "properties": {
                "explanation": {"type": "string"},
                "score": {"type": "number"}
            },
            "required": ["explanation", "score"]
        },
        "feature3": {
            "type": "object",
            "properties": {
                "explanation": {"type": "string"},
                "score": {"type": "number"}
            },
            "required": ["explanation", "score"]
        },
        "feature4": {
            "type": "object",
            "properties": {
                "explanation": {"type": "string"},
                "score": {"type": "number"}
            },
            "required": ["explanation", "score"]
        },
        "feature5": {
            "type": "object",
            "properties": {
                "explanation": {"type": "string"},
                "score": {"type": "number"}
            },
            "required": ["explanation", "score"]
        },
        "overall": {
            "type": "object",
            "properties": {
                "explanation": {"type": "string"},
                "score": {"type": "number"}
            },
            "required": ["explanation", "score"]
        }
    },
    "required": ["feature1", "feature2", "feature3", "feature4", "feature5", "overall"]
}

In [18]:
def validate_json(json_data, schema):
    try:
        validate(instance=json_data, schema=schema)
        return True
    except ValidationError as e:
        return False

def find_non_conforming_files(directory, schema):
    non_conforming_files = []
    for root, _, files in os.walk(directory):
        for filename in files:
            if filename.endswith(".json"):
                file_path = os.path.join(root, filename)
                with open(file_path, 'r') as file:
                    try:
                        json_data = json.load(file)
                        if not validate_json(json_data, schema):
                            non_conforming_files.append(file_path)
                    except json.JSONDecodeError:
                        # 如果JSON文件解析失败，也视为不符合标准
                        non_conforming_files.append(file_path)
    return non_conforming_files

In [19]:
json_directory = 'data/train_solution'

non_conforming_files = find_non_conforming_files(json_directory, standard_schema)

print("以下文件格式不符合标准:")
for file in non_conforming_files:
    relative_path = os.path.relpath(file, json_directory)
    print(relative_path)

以下文件格式不符合标准:
problem-2923/para_5_and_6.json
problem-2982/para_10_and_11.json
problem-3261/para_4_and_5.json
problem-3437/para_22_and_23.json
problem-3439/para_6_and_7.json
problem-2144/para_6_and_7.json
problem-1735/para_1_and_2.json
problem-2204/para_4_and_5.json
problem-2007/para_2_and_3.json
problem-2463/para_2_and_3.json
problem-2633/para_3_and_4.json
problem-1683/para_1_and_2.json
problem-2858/para_9_and_10.json
problem-3102/para_4_and_5.json
problem-2272/para_1_and_2.json
problem-3103/para_1_and_2.json
problem-1945/para_12_and_13.json
problem-2398/para_4_and_5.json
problem-2159/para_3_and_4.json
problem-1974/para_4_and_5.json
problem-3457/para_3_and_4.json
problem-1556/para_11_and_12.json
problem-299/para_2_and_3.json
problem-2140/para_2_and_3.json
problem-3231/para_1_and_2.json
problem-2469/para_3_and_4.json
problem-2838/para_3_and_4.json
problem-1672/para_4_and_5.json
problem-2297/para_1_and_2.json
problem-1618/para_7_and_8.json
problem-170/para_2_and_3.json
problem-2461/para_5

In [11]:
def calculate_overall_average(data):
    feature_scores = [data[feature]['score'] for feature in data if feature.startswith('feature')]
    average_score = round(sum(feature_scores) / len(feature_scores), 2)
    data['overall']['score'] = average_score
    return data

In [12]:
def process_directory(directory):
    for root, _, files in os.walk(directory):
        for file in files:
            if file.endswith('.json'):
                file_path = os.path.join(root, file)
                with open(file_path, 'r') as json_file:
                    data = json.load(json_file)
                updated_data = calculate_overall_average(data)
                with open(file_path, 'w') as json_file:
                    json.dump(updated_data, json_file, indent=4)
                print(f'Updated {file_path}')

In [3]:
def process_directory(directory):
    for root, _, files in os.walk(directory):
        print(f'Processing directory: {root}')
        for file in files:
            if file.endswith('.json'):
                file_path = os.path.join(root, file)
                print(f'Processing file: {file_path}')
                try:
                    with open(file_path, 'r') as json_file:
                        data = json.load(json_file)
                    updated_data = calculate_overall_average(data)
                    with open(file_path, 'w') as json_file:
                        json.dump(updated_data, json_file, indent=4)
                    print(f'Successfully updated {file_path}')
                except Exception as e:
                    print(f'Failed to process {file_path}: {e}')

In [15]:
# Set the directory path
directory_path = 'data/train_solution'

# Process the directory
process_directory(directory_path)

Updated data/train_solution/problem-3496/para_2_and_3.json
Updated data/train_solution/problem-3496/para_3_and_4.json
Updated data/train_solution/problem-3496/para_1_and_2.json
Updated data/train_solution/problem-4011/para_2_and_3.json
Updated data/train_solution/problem-4011/para_4_and_5.json
Updated data/train_solution/problem-4011/para_3_and_4.json
Updated data/train_solution/problem-4011/para_5_and_6.json
Updated data/train_solution/problem-4011/para_1_and_2.json
Updated data/train_solution/problem-604/para_6_and_7.json
Updated data/train_solution/problem-604/para_2_and_3.json
Updated data/train_solution/problem-604/para_7_and_8.json
Updated data/train_solution/problem-604/para_4_and_5.json
Updated data/train_solution/problem-604/para_3_and_4.json
Updated data/train_solution/problem-604/para_5_and_6.json
Updated data/train_solution/problem-604/para_1_and_2.json
Updated data/train_solution/problem-1335/para_2_and_3.json
Updated data/train_solution/problem-1335/para_3_and_4.json
Upda

with open('para_1_and_2.json', 'r') as file:
    data = json.load(file)

updated_data = calculate_overall_average(data)

with open('para_1_and_2.json', 'w') as file:
    json.dump(updated_data, file, indent=4)