In [None]:
# Switch the working directory to ~/REVIVAL2 (presumably the project root — confirm).
%cd ~/REVIVAL2
# Load the autoreload extension and use mode 2 so imported modules are
# re-imported before each cell runs.
%load_ext autoreload
%autoreload 2
# Load the blackcellmagic extension (provides the %%black cell formatter magic).
%load_ext blackcellmagic

In [None]:
import json
import os

def load_json(file_path):
    """Load and parse the JSON document stored at ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON content, exactly as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; pin the encoding so decoding does
    # not depend on the platform's locale default.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def compare_fields(json1, json2, fields_to_compare):
    """Compare selected protein fields of the first sequence entry of two JSON objects.

    For each object the values are read from ``obj["sequences"][0]["protein"]``.
    A missing, null, or empty ``"sequences"`` list, or a missing/null
    ``"protein"`` entry, is treated as having none of the fields (every value
    is ``None``) instead of raising.

    Args:
        json1: First decoded JSON object (a dict).
        json2: Second decoded JSON object (a dict).
        fields_to_compare: Iterable of field names to look up under ``protein``.

    Returns:
        A dict mapping each differing field name to
        ``{"file1": <value in json1>, "file2": <value in json2>}``.
        Fields whose values are equal (including both absent) are omitted.
    """

    def _first_protein(document):
        """Return the ``protein`` mapping of the first sequence entry, or ``{}``."""
        sequences = document.get("sequences") or []
        if not sequences:
            return {}
        return sequences[0].get("protein") or {}

    # The lookup does not depend on the field, so do it once per document.
    protein1 = _first_protein(json1)
    protein2 = _first_protein(json2)

    differences = {}
    for field in fields_to_compare:
        value1 = protein1.get(field)
        value2 = protein2.get(field)

        if value1 != value2:
            differences[field] = {"file1": value1, "file2": value2}

    return differences

# Protein-level fields (read from sequences[0].protein by compare_fields) to compare.
fields_to_compare = ["modifications", "unpairedMsa", "pairedMsa", "templates"]  # Add fields to compare


# Paths to the output JSON files
json_file1 = "/disk2/fli/REVIVAL2/zs/af3_joint/struct/PfTrpB-5iodo/i165di183h/i165di183h_data.json"
json_file2 = "/disk2/fli/REVIVAL2/zs/af3_joint/struct/PfTrpB-5iodo/y301l/y301l_data.json"
# json_file2 = "/disk2/fli/REVIVAL2/zs/af3_joint/struct/PfTrpB-5iodo/i165gi183vy301e/i165gi183vy301e_data.json"

# Files taking part in the comparison; append more paths to compare more files.
json_files = [json_file1, json_file2]

# Read each file once up front instead of re-reading it for every pair.
json_contents = [load_json(path) for path in json_files]

# Pairwise comparison of all JSON files
for i in range(len(json_files)):
    for j in range(i + 1, len(json_files)):
        # Compare the pair selected by the loop indices, so every listed file
        # is actually compared (not just json_file1 vs json_file2).
        differences = compare_fields(json_contents[i], json_contents[j], fields_to_compare)

        # Print results
        if differences:
            print(f"Comparing {json_files[i]} and {json_files[j]}:")
            for field, diff in differences.items():
                print(f"  Field: {field}")
                print(f"    {json_files[i]}: {diff['file1']}")
                print(f"    {json_files[j]}: {diff['file2']}")
        else:
            print(f"{json_files[i]} and {json_files[j]} have the same fields for {fields_to_compare}.")


In [20]:
def _key_set_repr(keys):
    """Render ``keys`` like a set literal while keeping the given order."""
    return "{" + ", ".join(repr(key) for key in keys) + "}"


def granular_compare(value1, value2, field_name):
    """
    Recursively compare two JSON-like values and describe every difference.

    Two dicts are compared key by key, two lists element by element; any other
    combination (including a dict compared with a list) is compared with ``!=``.

    Args:
        value1: Value taken from the first file.
        value2: Value taken from the second file.
        field_name: Path of the value being compared; used as the prefix of
            every message and extended with ``.key`` / ``[index]`` on recursion.

    Returns:
        A list of human-readable difference messages, empty when the values
        are equal. The order is deterministic: missing-key messages first,
        then shared keys in ``value1``'s insertion order.
    """
    differences = []

    # If values are dictionaries, compare keys and values
    if isinstance(value1, dict) and isinstance(value2, dict):
        # Walk the dicts in insertion order rather than through sets: set
        # iteration order of str keys changes between interpreter runs, which
        # made the report order irreproducible.
        missing_in_1 = [key for key in value2 if key not in value1]
        missing_in_2 = [key for key in value1 if key not in value2]

        # Report missing keys
        if missing_in_1:
            differences.append(f"{field_name}: Keys missing in File 1: {_key_set_repr(missing_in_1)}")
        if missing_in_2:
            differences.append(f"{field_name}: Keys missing in File 2: {_key_set_repr(missing_in_2)}")

        # Compare common keys recursively
        for key in value1:
            if key in value2:
                differences.extend(
                    granular_compare(value1[key], value2[key], f"{field_name}.{key}")
                )

    # If values are lists, compare each element
    elif isinstance(value1, list) and isinstance(value2, list):
        min_length = min(len(value1), len(value2))
        for i in range(min_length):
            differences.extend(
                granular_compare(value1[i], value2[i], f"{field_name}[{i}]")
            )

        # Report extra elements in either list
        if len(value1) > len(value2):
            differences.append(f"{field_name}: Extra elements in File 1: {value1[min_length:]}")
        elif len(value2) > len(value1):
            differences.append(f"{field_name}: Extra elements in File 2: {value2[min_length:]}")

    # Primitives and mismatched container types are compared directly
    elif value1 != value2:
        differences.append(f"{field_name}: Value mismatch: File 1 = {value1}, File 2 = {value2}")

    return differences


def compare_json_fields_granular(json1, json2, fields_to_compare):
    """
    Gather granular differences for the chosen top-level fields of two JSON objects.

    Each field is looked up with ``.get`` on both objects. A field whose value
    is ``None`` (absent or null) on exactly one side is reported as missing in
    that file; a field that is ``None`` on both sides produces no message;
    otherwise the two values are handed to ``granular_compare``.

    Returns:
        A list of difference messages in the order the fields were given.
    """
    report = []
    for name in fields_to_compare:
        left = json1.get(name)
        right = json2.get(name)

        # Both sides carry a value: descend into the structures.
        if left is not None and right is not None:
            report.extend(granular_compare(left, right, name))
            continue

        # Nothing on either side: nothing to report.
        if left is None and right is None:
            continue

        # Exactly one side is empty.
        if left is None:
            report.append(f"{name} is missing in File 1")
        else:
            report.append(f"{name} is missing in File 2")
    return report


In [22]:
# Paths to the output JSON files
# Same two paths as in the first comparison cell; they are the inputs loaded
# by the granular comparison cell below.
json_file1 = "/disk2/fli/REVIVAL2/zs/af3_joint/struct/PfTrpB-5iodo/i165di183h/i165di183h_data.json"
json_file2 = "/disk2/fli/REVIVAL2/zs/af3_joint/struct/PfTrpB-5iodo/y301l/y301l_data.json"
# Alternative second file, kept commented out:
# json_file2 = "/disk2/fli/REVIVAL2/zs/af3_joint/struct/PfTrpB-5iodo/i165gi183vy301e/i165gi183vy301e_data.json"


In [None]:
# Read both JSON documents from disk (first file, then second)
data1, data2 = map(load_json, (json_file1, json_file2))

# Top-level keys whose contents are compared
fields_to_compare = ["sequences", "unpairedMsa", "pairedMsa", "templates"]

# Run the field-by-field granular comparison
differences = compare_json_fields_granular(data1, data2, fields_to_compare)

# Report the outcome, one message per line
if not differences:
    print("The JSON fields are identical.")
else:
    print("Differences found:")
    for diff in differences:
        print(diff)