In [4]:
import json

def _build_lookup(human_genome_data):
    """Build a query -> answer mapping from the parsed alignment results.

    Each usable entry is a list whose first element is the query string and
    whose second element is either the answer string itself or a list in
    which the first string starting with "chr" is taken as the answer.
    Entries that do not fit are reported on stdout and skipped.
    """
    lookup = {}
    for item in human_genome_data:
        if not (isinstance(item, list) and len(item) > 1 and isinstance(item[0], str)):
            print(f"Skipping unexpected format in human_genome_data: {item}")
            continue

        query, answer = item[0], item[1]
        if isinstance(answer, str):
            lookup[query] = answer
        elif isinstance(answer, list):
            # Accept lists of any length; a single-element list holding the
            # "chr..." string is just as valid as a longer one.
            chr_hit = next(
                (s for s in answer if isinstance(s, str) and s.startswith("chr")),
                None,
            )
            if chr_hit is None:
                print(f"Warning: Could not find a 'chr' string in the second element for query: {query}")
            else:
                lookup[query] = chr_hit
        else:
            # Previously dropped silently; surface it so missing answers can be traced.
            print(f"Warning: Unsupported answer type for query: {query}")
    return lookup


def update_dna_alignment_data(geneturing_file, human_genome_file, output_file):
    """
    Update the 'Human genome DNA aligment' section of one JSON file from another.

    Reads both JSON files, replaces the answer of every question in the
    'Human genome DNA aligment' section of the first file that has a different
    answer in the second file, and writes the whole (updated) first document
    to ``output_file``. Progress, warnings and errors are printed to stdout;
    nothing is raised to the caller and nothing is returned.

    Note: the section key is spelled 'aligment' on purpose -- it must match
    the key used in the data files.

    Args:
        geneturing_file (str): Path to the geneturing_updated.json file.
        human_genome_file (str): Path to the Human genome DNA aligment.json file.
        output_file (str): Path where the updated JSON will be saved.
    """
    section_key = "Human genome DNA aligment"
    try:
        # 1) Read in both files. JSON is UTF-8 by specification, so do not
        # depend on the platform's default locale encoding.
        with open(geneturing_file, 'r', encoding='utf-8') as f:
            geneturing_data = json.load(f)

        with open(human_genome_file, 'r', encoding='utf-8') as f:
            human_genome_data = json.load(f)

        # Ensure the target section exists
        if section_key not in geneturing_data:
            print(f"Error: '{geneturing_file}' does not contain the expected '{section_key}' section.")
            return

        dna_alignment_section = geneturing_data[section_key]

        # 2) Update the nodes
        human_genome_lookup = _build_lookup(human_genome_data)

        updated_count = 0
        # Only values of existing keys are reassigned, so iterating the dict
        # directly is safe (its size never changes).
        for question in dna_alignment_section:
            if question not in human_genome_lookup:
                print(f"Warning: No matching answer found in '{human_genome_file}' for question: '{question}'")
                continue

            original_answer = dna_alignment_section[question]
            new_answer = human_genome_lookup[question]
            if original_answer != new_answer:
                dna_alignment_section[question] = new_answer
                updated_count += 1
                print(f"Updated: '{question}' from '{original_answer}' to '{new_answer}'")

        # 3) Save the updated json
        # newline='' disables newline translation so the file always uses '\n'.
        with open(output_file, 'w', encoding='utf-8', newline='') as f:
            json.dump(geneturing_data, f, indent=4)

        print(f"\nSuccessfully updated {updated_count} entries. Updated data saved to '{output_file}'")

    except FileNotFoundError as e:
        print(f"Error: File not found - {e}")
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON format - {e}")
    except Exception as e:
        # Top-level boundary of a notebook utility: report instead of crashing the cell.
        print(f"An unexpected error occurred: {e}")

# Define file paths
geneturing_file_path = 'data/geneturing_updated.json'
human_genome_file_path = 'results/111111/v3_code/Human genome DNA aligment.json'
output_file_path = 'data/geneturing_updated2.json'

# Run the update function only when executed directly (script or notebook
# cell), so that importing this module does not trigger any file I/O.
if __name__ == "__main__":
    update_dna_alignment_data(geneturing_file_path, human_genome_file_path, output_file_path)


Updated: 'Align the DNA sequence to the human genome:ATTCTGCCTTTAGTAATTTGATGACAGAGACTTCTTGGGAACCACAGCCAGGGAGCCACCCTTTACTCCACCAACAGGTGGCTTATATCCAATCTGAGAAAGAAAGAAAAAAAAAAAAGTATTTCTCT' from 'chr15:91950805-91950932' to 'chr15:94509-94636'
Updated: 'Align the DNA sequence to the human genome:GGACAGCTGAGATCACATCAAGGATTCCAGAAAGAATTGGCACAGGATCATTCAAGATGCATCTCTCCGTTGCCCCTGTTCCTGGCTTTCCTTCAACTTCCTCAAAGGGGACATCATTTCGGAGTTTGGCTTCCA' from 'chr8:7081648-7081782' to 'chr8:36227-36361'
Updated: 'Align the DNA sequence to the human genome:AAACGATGTCTTCATTGCCTGGAAATGATGGCGCCCTTGTTCTTTATCCAAAGACTGATGGGGGAAAGAGTAATTCATTTAATAACATGGGGTCCTCATTACAGACTGGCCACCAATATAAAGCTTCGAATTTTTT' from 'chr10:7531973-7532108' to 'chr10:42153-42288'
Updated: 'Align the DNA sequence to the human genome:AGGCCCTCACCTGGAAATTACTTACTCATGCTTCATGACCCAGTTCAAATTTTGTCACCTCTGTGAAACCTTCCCTGGGCCCCGTTGATCTCCTTGAAGGCA' from 'chr7:71368450-71368551' to 'chr7:25483-25584'
Updated: 'Align the DNA sequence to the human genome:ATTAAACGCCCCTTAAAT