In [37]:
import os

files = [f for f in os.listdir('../data/unified/specs') if not f.endswith('.json')]
files

[]

In [36]:
# Compare contents of two folders

import os

# Specify the folder paths
specs_folder = '../data/unified/specs'
imgs_folder = '../data/unified/imgs'

# Extract base file names from the specs folder
specs_files = os.listdir(specs_folder)
specs_base_names = {os.path.splitext(file)[0] for file in specs_files if file.endswith('.json')}

# Extract base file names from the imgs folder
imgs_files = os.listdir(imgs_folder)
imgs_base_names = {os.path.splitext(file)[0] for file in imgs_files if file.endswith('.png')}

# Find overlaps and differences
overlap = specs_base_names.intersection(imgs_base_names)
specs_only = specs_base_names.difference(imgs_base_names)
imgs_only = imgs_base_names.difference(specs_base_names)

# Report results
# print("Overlap:", overlap)
print("Files in specs only:", specs_only)
print("Files in imgs only:", imgs_only)

Files in specs only: set()
Files in imgs only: set()


In [None]:
import os
import json

def find_nested_field(data, field_name, field_value):
    """
    Recursively searches for a field_name with the field_value in nested JSON data.
    """
    if isinstance(data, dict):
        for key, value in data.items():
            if key == field_name and value == field_value:
                return True
            elif isinstance(value, (dict, list)):
                if find_nested_field(value, field_name, field_value):
                    return True
    elif isinstance(data, list):
        for item in data:
            if find_nested_field(item, field_name, field_value):
                return True
    return False

def list_json_files_with_nested_field(directory, field_name, field_value):
    matching_files = []
    
    # Loop over all files in the specified directory
    for filename in os.listdir(directory):
        if filename.endswith('.json'):
            # Construct the full path to the file
            file_path = os.path.join(directory, filename)

            try:
                # Open and parse the JSON file
                with open(file_path, 'r', encoding='utf-8') as file:
                    data = json.load(file)
                    
                    # Check if the nested field and value exist in the JSON data
                    if find_nested_field(data, field_name, field_value):
                        matching_files.append(filename)

            except (json.JSONDecodeError, IOError) as e:
                print(f"Error reading {filename}: {e}")

    return matching_files

# Example usage
directory = '../data/unified/specs'
field_name = 'type'
field_value = 'vcf'
matching_json_files = list_json_files_with_nested_field(directory, field_name, field_value)

print("Files with the specified nested field:")
for filename in matching_json_files:
    print(filename)
print(len(matching_json_files))

In [14]:
import os
import json

def find_files_with_types(folder_path, file_types):
    matching_files = {file_type: [] for file_type in file_types}
    
    try:
        # Loop through all files in the folder
        for filename in os.listdir(folder_path):
            if filename.endswith('.json'):
                file_path = os.path.join(folder_path, filename)
                
                try:
                    # Read and parse each JSON file
                    with open(file_path, 'r') as file:
                        json_data = json.load(file)
                        
                        # Function to recursively search for specified file types in the JSON
                        def search_for_type(obj, file_types):
                            if isinstance(obj, dict):
                                for key, value in obj.items():
                                    if key == "type" and value in file_types:
                                        return value
                                    if isinstance(value, (dict, list)):
                                        result = search_for_type(value, file_types)
                                        if result:
                                            return result
                            elif isinstance(obj, list):
                                for item in obj:
                                    result = search_for_type(item, file_types)
                                    if result:
                                        return result
                            return None
                        
                        # Check if any of the file types are found in the JSON
                        file_type_found = search_for_type(json_data, file_types)
                        if file_type_found:
                            matching_files[file_type_found].append(filename)

                except json.JSONDecodeError as e:
                    print(f"Error reading JSON file {filename}: {e}")
                except Exception as e:
                    print(f"Error processing file {filename}: {e}")
                    
    except Exception as e:
        print(f"Error accessing folder: {e}")
        return {}, 0
    
    # Print results
    for file_type, files in matching_files.items():
        print(f"\nFiles containing 'type':'{file_type}':")
        for file in files:
            print(file)
        print(f"Total number of '{file_type}' files: {len(files)}")
    
    total_files = sum(len(files) for files in matching_files.values())
    return matching_files, total_files

# Example usage
folder_path = "../data/gallery_p1/specs"
file_types = ["vcf", "gff", "bed", "bam"]
files, total_count = find_files_with_types(folder_path, file_types)


Files containing 'type':'vcf':
VCF_INDELS_sw_0_7_s_1_2.json
VCF_POINT_MUTATIONS_sw_1_2_s_1_0.json
VCF_POINT_MUTATIONS_sw_0_7_s_1_0.json
VCF_INDELS_sw_1_2_s_1_2.json
VCF_INDELS_sw_1_2_s_0_7.json
VCF_INDELS_sw_0_7_s_0_7.json
VCF_POINT_MUTATIONS_sw_1_0_s_1_0.json
VCF_INDELS_sw_1_0_s_1_2.json
VCF_INDELS_sw_1_0_s_0_7.json
VCF_POINT_MUTATIONS_sw_1_0_s_1_2.json
VCF_INDELS_sw_1_0_s_1_0.json
VCF_POINT_MUTATIONS_sw_1_0_s_0_7.json
VCF_POINT_MUTATIONS_sw_1_2_s_1_2.json
VCF_INDELS_sw_0_7_s_1_0.json
VCF_INDELS_sw_1_2_s_1_0.json
VCF_POINT_MUTATIONS_sw_0_7_s_1_2.json
VCF_POINT_MUTATIONS_sw_0_7_s_0_7.json
VCF_POINT_MUTATIONS_sw_1_2_s_0_7.json
Total number of 'vcf' files: 18

Files containing 'type':'gff':
GFF_DEMO_sw_1_2_s_1_0.json
GFF_DEMO_sw_0_7_s_1_0.json
GFF_DEMO_sw_1_0_s_1_0.json
GFF_DEMO_sw_1_0_s_0_7.json
GFF_DEMO_sw_1_0_s_1_2.json
GFF_DEMO_sw_0_7_s_0_7.json
GFF_DEMO_sw_1_2_s_0_7.json
GFF_DEMO_sw_1_2_s_1_2.json
GFF_DEMO_sw_0_7_s_1_2.json
Total number of 'gff' files: 9

Files containing 'type':'b

In [8]:
# Divide files into multiple partitions

import os
import shutil
from math import ceil

def divide_files_into_folders(source_folder, dest_folder_base, num_folders=5):
    # Get list of files in the source folder
    files = [f for f in os.listdir(source_folder) if os.path.isfile(os.path.join(source_folder, f))]

    # Calculate the number of files per folder
    num_files = len(files)
    files_per_folder = ceil(num_files / num_folders)

    # Ensure destination folders exist
    dest_folders = []
    for i in range(num_folders):
        folder_name = f"{dest_folder_base}_{i+1}"
        dest_folder = os.path.join(source_folder, folder_name)
        os.makedirs(dest_folder, exist_ok=True)
        dest_folders.append(dest_folder)

    # Distribute files to each destination folder
    for index, file in enumerate(files):
        dest_folder = dest_folders[index // files_per_folder]
        shutil.move(os.path.join(source_folder, file), os.path.join(dest_folder, file))

# Example usage
source_folder = '../data/unified/remaining_specs'
dest_folder_base = 'partition'
divide_files_into_folders(source_folder, dest_folder_base)

In [1]:
# Compare contents of two folders recursively

import os
import filecmp

def compare_directories(dir1, dir2):
    # Differ files
    differ_files = []

    # Compare the directories
    dir_cmp = filecmp.dircmp(dir1, dir2)

    # Files that are different
    differ_files.extend(dir_cmp.diff_files)
    
    # Files only present in one of the directories
    if dir_cmp.left_only or dir_cmp.right_only:
        differ_files.extend(dir_cmp.left_only)
        differ_files.extend(dir_cmp.right_only)

    # Recursively compare subdirectories
    for sub_dir in dir_cmp.subdirs:
        differ_files.extend(compare_directories(
            os.path.join(dir1, sub_dir),
            os.path.join(dir2, sub_dir)
        ))
        
    return differ_files

def main():
    folder1 = '../data/gallery_p1/specs_copy'
    folder2 = '../data/gallery_p1/specs'

    # Compare the folders
    diff_files = compare_directories(folder1, folder2)

    # Print out the differing files
    if diff_files:
        print("Differences found in the following files:")
        for file in diff_files:
            print(file)
    else:
        print("The folders are identical.")

if __name__ == "__main__":
    main()

Differences found in the following files:
DUMMY_TRACK_sw_0_7_s_0_7.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_0_7_s_0_7.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_0_7_s_1_0.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_0_7_s_1_2.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_1_0_s_0_7.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_1_0_s_1_0.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_1_0_s_1_2.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_1_2_s_0_7.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_1_2_s_1_0.json
EX_SPEC_CANCER_VARIANT_PROTOTYPE_sw_1_2_s_1_2.json
EX_SPEC_GENE_ANNOTATION_sw_0_7_s_0_7.json
EX_SPEC_GIVE_sw_0_7_s_0_7.json
EX_SPEC_GIVE_sw_0_7_s_1_0.json
EX_SPEC_GIVE_sw_0_7_s_1_2.json
EX_SPEC_GIVE_sw_1_0_s_0_7.json
EX_SPEC_GIVE_sw_1_0_s_1_0.json
EX_SPEC_GIVE_sw_1_0_s_1_2.json
EX_SPEC_GIVE_sw_1_2_s_0_7.json
EX_SPEC_GIVE_sw_1_2_s_1_0.json
EX_SPEC_GIVE_sw_1_2_s_1_2.json
EX_SPEC_MATRIX_HFFC6_sw_0_7_s_0_7.json
EX_SPEC_MATRIX_HFFC6_sw_0_7_s_1_0.json
EX_SPEC_MATRIX_HFFC6_sw_0_7_s_1_2.json
EX_SPEC_MATRIX_HFFC6_sw_1_0_s_0

In [3]:
# From a set of images (folder B), find their specs from a pool (A), and put them into folder C

import os
import shutil

# Define the paths to the folders
folder_a_path = '../data/autogosling/specs'
folder_b_path = './gene_annotation'
folder_c_path = './gene_specs'

# Ensure folder C exists
os.makedirs(folder_c_path, exist_ok=True)

# Get a set of all file names (without extension) in folder B
images_in_b = {os.path.splitext(filename)[0] for filename in os.listdir(folder_b_path) if filename.endswith('.png')}

# Iterate through each file in folder A
for filename in os.listdir(folder_a_path):
    # Split the filename to get the name and extension
    name, extension = os.path.splitext(filename)
    
    # Check if the file is a JSON and if the corresponding PNG exists in folder B
    if extension.lower() == '.json' and name in images_in_b:
        # Construct full file paths
        src_file_path = os.path.join(folder_a_path, filename)
        dest_file_path = os.path.join(folder_c_path, filename)
        
        # Copy the file from A to C
        shutil.copy2(src_file_path, dest_file_path)
        print(f"Copied: {filename}")

print("Finished copying files.")

Copied: gene_annotation_p_2_sw_1_0_s_1_0.json
Copied: gene_annotation_p_0_sw_1_2_s_1_0.json
Copied: gene_annotation_p_0_sw_0_7_s_1_0.json
Copied: gene_annotation_p_4_sw_0_7_s_1_0.json
Copied: gene_annotation_p_3_sw_1_0_s_1_0.json
Copied: gene_annotation_p_1_sw_1_2_s_1_0.json
Copied: gene_annotation_p_1_sw_0_7_s_1_0.json
Copied: gene_annotation_p_4_sw_1_2_s_1_0.json
Copied: gene_annotation_p_0_sw_1_0_s_1_0.json
Copied: gene_annotation_p_2_sw_1_2_s_1_0.json
Copied: gene_annotation_p_2_sw_0_7_s_1_0.json
Copied: gene_annotation_p_1_sw_1_0_s_1_0.json
Copied: gene_annotation_p_3_sw_1_2_s_1_0.json
Copied: gene_annotation_p_3_sw_0_7_s_1_0.json
Copied: gene_annotation_p_4_sw_1_0_s_1_0.json
Copied: gene_annotation_p_4_sw_1_0_s_0_7.json
Copied: gene_annotation_p_2_sw_1_2_s_1_2.json
Copied: gene_annotation_p_3_sw_0_7_s_0_7.json
Copied: gene_annotation_p_0_sw_1_0_s_1_2.json
Copied: gene_annotation_p_1_sw_1_0_s_0_7.json
Copied: gene_annotation_p_3_sw_1_2_s_0_7.json
Copied: gene_annotation_p_2_sw_0_7

In [30]:
import os
import json
import shutil
import copy

def process_json_recursively(data):
    """Recursively process JSON data to modify 'warm' to 'hot'"""
    if isinstance(data, dict):
        new_data = {}
        for key, value in data.items():            
            if key == "value" and value == "#0072B2":
                new_data[key] = "#E79F00"
            elif key == "value" and value == "#E79F00":
                new_data[key] = "darkgreen"
            elif key == "value" and value == "darkgreen":
                new_data[key] = "purple"
            else:
                new_data[key] = process_json_recursively(value)
        return new_data
    elif isinstance(data, list):
        return [process_json_recursively(item) for item in data]
    else:
        return data

# Specify the directory containing the JSON files
directory_path = './geranium_seeds'  # replace with your directory path
output_path = './recolor'

# Track if any changes were made
changes_made = False

# Loop through each file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".json"):
        file_path = os.path.join(directory_path, filename)

        # Open and read the JSON file
        with open(file_path, 'r', encoding='utf-8') as file:
            json_data = json.load(file)

        # Process the JSON data recursively
        new_data = process_json_recursively(json_data)

        # Check if any changes were made by comparing the original and processed data
        if json.dumps(new_data) != json.dumps(json_data):
            # Create a new filename for the modified JSON
            new_filename = f"{filename[:-5]}_recolor.json"  # Remove .json part and append _hot.json
            new_file_path = os.path.join(output_path, new_filename)

            # Write the new JSON data to a file
            with open(new_file_path, 'w', encoding='utf-8') as new_file:
                json.dump(new_data, new_file, indent=4)

            print(f"Created {new_filename} from {filename}")
            changes_made = True

if not changes_made:
    print("No files needed modification.")
else:
    print("Processing complete.")

Created EX_SPEC_MATRIX_HFFC6_sw_1_0_s_1_2_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_1_0_s_1_2.json
Created EX_SPEC_MATRIX_HFFC6_sw_1_0_s_0_7_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_1_0_s_0_7.json
Created EX_SPEC_MATRIX_HFFC6_sw_0_7_s_1_2_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_0_7_s_1_2.json
Created EX_SPEC_MATRIX_HFFC6_sw_1_2_s_1_2_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_1_2_s_1_2.json
Created EX_SPEC_MATRIX_HFFC6_sw_1_2_s_0_7_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_1_2_s_0_7.json
Created EX_SPEC_MATRIX_HFFC6_sw_0_7_s_0_7_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_0_7_s_0_7.json
Created EX_SPEC_MATRIX_HFFC6_sw_0_7_s_1_0_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_0_7_s_1_0.json
Created EX_SPEC_MATRIX_HFFC6_sw_1_2_s_1_0_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_1_2_s_1_0.json
Created EX_SPEC_MATRIX_HFFC6_sw_1_0_s_1_0_recolor.json from EX_SPEC_MATRIX_HFFC6_sw_1_0_s_1_0.json
Processing complete.
