In [1]:
import json
import os
import re
import csv

# Directory containing the JSON files
directory = '.'  # Current directory

# Maps each species ID (the digits taken from the file name, kept as a string)
# to the combined list of IDs read from that species' JSON file
all_ids_dict = {}

# Expression to extract the ID from the json file name
id_pattern = re.compile(r'specie-id-(\d+)\.json')

# os.listdir() returns entries in arbitrary order; sorting keeps the dictionary
# (and anything written from it) in the same order on every run.
for filename in sorted(os.listdir(directory)):
    # Require the WHOLE name to match.  An unanchored search would also accept
    # names such as "specie-id-7.json.old.json" and let that file overwrite the
    # entry of the real "specie-id-7.json".
    match = id_pattern.fullmatch(filename)
    if match is None:
        continue

    overall_id = match.group(1)

    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, and keep the file open only while it is being parsed.
    with open(os.path.join(directory, filename), 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Concatenate the 'upSelection' entries followed by the 'downSelection'
    # entries; either key may be absent from a file.
    # NOTE(review): both values are presumably lists of IDs - confirm against
    # the files' schema.
    merged_ids = []
    for key in ('upSelection', 'downSelection'):
        if key in data:
            merged_ids.extend(data[key])

    # Add the merged list of IDs to the dictionary with the overall specie ID as the key
    all_ids_dict[overall_id] = merged_ids

# Print the dictionary
print(all_ids_dict)

# Print only the keys
print(all_ids_dict.keys())

# File to save the dictionary in csv file
csv_file_path = 'all_ids.csv'

# newline='' lets the csv module control line endings itself; the explicit
# encoding makes the output bytes independent of the platform default.
with open(csv_file_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    # Write the header row
    writer.writerow(['Overall_ID', 'Merged_IDs'])
    # Write one row per key: all of its IDs are packed into a single
    # comma-separated cell, which the csv writer quotes because it contains
    # the delimiter.
    for overall_id, merged_ids in all_ids_dict.items():
        writer.writerow([overall_id, ','.join(str(item_id) for item_id in merged_ids)])

print(f"CSV file saved at: {csv_file_path}")

{'1174': ['405904', '414778', '594982', '806590', '336937', '619647', '1156541', '48102', '30774', '88083', '336935', '346709', '405903', '409593', '414777', '1156540', '579455', '619646', '806588', '148431', '222229', '259414', '336934', '346708', '405902', '409592', '414776', '1540190', '1565786', '1625191', '30773', '48100', '86074', '88082', '1390916', '1445840', '1453252', '1464736', '724486', '743141', '806587', '981408', '579454', '588696', '619645', '624095', '641560', '722237', '334575', '336933', '339808', '346707', '409591', '414775', '489601', '519951', '221386', '222228', '229913', '238318', '259413', '330375', '30772', '44982', '48099', '86073', '88081', '1525225', '1540189', '1540263', '1558001', '1554020', '1554177', '1565785', '1578775', '1339692', '1339869', '1390915', '1445839', '1464848', '1502877', '1042873', '1043176', '1153891', '1156538', '1205031', '806586', '875225', '888195', '971244', '990944', '715368', '724485', '735527', '743140', '750658', '750665', '769