In [None]:
import json
import os


In [None]:

# Directory that is searched (recursively) for the source .json files.
input_directory = "/stac/output/jsons/"
# Directory that receives the generated .ndjson files.
output_directory = "/output/ndjsons/"


In [None]:

def create_ndjson(file_path, data):
    """Write *data* to *file_path* as newline-delimited JSON (NDJSON).

    Every element of ``data`` is serialised with ``json.dumps`` and written
    on its own line. An existing file is overwritten; an empty ``data``
    produces an empty file. Missing parent directories are created.

    Args:
        file_path: Destination path of the NDJSON file.
        data: Iterable of JSON-serialisable objects.

    Raises:
        TypeError: If an element cannot be serialised by ``json.dumps``.
        OSError: If the directory or the file cannot be created or written.
    """
    # Create the destination folder on demand; otherwise open() raises
    # FileNotFoundError when the target directory does not exist yet.
    parent = os.path.dirname(file_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    # Pin the encoding so the output does not depend on the platform locale.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(item) + '\n' for item in data)


def find_json_files(directory):
    """Recursively collect the ``.json`` files below *directory* by category.

    Classification looks at the file name only (case-sensitive):

    * names containing ``"collection"`` are collections,
    * otherwise names containing ``"catalog"`` are catalogs,
    * every other ``.json`` file is treated as an item.

    Args:
        directory: Root directory to search.

    Returns:
        A ``(collections, catalogs, items)`` tuple of lists of file paths.
        Directories and files are visited in sorted order so the result is
        reproducible. A directory that does not exist yields three empty
        lists.
    """
    collections = []
    catalogs = []
    items = []

    for root, dirs, files in os.walk(directory):
        # os.walk reports entries in arbitrary filesystem order; sorting
        # ``dirs`` in place fixes the order in which sub-directories are
        # descended into, making the output deterministic.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith('.json'):
                continue
            file_path = os.path.join(root, file)

            # Categorize JSON files
            if "collection" in file:
                collections.append(file_path)
            elif "catalog" in file:
                catalogs.append(file_path)
            else:
                items.append(file_path)

    return collections, catalogs, items


def read_json_files(file_list):
    """Load every JSON file in *file_list* into one flat list.

    A file whose top-level value is a list contributes each of its elements;
    any other top-level value is appended as a single entry. Files that are
    not valid UTF-8 encoded JSON are reported on stdout and skipped.

    Args:
        file_list: Iterable of paths to JSON files.

    Returns:
        List of the decoded JSON values, in the order of *file_list*.

    Raises:
        OSError: If a file cannot be opened or read.
    """
    data = []
    for file_path in file_list:
        try:
            # JSON text is UTF-8; do not rely on the platform default
            # encoding when decoding it.
            with open(file_path, 'r', encoding='utf-8') as f:
                content = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Undecodable bytes make the file just as unusable as a syntax
            # error, so both are skipped instead of aborting the whole run.
            print(f"Skipping invalid JSON file: {file_path}")
            continue

        if isinstance(content, list):
            data.extend(content)
        else:
            data.append(content)
    return data

def main(input_directory, output_directory):
    """Merge the JSON files under *input_directory* into three NDJSON files.

    The files found by ``find_json_files`` are loaded per category and
    written to ``collections.ndjson``, ``catalogs.ndjson`` and
    ``items.ndjson`` inside *output_directory*. A short summary with the
    number of entries per file is printed afterwards.

    Args:
        input_directory: Root directory searched for ``.json`` files.
        output_directory: Directory receiving the NDJSON files; it is
            created if it does not exist.

    Raises:
        OSError: If the output directory or files cannot be created, or an
            input file cannot be read.
    """
    # Create the destination first so an unusable output location fails
    # immediately rather than after every input file has been read.
    if output_directory:
        os.makedirs(output_directory, exist_ok=True)

    collections, catalogs, items = find_json_files(input_directory)

    collections_data = read_json_files(collections)
    catalogs_data = read_json_files(catalogs)
    items_data = read_json_files(items)

    create_ndjson(os.path.join(output_directory, "collections.ndjson"), collections_data)
    create_ndjson(os.path.join(output_directory, "catalogs.ndjson"), catalogs_data)
    create_ndjson(os.path.join(output_directory, "items.ndjson"), items_data)

    print("NDJSON files created:")
    print(f" - Collections: {len(collections_data)} entries")
    print(f" - Catalogs: {len(catalogs_data)} entries")
    print(f" - Items: {len(items_data)} entries")

In [None]:

# Run the conversion using the module-level input/output directory settings.
main(input_directory, output_directory)