# In [7]:
import csv
import json

# Source CSV that is handed to the extraction function below
csv_file_path = "consolidated_output_full.csv"
# Destination JSON file that is handed to the extraction function below
output_json_file_path = "output_labels_locations.json"

# Function to extract IUPAC nomenclature and locations and save to JSON
def extract_and_save_to_json(input_csv_path, output_json_path):
    """Extract IUPAC names and collection sites from a CSV and write them as JSON.

    Every CSV row is read through ``csv.DictReader``. The ``output`` column of a
    row is parsed as JSON and is expected to be an object mapping molecule keys
    to molecule records. A record may be a JSON object or a string holding a
    JSON-encoded object. For each record, the ``IUPAC_nomenclature`` and
    ``collectionSite`` values (empty string when absent) are collected, and the
    resulting list is dumped to ``output_json_path``.

    Rows or records that cannot be used are reported with ``print`` and
    skipped; they never abort the run. A JSON decoding error abandons the rest
    of the affected row, but records already collected from it are kept.

    Args:
        input_csv_path: Path of the CSV file to read (UTF-8).
        output_json_path: Path of the JSON file to write (UTF-8).

    Raises:
        KeyError: If the CSV has data rows but no ``output`` column.
        OSError: If either file cannot be opened.
    """
    data = []
    # newline='' is what the csv module asks for, so that newlines embedded in
    # quoted fields are handed to the parser untouched.
    with open(input_csv_path, mode='r', encoding='utf-8', newline='') as file:
        reader = csv.DictReader(file)
        for row in reader:
            # .get() so that reporting a bad row cannot itself raise when the
            # CSV has no 'pdf' column.
            source = row.get('pdf')
            raw_output = row['output']
            if raw_output is None:
                # DictReader fills fields missing from a short row with None,
                # which json.loads would reject with a TypeError.
                print(f"Missing 'output' value for row: {source}")
                continue
            try:
                output = json.loads(raw_output)
                if not isinstance(output, dict):
                    print(f"Unexpected 'output' payload (not a JSON object) for row: {source}")
                    continue
                for molecule_key, molecule_data in output.items():
                    # Records are sometimes double-encoded as JSON strings.
                    if isinstance(molecule_data, str):
                        molecule_data = json.loads(molecule_data)
                    if not isinstance(molecule_data, dict):
                        print(f"Skipping non-object entry {molecule_key!r} for row: {source}")
                        continue
                    iupac_nomenclature = molecule_data.get('IUPAC_nomenclature', '')
                    collection_site = molecule_data.get('collectionSite', '')
                    data.append({'IUPAC Nomenclature': iupac_nomenclature, 'Collection Site': collection_site})
            except json.JSONDecodeError as e:
                print(f"Error decoding JSON for row: {source}")
                print(f"Error message: {str(e)}")
                continue

    # Write to the path given by the caller rather than a module-level global.
    with open(output_json_path, 'w', encoding='utf-8') as jsonfile:
        json.dump(data, jsonfile, indent=4)

# Run the extraction with the module-level input and output paths
extract_and_save_to_json(
    input_csv_path=csv_file_path,
    output_json_path=output_json_file_path,
)