In [None]:
import json
import os
import re

def extract_type(file_name):
    """
    Return the recording label embedded in a file name.

    The label is the word 'normal' or 'anomaly' (any letter case) when it
    appears surrounded by underscores, e.g. 'x_normal_01.wav'. The result is
    always lower-case; 'unknown' is returned when no such token is present.
    Adjust the regex pattern if your file naming convention differs.
    """
    found = re.search(r'_(normal|anomaly)_', file_name, re.IGNORECASE)
    if found is None:
        return "unknown"
    return found.group(1).lower()

def extract_features(audio_feature):
    """
    Reshape one raw audio-feature entry into the dataset-view record.

    Args:
        audio_feature: dict for a single audio file. The keys read are
            "file_path", "domain", "machine_type", "note" and "features";
            any that are missing fall back to an empty string / empty dict.

    Returns:
        dict with the file path and base name, the two linear-spectrogram
        descriptions, the scalar features grouped under "float_features"
        (0.0 for any that are missing), the domain, the type derived from
        the file name, the machine type, and the note stored under
        "explanation_about_spectrogram".
    """
    source_path = audio_feature.get("file_path", "")
    base_name = os.path.basename(source_path)

    # The normal/anomaly label is taken from the file name, not from the entry itself.
    label = extract_type(base_name)

    raw = audio_feature.get("features", {})

    def _spectrogram_view(key):
        # Copy only the three known fields of a spectrogram description.
        spec = raw.get(key, {})
        return {
            "path": spec.get("path", ""),
            "librosa_parameters": spec.get("librosa_parameters", {}),
            "plot_parameters": spec.get("plot_parameters", {}),
        }

    scalar_names = (
        "zero_crossing_rate",
        "harmonic_to_noise_ratio",
        "spectral_flatness",
        "spectral_rolloff",
        "rms_energy",
        "entropy",
        "std",
        "avg",
    )

    return {
        "file_path": source_path,
        "file_name": base_name,
        "linear_spectrogram_with_axes": _spectrogram_view("linear_spectrogram_with_axes"),
        "linear_spectrogram_no_axes": _spectrogram_view("linear_spectrogram_no_axes"),
        # Scalar features are grouped under a single key.
        "float_features": {name: raw.get(name, 0.0) for name in scalar_names},
        "domain": audio_feature.get("domain", ""),
        "type": label,
        "machineType": audio_feature.get("machine_type", ""),
        # The input field 'note' is published under this longer name.
        "explanation_about_spectrogram": audio_feature.get("note", ""),
    }

def transform_json(input_file, output_file):
    """
    Convert an audio-features JSON file into the 'ladlm_dataset_view' layout.

    Every entry of the top-level "audio_features" list in ``input_file`` is
    passed through ``extract_features``; the resulting list is written to
    ``output_file`` under the single key "ladlm_dataset_view". A missing
    "audio_features" key yields an empty list.

    Args:
        input_file: path of the JSON file to read (decoded as UTF-8).
        output_file: path of the JSON file to write (encoded as UTF-8,
            overwritten if it already exists).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(input_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    transformed_data = [
        extract_features(audio_feature)
        for audio_feature in data.get("audio_features", [])
    ]

    # ensure_ascii=False writes non-ASCII characters verbatim, so the file
    # must be opened as UTF-8; a legacy default code page could otherwise
    # raise UnicodeEncodeError or produce a file that is not valid UTF-8 JSON.
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump({"ladlm_dataset_view": transformed_data}, outfile, indent=4, ensure_ascii=False)

    print(f"Transformed data has been saved to {output_file}")

if __name__ == "__main__":
    # Output file, created in the current working directory.
    output_json = "ladlm_dataset_view.json"
    # Input file, resolved relative to the current working directory.
    input_json = "../extract_feature/audio_features.json"
    transform_json(input_file=input_json, output_file=output_json)


Transformed data has been saved to ladlm_dataset_view.json


# note 필드가 존재하는 것만 뽑아보자.

In [None]:
import json
import os
import re

def extract_type(file_name):
    """
    Derive the type ('normal' or 'anomaly') from a file name.

    The token must be delimited by underscores on both sides and is matched
    case-insensitively; the returned value is lower-case. File names without
    such a token give 'unknown'. Adjust the pattern to your naming scheme.
    """
    label_pattern = re.compile(r'_(normal|anomaly)_', re.IGNORECASE)
    hit = label_pattern.search(file_name)
    return "unknown" if hit is None else hit.group(1).lower()

def extract_features(audio_feature):
    """
    Build the dataset-view record for a single audio-feature entry.

    Args:
        audio_feature: dict describing one audio file. Reads the keys
            "file_path", "domain", "machine_type", "note" and "features",
            substituting an empty string / empty dict for missing ones.

    Returns:
        dict containing, in this order: "file_path", "file_name", the two
        linear-spectrogram descriptions, "float_features" (scalar values,
        0.0 when absent), "domain", "type" (derived from the file name),
        "machineType" and "explanation_about_spectrogram" (the input note).
    """
    path = audio_feature.get("file_path", "")
    name = os.path.basename(path)
    domain = audio_feature.get("domain", "")
    machine = audio_feature.get("machine_type", "")
    note_text = audio_feature.get("note", "")

    # The type is encoded in the file name rather than stored in the entry.
    kind = extract_type(name)

    feats = audio_feature.get("features", {})

    record = {"file_path": path, "file_name": name}

    # Both spectrogram variants expose the same three fields.
    for spec_key in ("linear_spectrogram_with_axes", "linear_spectrogram_no_axes"):
        spec = feats.get(spec_key, {})
        record[spec_key] = {
            "path": spec.get("path", ""),
            "librosa_parameters": spec.get("librosa_parameters", {}),
            "plot_parameters": spec.get("plot_parameters", {}),
        }

    # Scalar features are collected under one key.
    grouped = {}
    for feature_name in (
        "zero_crossing_rate",
        "harmonic_to_noise_ratio",
        "spectral_flatness",
        "spectral_rolloff",
        "rms_energy",
        "entropy",
        "std",
        "avg",
    ):
        grouped[feature_name] = feats.get(feature_name, 0.0)
    record["float_features"] = grouped

    record["domain"] = domain
    record["type"] = kind
    record["machineType"] = machine
    record["explanation_about_spectrogram"] = note_text  # input field 'note'

    return record

def transform_json(input_file, output_file):
    """
    Convert the annotated entries of an audio-features JSON file into the
    'ladlm_dataset_view' layout.

    Only entries of the top-level "audio_features" list whose "note" is a
    non-blank string are kept. Each kept entry is passed through
    ``extract_features`` and the resulting list is written to
    ``output_file`` under the single key "ladlm_dataset_view".

    Args:
        input_file: path of the JSON file to read (decoded as UTF-8).
        output_file: path of the JSON file to write (encoded as UTF-8,
            overwritten if it already exists).
    """
    with open(input_file, 'r', encoding='utf-8') as infile:
        data = json.load(infile)

    audio_features = data.get("audio_features", [])

    def _has_note(entry):
        # The .get default only covers a missing key. An explicit JSON null
        # (None) or any other non-string value would make .strip() raise
        # AttributeError, so such values are treated as "no note".
        note = entry.get("note", "")
        return isinstance(note, str) and bool(note.strip())

    # Filter out entries with a missing, null or blank 'note'
    filtered_features = [f for f in audio_features if _has_note(f)]

    transformed_data = [extract_features(f) for f in filtered_features]

    # Write the transformed data to the output JSON. ensure_ascii=False keeps
    # non-ASCII text readable, which is why the file is opened as UTF-8.
    with open(output_file, 'w', encoding='utf-8') as outfile:
        json.dump({"ladlm_dataset_view": transformed_data}, outfile, indent=4, ensure_ascii=False)

    print(f"Transformed data has been saved to {output_file}")

if __name__ == "__main__":
    # Desired output file path
    output_json = "ladlm_dataset_view.json"
    # Replace with your input file path
    input_json = "../extract_feature/audio_features.json"
    transform_json(input_file=input_json, output_file=output_json)
