In [5]:
import json
import os
import numpy as np

def extract_features(file_path, result_folder):
    """Return a placeholder feature record for one audio file.

    No audio is read: every scalar value is a hard-coded example, and every
    plot entry is just a path built by joining ``result_folder`` with a fixed
    file name.

    Args:
        file_path: Path of the audio file; echoed back unchanged.
        result_folder: Folder used as the parent of every plot path.

    Returns:
        A dict with ``"file_path"`` and ``"features"``; ``"features"`` maps
        feature names to example floats or to plot image paths.
    """
    # Example scalar values (not computed from the audio).
    features = {
        "zero_crossing_rate": 0.012,
        "harmonic_to_noise_ratio": 0.75,
        "spectral_flatness": 0.3,
        "spectral_rolloff": 2800.0,
        "rms_energy": 0.05,
        "entropy": 1.45,
    }

    # Feature name -> image file name, in the order they appear in the record.
    plot_files = (
        ("waveform", "file_waveform.png"),
        ("envelope", "file_envelope.png"),
        ("cepstrum", "file_cepstrum.png"),
        ("mel_spectrogram", "file_mel.png"),
        ("mfcc", "file_mfcc.png"),
        ("linear_spectrogram", "file_linear.png"),
        ("chroma_features", "file_chroma.png"),
        ("spectral_centroid", "file_spectral_centroid.png"),
        ("spectral_bandwidth", "file_spectral_bandwidth.png"),
        ("wavelet_transform", "file_wavelet.png"),
        ("power_spectrum", "file_power_spectrum.png"),
    )
    for feature_name, image_name in plot_files:
        features[feature_name] = os.path.join(result_folder, image_name)

    features["std"] = 0.123
    features["avg"] = 0.456

    return {"file_path": file_path, "features": features}

def create_json_structure(data_entries, output_json_path):
    """Write the entries to a JSON file under the key ``"audio_features"``.

    Args:
        data_entries: JSON-serializable collection of feature entries.
        output_json_path: Path of the JSON file to create or overwrite.
    """
    data = {"audio_features": data_entries}
    # An explicit UTF-8 encoding keeps the output independent of the platform
    # locale, and ensure_ascii=False stores non-ASCII text (e.g. Korean notes)
    # readably instead of as \uXXXX escapes.
    with open(output_json_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, ensure_ascii=False, indent=4)

def main():
    """Build one feature entry per .wav file and save them all as JSON.

    Walks ``../unziped/dev/<class>/<domain>`` for every machine class and
    domain listed below, calls ``extract_features`` on each ``.wav`` file
    found there, and writes the collected entries to ``audio_features.json``
    via ``create_json_structure``.
    """
    classes = ["ToyCar" , "ToyTrain", "bearing", "fan", "gearbox", "slider", "valve"]
    data_type = ['test', 'train']
    all_entries = []

    for class_name in classes:
        for data in data_type:
            folder_path = f'../unziped/dev/{class_name}/{data}'
            if not os.path.isdir(folder_path):
                # Without this guard, makedirs below would silently create an
                # empty dataset tree for a folder that is simply missing.
                print(f"Dataset folder not found, skipping: {folder_path}")
                continue

            result_folder = os.path.join(folder_path, "Feature_Extraction_Results")
            os.makedirs(result_folder, exist_ok=True)

            # Process every wav file. os.listdir returns names in arbitrary
            # order, so sort them to make the output JSON reproducible.
            wav_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.wav'))

            for wav_file in wav_files:
                file_path = os.path.join(folder_path, wav_file)
                features = extract_features(file_path, result_folder)

                # Auto-generated note describing the entry
                note = f"This is a {class_name} machine, data type: {data}, file: {wav_file}"

                entry = {
                    "file_path": features["file_path"],
                    "machine_type": class_name,
                    "section": "00",  # hard-coded; could be set dynamically
                    "domain": data,  # 'train' or 'test'
                    "features": features["features"],
                    "note": note  # attached note
                }
                all_entries.append(entry)

    # Save everything to a JSON file
    output_json_path = 'audio_features.json'
    create_json_structure(all_entries, output_json_path)
    print(f"JSON 파일이 '{output_json_path}' 경로에 저장되었습니다.")

if __name__ == "__main__":
    main()


JSON 파일이 'audio_features.json' 경로에 저장되었습니다.


### 더 해볼만한 것들???

```
{
            "pair_id": "pair_001",
            "pair_info": {
                "ssim_similarity_score": 1.0,
                "handwrite": ""
            },
            "normal_data": {
                "file_path": "../unziped/dev/ToyTrain/test/section_00_source_test_normal_0050_noAttribute.wav/file.wav",
                "machine_type": "ToyTrain",
                "section": "00",
                "domain": "test",
                "features": {
                    "zero_crossing_rate": 0.012,
                    "harmonic_to_noise_ratio": 0.75,
                    "spectral_flatness": 0.3,
                    "spectral_rolloff": 2800.0,
                    "rms_energy": 0.05,
                    "entropy": 1.45,
                    "waveform": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_waveform.png",
                    "envelope": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_envelope.png",
                    "cepstrum": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_cepstrum.png",
                    "mel_spectrogram": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_mel.png",
                    "mfcc": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_mfcc.png",
                    "linear_spectrogram": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_linear.png",
                    "chroma_features": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_chroma.png",
                    "spectral_centroid": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_spectral_centroid.png",
                    "spectral_bandwidth": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_spectral_bandwidth.png",
                    "wavelet_transform": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_wavelet.png",
                    "power_spectrum": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_power_spectrum.png",
                    "std": 0.123,
                    "avg": 0.456
                }
            },
            "anomaly_data": {
                "file_path": "../unziped/dev/ToyTrain/test/section_00_source_test_anomaly_0037_noAttribute.wav/file.wav",
                "machine_type": "ToyTrain",
                "section": "00",
                "domain": "test",
                "features": {
                    "zero_crossing_rate": 0.012,
                    "harmonic_to_noise_ratio": 0.75,
                    "spectral_flatness": 0.3,
                    "spectral_rolloff": 2800.0,
                    "rms_energy": 0.05,
                    "entropy": 1.45,
                    "waveform": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_waveform.png",
                    "envelope": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_envelope.png",
                    "cepstrum": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_cepstrum.png",
                    "mel_spectrogram": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_mel.png",
                    "mfcc": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_mfcc.png",
                    "linear_spectrogram": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_linear.png",
                    "chroma_features": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_chroma.png",
                    "spectral_centroid": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_spectral_centroid.png",
                    "spectral_bandwidth": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_spectral_bandwidth.png",
                    "wavelet_transform": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_wavelet.png",
                    "power_spectrum": "../unziped/dev/ToyTrain/test/Feature_Extraction_Results/file_power_spectrum.png",
                    "std": 0.123,
                    "avg": 0.456
                }
            }
        },
```

- 이 데이터 구조를 이용하면 여러 가지 데이터를 구할 수 있다.
- 현재는 handwrite를 하나만 사용 중이지만, 각각의 png 데이터마다 note를 추가해서 저장할 수 있도록 바꾸는 게 좋아 보인다.

### pair data

아직 difference를 만들지 않았기 때문에, 실제로 사용 가능한지 검증해야 함.

In [13]:
import os
import json
import pandas as pd

# DATASETS_DIR configuration
DATASETS_DIR = "../../unziped/dev"  # Path to the datasets directory
# Every sub-directory of DATASETS_DIR is treated as a class name. os.listdir
# returns names in arbitrary order, so sort them to keep the order of the
# generated JSON reproducible between runs and machines.
CLASS_NAMES = sorted(
    name
    for name in os.listdir(DATASETS_DIR)
    if os.path.isdir(os.path.join(DATASETS_DIR, name))
)

# Dataset loading function
def load_dataset(attributes_file, datasets_dir, class_name):
    """Read an attributes CSV and return file paths with their labels.

    Args:
        attributes_file: Path of a CSV file that has a ``file_name`` column.
        datasets_dir: Directory each ``file_name`` value is joined onto.
        class_name: Accepted for the caller's convenience; not used here.

    Returns:
        A ``(file_paths, labels)`` tuple of equally long lists. A label is
        ``'anomaly'`` when the lower-cased file name contains ``"anomaly"``
        and ``'normal'`` otherwise.

    Raises:
        FileNotFoundError: If ``attributes_file`` is not an existing file.
    """
    if not os.path.isfile(attributes_file):
        raise FileNotFoundError(f"Attributes file not found: {attributes_file}")

    names = pd.read_csv(attributes_file)['file_name'].tolist()

    file_paths = []
    labels = []
    for name in names:
        labels.append('anomaly' if 'anomaly' in name.lower() else 'normal')
        # The path is datasets_dir + file name only; class_name is not inserted.
        file_paths.append(os.path.join(datasets_dir, name))

    return file_paths, labels

# Match an anomaly file with a normal file
def find_matching_normal_file(anomaly_path, normal_paths):
    """Return the first normal path whose name fields match the anomaly's.

    The base name of each path is split on ``"_"`` and the tokens at
    positions 2, 5 and 6 are compared.

    Args:
        anomaly_path: Path of the anomaly file.
        normal_paths: Iterable of candidate normal file paths.

    Returns:
        The first path in ``normal_paths`` whose three tokens equal those of
        ``anomaly_path``, or ``None`` when there is no such path or when any
        of the anomaly's three tokens is missing or empty.
    """
    def name_fields(path):
        # Tokens 2, 5 and 6 of the base name; None where the name is too short.
        tokens = os.path.basename(path).split("_")
        return tuple(tokens[i] if len(tokens) > i else None for i in (2, 5, 6))

    wanted = name_fields(anomaly_path)
    if not all(wanted):
        return None

    return next(
        (candidate for candidate in normal_paths if name_fields(candidate) == wanted),
        None,
    )

# JSON file creation function
def create_audio_pair_notes_json(class_names, datasets_dir, output_path):
    """Pair each anomaly file with a matching normal file and save as JSON.

    For every class, ``<datasets_dir>/<class_name>/attributes_00.csv`` is
    loaded with ``load_dataset``. Each anomaly path for which
    ``find_matching_normal_file`` returns a match becomes one record with
    empty note fields. A class whose attributes file is missing is reported
    with ``print`` and skipped.

    Args:
        class_names: Iterable of class (sub-directory) names.
        datasets_dir: Root directory of the datasets.
        output_path: Path of the JSON file to write.
    """
    pairs = []
    for class_name in class_names:
        attributes_file = os.path.join(datasets_dir, class_name, "attributes_00.csv")
        try:
            file_paths, labels = load_dataset(attributes_file, datasets_dir, class_name)
        except FileNotFoundError as e:
            print(f"Error loading dataset for class {class_name}: {e}")
            continue

        # Split the paths by label in a single pass.
        normal_paths = []
        anomaly_paths = []
        for path, label in zip(file_paths, labels):
            if label == "normal":
                normal_paths.append(path)
            elif label == "anomaly":
                anomaly_paths.append(path)

        for anomaly_path in anomaly_paths:
            normal_path = find_matching_normal_file(anomaly_path, normal_paths)
            if not normal_path:
                continue
            pairs.append({
                "normal_file_path": normal_path,
                "anomaly_file_path": anomaly_path,
                "machine_type": class_name,
                "section": "00",  # could be set dynamically if needed
                "domain": "test",  # could be set dynamically if needed
                "same_note": "",
                "difference_note": "",
                "normal_note": "",
                "anomaly_note": ""
            })

    # Save the JSON file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(pairs, f, ensure_ascii=False, indent=4)
    print(f"JSON 파일이 생성되었습니다: {output_path}")

# Path of the JSON file to generate
output_json_path = "../../extract_feature_code/audio_pair_notes.json"

# Run the JSON file generation
create_audio_pair_notes_json(
    class_names=CLASS_NAMES,
    datasets_dir=DATASETS_DIR,
    output_path=output_json_path,
)


JSON 파일이 생성되었습니다: ../../extract_feature_code/audio_pair_notes.json
