<a href="https://colab.research.google.com/github/chaeyoooo/capstondesign_voicefishing/blob/main/%EC%A0%95%EC%83%81_%EC%9D%8C%EC%84%B1_%EB%B0%95%EC%8A%A4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# —— 0) Colab + mount Google Drive ——
from google.colab import drive
# Mount Drive under /content/drive, which every path below is rooted at.
# force_remount=True is passed so that re-running this cell redoes the mount.
drive.mount('/content/drive', force_remount=True)

# —— 1) 라이브러리 import & 경로 설정 ——
import os
import re
import shutil
import subprocess

import librosa
import numpy as np
import pandas as pd
import soundfile as sf

# (Edit SRC_DIR to match the layout of your own Drive.)
# Both output folders and the feature CSV are placed inside SRC_DIR.
SRC_DIR = '/content/drive/MyDrive/voicefishing_all/realword/'
WAV_DIR = os.path.join(SRC_DIR, 'wav')
TRIM_DIR = os.path.join(SRC_DIR, 'trimmed')
CSV_FEATURE = os.path.join(SRC_DIR, 'audio_features_malmoongchi(real).csv')

# Create the output folders (no error if they already exist)
for _out_dir in (WAV_DIR, TRIM_DIR):
    os.makedirs(_out_dir, exist_ok=True)

# —— 2) PCM → WAV conversion ——
def pcm_to_wav(pcm_path, wav_path,
               sample_rate=16000, channels=1, bit_depth=16):
    """Convert a headerless signed-PCM file to WAV by invoking ffmpeg.

    Args:
        pcm_path: Path of the raw PCM input file.
        wav_path: Path of the WAV file to write; an existing file is
            overwritten (ffmpeg is run with ``-y``).
        sample_rate: Sample rate of the raw input, in Hz.
        channels: Number of channels in the raw input.
        bit_depth: Bits per sample of the raw input.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status. Without this
            check a failed conversion would go unnoticed and the caller
            could load a stale WAV left over from an earlier run.
    """
    # ffmpeg's raw signed formats are named "s16le", "s24le", ...; the
    # 8-bit variant has no byte order and is simply "s8".
    fmt = "s8" if bit_depth == 8 else f"s{bit_depth}le"
    cmd = [
        'ffmpeg', '-y',
        '-f', fmt,
        '-ar', str(sample_rate),
        '-ac', str(channels),
        '-i', pcm_path,
        wav_path
    ]
    # stdout stays silenced; stderr is captured so a failure can be reported.
    result = subprocess.run(cmd, stdout=subprocess.DEVNULL,
                            stderr=subprocess.PIPE)
    if result.returncode != 0:
        stderr_text = result.stderr.decode('utf-8', errors='replace').strip()
        # The last stderr line is normally the actual error; earlier lines
        # are ffmpeg's banner and stream information.
        reason = stderr_text.splitlines()[-1] if stderr_text else 'no error output'
        raise RuntimeError(
            f"ffmpeg failed (exit status {result.returncode}) "
            f"while converting {pcm_path}: {reason}"
        )

# —— 3) Silence trimming ——
def trim_silence(y, top_db=20):
    """Return ``y`` after trimming it with ``librosa.effects.trim``.

    Args:
        y: Audio signal to trim.
        top_db: Threshold forwarded unchanged to ``librosa.effects.trim``.

    Returns:
        The trimmed signal only.
    """
    # trim() returns a (trimmed_signal, interval) pair; the interval is not needed.
    trimmed, _ = librosa.effects.trim(y, top_db=top_db)
    return trimmed

# —— 4) Recursively collect the audio files ——
# WAV_DIR and TRIM_DIR are sub-folders of SRC_DIR. They are pruned from the
# walk so that re-running the notebook does not treat the pipeline's own
# converted/trimmed outputs as new input files.
output_dirs = {os.path.abspath(WAV_DIR), os.path.abspath(TRIM_DIR)}

audio_paths = []
for root, dirs, files in os.walk(SRC_DIR):
    # Editing `dirs` in place stops os.walk from descending into those folders.
    dirs[:] = [
        d for d in dirs
        if os.path.abspath(os.path.join(root, d)) not in output_dirs
    ]
    for f in files:
        # Extension match is case-insensitive.
        if f.lower().endswith(('.pcm', '.mp3', '.wav')):
            audio_paths.append(os.path.join(root, f))

print(f"▶ 총 {len(audio_paths)}개의 오디오 파일 발견")

# —— 5) Run the full pipeline ——
def _extract_features(y_trim, sr):
    """Return the per-file feature dict (all keys except 'original_file')."""
    mfcc = librosa.feature.mfcc(y=y_trim, sr=sr, n_mfcc=13).mean(axis=1)
    pitches, mags = librosa.piptrack(y=y_trim, sr=sr)
    # Keep only pitch candidates whose magnitude is above the median magnitude.
    pitch_vals = pitches[mags > np.median(mags)]

    feat = {
        'sr': sr,
        'duration_sec': len(y_trim) / sr,
        'rmse': librosa.feature.rms(y=y_trim).mean(),
        'pitch_mean': float(np.mean(pitch_vals)) if pitch_vals.size else 0.0,
        'mel_spec': librosa.feature.melspectrogram(y=y_trim, sr=sr).mean(),
    }
    # One column per MFCC coefficient: mfcc_1 ... mfcc_13 (time-averaged).
    for i, c in enumerate(mfcc, 1):
        feat[f'mfcc_{i}'] = c
    return feat


features = []
failed = []  # (relative path, error text) for every file that was skipped

for in_path in sorted(audio_paths):
    fname = os.path.basename(in_path)
    base, ext = os.path.splitext(fname.lower())
    rel = os.path.relpath(in_path, SRC_DIR)

    # 5.1) Decide where the intermediate WAV is stored inside WAV_DIR.
    # NOTE: the name is the lower-cased base name only, so inputs that share
    # a base name (different sub-folders or extensions) reuse the same
    # intermediate WAV; each one is loaded right after it is written.
    wav_name = base + '.wav'
    wav_path = os.path.join(WAV_DIR, wav_name)

    # A single unreadable file must not abort the batch and discard the
    # features already collected, so failures are reported and skipped.
    try:
        # 5.2) Per-format handling
        if ext == '.pcm':
            pcm_to_wav(in_path, wav_path,
                       sample_rate=16000, channels=1, bit_depth=16)
        elif ext == '.mp3':
            # check=True: a failed conversion raises instead of passing silently.
            subprocess.run(
                ['ffmpeg', '-y', '-i', in_path, wav_path],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
                check=True
            )
        elif ext == '.wav':
            # Plain copy.
            try:
                shutil.copyfile(in_path, wav_path)
            except shutil.SameFileError:
                # The input already is the file at wav_path; nothing to copy.
                pass
        else:
            continue  # (should never be reached: the search filters extensions)

        # 5.3) Load & trim silence
        y, sr = librosa.load(wav_path, sr=None)  # keep the original sample rate
        y_trim = trim_silence(y, top_db=20)
        if y_trim.size == 0:
            raise ValueError("no audio left after silence trimming")

        # 5.4) Feature extraction
        feat = {'original_file': rel, **_extract_features(y_trim, sr)}

        # 5.5) Save the trimmed WAV under a filesystem-safe name.
        # The path relative to SRC_DIR is folded into the file name so that
        # equally named files from different folders do not overwrite each other.
        safe = re.sub(r'[^0-9A-Za-z_-]', '_', rel)[:100]
        out_fname = f"{safe}_trim.wav"
        out_path = os.path.join(TRIM_DIR, out_fname)
        sf.write(out_path, y_trim, sr, format='WAV')
    except Exception as exc:
        failed.append((rel, str(exc)))
        print(f"Skipped: {rel} ({exc})")
        continue

    # 5.6) Collect the feature row
    features.append(feat)
    print(f"Processed: {rel} → {out_fname}")

if failed:
    print(f"⚠ {len(failed)} file(s) skipped due to errors")

# —— 6) Save the CSV ——
df = pd.DataFrame(features)
df.to_csv(CSV_FEATURE, index=False)
print(f"✔ All features saved to {CSV_FEATURE}")


[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
Processed: SDRW2400002273/SDRW2400002273.1.1.129.pcm → SDRW2400002273_SDRW2400002273_1_1_129_pcm_trim.wav
Processed: SDRW2400002273/SDRW2400002273.1.1.13.pcm → SDRW2400002273_SDRW2400002273_1_1_13_pcm_trim.wav
Processed: SDRW2400002273/SDRW2400002273.1.1.130.pcm → SDRW2400002273_SDRW2400002273_1_1_130_pcm_trim.wav
Processed: SDRW2400002273/SDRW2400002273.1.1.131.pcm → SDRW2400002273_SDRW2400002273_1_1_131_pcm_trim.wav
Processed: SDRW2400002273/SDRW2400002273.1.1.132.pcm → SDRW2400002273_SDRW2400002273_1_1_132_pcm_trim.wav
Processed: SDRW2400002273/SDRW2400002273.1.1.133.pcm → SDRW2400002273_SDRW2400002273_1_1_133_pcm_trim.wav
Processed: SDRW2400002273/SDRW2400002273.1.1.134.pcm → SDRW2400002273_SDRW2400002273_1_1_134_pcm_trim.wav
Processed: SDRW2400002273/SDRW2400002273.1.1.135.pcm → SDRW2400002273_SDRW2400002273_1_1_135_pcm_trim.wav
Processed: SDRW2400002273/SDRW2400002273.1.1.136.pcm → SDRW2400002273_SDRW2400002273_1_1_136_pcm_trim.wa