# 단일음정 유지 능력 평가를 위한 SPICE 기반 코드 (Clean 버전)

In [None]:
# 필요한 라이브러리 import
import tensorflow_hub as hub
import tensorflow as tf
import numpy as np
import librosa
import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings('ignore')

# TensorFlow runtime configuration
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Ask TensorFlow to grow GPU memory on demand for every visible GPU.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Best-effort: continue with TensorFlow's default allocation, but make
        # the failure visible. print() is used because the warnings module is
        # globally silenced at the top of this notebook.
        print(f"[TensorFlow] GPU memory growth could not be enabled: {e}")

# Keep tf.function code running in graph mode rather than eagerly.
tf.config.run_functions_eagerly(False)

# TensorFlow Hub 캐시 클리어
import tempfile
import shutil

def clear_tfhub_cache(cache_dir=None):
    """Delete the on-disk TensorFlow Hub module cache (best-effort).

    Removing the directory forces the next ``hub.load`` to download the
    module again. Presumably this is done to recover from a stale or
    partially downloaded cache entry -- confirm before relying on it.

    Args:
        cache_dir: Directory to remove. When ``None``, the directory named by
            the ``TFHUB_CACHE_DIR`` environment variable is used; if that is
            unset or empty, ``<system temp dir>/tfhub_modules`` is used.

    Returns:
        None. A failure to delete is reported on stdout and never raised.
    """
    if cache_dir is None:
        # TF Hub lets users relocate its cache through TFHUB_CACHE_DIR; clear
        # the directory that is actually in use, not only the default one.
        cache_dir = os.environ.get('TFHUB_CACHE_DIR') or os.path.join(
            tempfile.gettempdir(), 'tfhub_modules')

    if not os.path.isdir(cache_dir):
        return

    try:
        shutil.rmtree(cache_dir)
    except OSError as e:
        # Clearing the cache is an optimization, not a requirement: report
        # the problem and let the notebook continue.
        print(f"[clear_tfhub_cache] could not remove {cache_dir}: {e}")


clear_tfhub_cache()

In [None]:
# 1. 오디오 로드 함수
def load_audio(filepath, sample_rate=16000):
    """Load an audio file with librosa and peak-normalize it.

    Args:
        filepath: Path of the audio file, handed to ``librosa.load``.
        sample_rate: Sampling rate handed to ``librosa.load`` as ``sr``.

    Returns:
        tuple: ``(audio, sr)`` where ``audio`` is scaled so that its largest
        absolute sample is 1 (left unchanged when it is empty or all zeros)
        and ``sr`` is the rate reported by ``librosa.load``.
    """
    audio, sr = librosa.load(filepath, sr=sample_rate)

    # np.max raises on a zero-size array, so an empty clip is treated like a
    # silent one. The peak is computed once and reused for the division.
    peak = np.max(np.abs(audio)) if audio.size else 0.0
    if peak > 0:
        audio = audio / peak
    return audio, sr

In [None]:
# 2. SPICE 모델 전용 피치 예측
def estimate_pitch_spice_only(audio, sr=16000):
    """Estimate a per-frame pitch track in Hz with the SPICE model (TF Hub).

    Args:
        audio: 1-D sequence of audio samples.
        sr: Sampling rate of ``audio``. The model's published documentation
            specifies 16 kHz input, so audio at any other rate is resampled
            to 16 kHz before inference.

    Returns:
        tuple: ``(pitch, confidence)`` as 1-D NumPy arrays with one entry per
        model output frame. ``pitch`` is in Hz; ``confidence`` is
        ``1 - uncertainty`` as reported by the model.

    Raises:
        ValueError: If ``audio`` is empty or contains only zeros.
    """
    spice_sample_rate = 16000
    # Constants published with the SPICE model for mapping its normalized
    # [0, 1] pitch output onto a frequency in Hz.
    pt_offset, pt_slope = 25.58, 63.07
    fmin, bins_per_octave = 10.0, 12.0

    # Validate before the expensive model load.
    audio = np.asarray(audio, dtype=np.float32)
    if audio.size == 0 or not np.any(audio):
        raise ValueError("오디오에 신호가 없습니다")

    if sr != spice_sample_rate:
        audio = librosa.resample(audio, orig_sr=sr, target_sr=spice_sample_rate)

    tf.keras.backend.clear_session()

    with tf.device('/CPU:0'):
        model = hub.load("https://tfhub.dev/google/spice/2")
        infer = model.signatures["serving_default"]

        # Best-effort warm-up on a short silent clip. A failure here is not
        # fatal: the real inference below surfaces any genuine error.
        try:
            infer(tf.zeros([1000], dtype=tf.float32))
        except Exception:
            pass

        outputs = infer(tf.convert_to_tensor(audio, dtype=tf.float32))

    pitch_normalized = outputs["pitch"].numpy().flatten()
    uncertainty = outputs["uncertainty"].numpy().flatten()
    confidence = 1.0 - uncertainty

    # The raw "pitch" output is a normalized value, not a frequency. Convert
    # it so downstream thresholds and plot labels in Hz are meaningful.
    cqt_bin = pitch_normalized * pt_slope + pt_offset
    pitch_hz = fmin * 2.0 ** (cqt_bin / bins_per_octave)

    return pitch_hz, confidence

In [None]:
# 3. 신뢰도로 피치 필터링
def filter_pitch(pitch, confidence, threshold=0.7):
    """Replace low-confidence pitch frames with 0.

    Args:
        pitch: Iterable of per-frame pitch values.
        confidence: Iterable of per-frame confidence values, paired with
            ``pitch`` element by element (extra entries in the longer one
            are ignored).
        threshold: Minimum confidence a frame needs to keep its pitch.

    Returns:
        list: The pitch value for frames with ``confidence >= threshold``
        and ``0`` for every other frame.
    """
    kept = []
    for value, score in zip(pitch, confidence):
        kept.append(value if score >= threshold else 0)
    return kept

In [None]:
# 4. 이동 표준편차 계산
def moving_std(seq, win=5):
    """Compute a sliding-window standard deviation over the positive samples.

    The sequence is extended by ``win // 2`` copies of its first and last
    value so that one window exists for every input position.

    Args:
        seq: Sequence of pitch values; entries ``<= 0`` are excluded from
            each window's statistic.
        win: Window length in frames.

    Returns:
        list: One value per element of ``seq``: the standard deviation of the
        positive entries in that window, or ``0.0`` when the window holds
        fewer than two positive entries. An empty input gives ``[]``.
    """
    if len(seq) == 0:
        return []

    half_width = win // 2
    extended = np.pad(seq, (half_width,), mode='edge')

    def _window_std(start):
        # Only frames carrying a pitch (> 0) contribute to the spread.
        frames = extended[start:start + win]
        voiced = frames[frames > 0]
        return np.std(voiced) if len(voiced) > 1 else 0.0

    return [_window_std(start) for start in range(len(seq))]

In [None]:
# 5. 단일음정 구간 판별 및 평가
def evaluate_pitch_stability(filtered_pitch, std_threshold=1.5, actual_duration=None,
                             window_size=5, frame_rate=100):
    """Measure how much of a filtered pitch track is a stable single pitch.

    A frame counts as stable when it carries a pitch (value > 0) and the
    moving standard deviation around it is below ``std_threshold``.

    Args:
        filtered_pitch: Per-frame pitch values with 0 marking rejected frames.
        std_threshold: Upper bound (exclusive) on the moving standard
            deviation for a frame to be considered stable.
        actual_duration: Real length of the recording in seconds. When given,
            the frame rate is derived from it; must be positive.
        window_size: Window length in frames for the moving standard
            deviation.
        frame_rate: Frames per second assumed when ``actual_duration`` is
            ``None``. NOTE(review): the default of 100 is kept for backward
            compatibility; SPICE reportedly emits one frame every 32 ms
            (31.25 fps) -- confirm and pass the matching rate, or supply
            ``actual_duration`` instead.

    Returns:
        tuple: ``(stable_ratio, mono_duration, total_duration, mono_flags)``:
        the stable fraction of the recording, the stable time in seconds, the
        total time in seconds, and the per-frame stability flags. An empty
        input gives ``(0, 0, 0, [])``.

    Raises:
        ValueError: If ``actual_duration`` is given but not positive, or if
            ``frame_rate`` is needed and not positive.
    """
    frame_count = len(filtered_pitch)
    if frame_count == 0:
        return 0, 0, 0, []

    # Reject inputs that would otherwise divide by zero further down.
    if actual_duration is not None:
        if actual_duration <= 0:
            raise ValueError(
                f"actual_duration must be positive, got {actual_duration!r}")
    elif frame_rate <= 0:
        raise ValueError(f"frame_rate must be positive, got {frame_rate!r}")

    pitch_std = moving_std(filtered_pitch, win=window_size)
    mono_flags = [s < std_threshold and p > 0 for s, p in zip(pitch_std, filtered_pitch)]
    stable_frames = sum(mono_flags)

    if actual_duration is not None:
        # Derive the effective frame rate from the real recording length.
        actual_fps = frame_count / actual_duration
        mono_duration = stable_frames / actual_fps
        total_duration = actual_duration
    else:
        mono_duration = stable_frames / frame_rate
        total_duration = frame_count / frame_rate

    stable_ratio = mono_duration / total_duration if total_duration > 0 else 0

    return stable_ratio, mono_duration, total_duration, mono_flags

In [None]:
# 6. 전체 파이프라인 함수 (SPICE 전용)
def analyze_pitch_stability(filepath, std_threshold=1.5, confidence_threshold=0.7, window_size=5):
    """Run the SPICE pitch-stability pipeline on one audio file and plot it.

    Steps: load the audio, estimate pitch with SPICE, zero out low-confidence
    frames, compute a moving standard deviation, flag stable frames, draw a
    three-panel figure, and return summary statistics.

    Args:
        filepath: Path of the audio file to analyze.
        std_threshold: Upper bound (exclusive) on the moving standard
            deviation for a frame to count as stable.
        confidence_threshold: Minimum confidence for a frame's pitch to be
            kept by the filtering step.
        window_size: Window length in frames for the moving standard
            deviation.

    Returns:
        dict: ``stable_ratio`` (stable time / total time), ``mono_duration``
        (stable time in seconds), ``total_duration`` (recording length in
        seconds), ``frames_total`` and ``frames_stable``.

    Raises:
        Any exception raised by the loading or pitch-estimation steps is
        propagated unchanged.
    """
    # 1. Load the audio
    audio, sr = load_audio(filepath)
    actual_duration = len(audio) / sr

    # 2. Estimate pitch with SPICE
    pitch, confidence = estimate_pitch_spice_only(audio, sr)

    # 3. Drop low-confidence frames
    filtered_pitch = filter_pitch(pitch, confidence, threshold=confidence_threshold)

    # 4. Stability per frame: has a pitch and a small local spread
    pitch_std = moving_std(filtered_pitch, win=window_size)
    mono_flags = [s < std_threshold and p > 0 for s, p in zip(pitch_std, filtered_pitch)]
    frames_stable = int(sum(mono_flags))

    # 5. Convert frame counts to seconds using the effective frame rate.
    #    The guards keep a zero-frame result from dividing by zero.
    frame_count = len(filtered_pitch)
    actual_fps = frame_count / actual_duration if actual_duration > 0 else 0.0
    mono_duration = frames_stable / actual_fps if actual_fps > 0 else 0.0
    stable_ratio = mono_duration / actual_duration if actual_duration > 0 else 0.0

    # 6. Visualize
    _plot_pitch_stability(pitch, filtered_pitch, pitch_std, mono_flags,
                          actual_fps, std_threshold)

    return {
        'stable_ratio': stable_ratio,
        'mono_duration': mono_duration,
        'total_duration': actual_duration,
        'frames_total': len(pitch),
        'frames_stable': frames_stable
    }


def _plot_pitch_stability(pitch, filtered_pitch, pitch_std, mono_flags, fps, std_threshold):
    """Draw raw pitch, filtered pitch, and moving std with stable regions."""

    def _frame_times(count):
        # Frame index -> seconds; fall back to zeros if the rate is unknown.
        return np.arange(count) / fps if fps > 0 else np.zeros(count)

    time_axis = _frame_times(len(pitch))
    time_axis_filtered = _frame_times(len(filtered_pitch))

    plt.figure(figsize=(15, 8))

    # Panel 1: raw pitch
    plt.subplot(3, 1, 1)
    plt.plot(time_axis, pitch, 'b-', alpha=0.7, label='SPICE Raw Pitch')
    plt.ylabel('Pitch (Hz)')
    plt.title(f'SPICE Raw Pitch Estimation ({fps:.1f}fps)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Panel 2: confidence-filtered pitch
    plt.subplot(3, 1, 2)
    plt.plot(time_axis_filtered, filtered_pitch, 'g-', linewidth=2, label='SPICE Filtered Pitch')
    plt.ylabel('Pitch (Hz)')
    plt.title('SPICE Filtered Pitch (High Confidence Only)')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Panel 3: moving std and stable regions
    plt.subplot(3, 1, 3)
    if len(filtered_pitch) > 0:
        plt.plot(time_axis_filtered, pitch_std, 'r-', label='Moving Std', alpha=0.7)
        plt.axhline(y=std_threshold, color='orange', linestyle='--', label='Stability Threshold')

        # Shade stable frames at 10% of the tallest thing on the axis, so the
        # band stays visible even when every moving std is exactly 0.
        band_height = 0.1 * max(max(pitch_std), std_threshold)
        plt.fill_between(time_axis_filtered, 0, np.array(mono_flags) * band_height,
                         alpha=0.3, color='green', label='Stable Regions')
        plt.legend()

    plt.xlabel('Time (seconds)')
    plt.ylabel('Pitch Std')
    plt.title('SPICE Pitch Stability Analysis')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

In [None]:
# Usage example
filepath = "0_아/e_1_1.wav"

# Analyze with the default parameters and report the headline figures
result = analyze_pitch_stability(filepath)
print(
    f"단일음정 시간: {result['mono_duration']:.2f}초",
    f"안정성 비율: {result['stable_ratio']:.2%}",
    sep="\n",
)

In [None]:
# Hyperparameter tuning example (aiming for a result close to 10 seconds)
result_tuned = analyze_pitch_stability(
    filepath,
    window_size=25,            # large window for a smoother estimate
    confidence_threshold=0.1,  # accept low-confidence frames as well
    std_threshold=50.0,        # very lenient stability criterion
)
print(
    f"조정된 단일음정 시간: {result_tuned['mono_duration']:.2f}초",
    f"조정된 안정성 비율: {result_tuned['stable_ratio']:.2%}",
    sep="\n",
)