In [1]:
!pip install praat-parselmouth numpy



In [2]:
import os
import subprocess
import parselmouth
import numpy as np
from parselmouth.praat import call

In [None]:
def _create_textgrid(audio_path, praat_script_path):
    """Run the Praat script on *audio_path* and return the expected TextGrid path.

    Raises:
        subprocess.CalledProcessError: if Praat exits with a non-zero status.
    """
    subprocess.run(
        ["praat", "--run", praat_script_path, audio_path],
        check=True,
    )
    # Swap only the real extension: str.replace(".wav", ...) would also rewrite
    # ".wav" occurring elsewhere in the path and would leave "X.WAV" untouched.
    # NOTE(review): assumes the Praat script writes "<stem>.TextGrid" next to
    # the audio file -- confirm against segment_and_analyze.praat.
    return os.path.splitext(audio_path)[0] + ".TextGrid"


def _read_interval_tiers(textgrid_path):
    """Parse a long-format ("key = value") TextGrid into its interval tiers.

    Returns:
        A list with one entry per IntervalTier, in file order. Each entry is a
        list of ``(start, end, label)`` tuples; intervals whose label is empty
        or whitespace-only are skipped. Point tiers are ignored.
    """
    with open(textgrid_path, "rb") as f:
        raw = f.read()
    # Decode explicitly instead of relying on the platform default encoding;
    # a UTF-16 file is recognised by its byte-order mark.
    if raw.startswith((b"\xff\xfe", b"\xfe\xff")):
        text = raw.decode("utf-16")
    else:
        text = raw.decode("utf-8-sig")

    tiers = []
    current = None       # intervals of the tier being read; None outside an IntervalTier
    in_interval = False  # True between an "intervals [n]:" header and its "text" line
    start = end = None
    for raw_line in text.splitlines():
        line = raw_line.strip()
        # Split on the first "=" only, so labels containing "=" stay intact.
        key, sep, value = line.partition("=")
        key, value = key.strip(), value.strip()

        if sep and key == "class":
            if value.strip('"') == "IntervalTier":
                current = []
                tiers.append(current)
            else:
                current = None
            in_interval = False
        elif current is None:
            # File header or a non-interval tier: its xmin/xmax are not intervals.
            continue
        elif line.startswith("intervals ["):
            in_interval = True
            start = end = None
        elif in_interval and sep and key == "xmin":
            start = float(value)
        elif in_interval and sep and key == "xmax":
            end = float(value)
        elif in_interval and sep and key == "text":
            if len(value) >= 2 and value[0] == '"' and value[-1] == '"':
                value = value[1:-1]
            label = value.replace('""', '"')  # a doubled quote is an escaped quote
            if label.strip() and start is not None and end is not None:
                current.append((start, end, label))
            in_interval = False
    return tiers


def _extract_features(audio_path, textgrid_path):
    """Build per-word feature records, each holding its segments' features.

    The first interval tier supplies the words and the last interval tier the
    segments; a segment belongs to the word whose span contains its midpoint.
    With a single tier every interval is therefore a word with itself as its
    only segment.
    NOTE(review): the tier roles are an assumption -- confirm against the
    TextGrid layout produced by the Praat script.

    Returns:
        A list of dicts with keys "label", "start", "end" and "segments".
        Each segment dict has "label", "start", "end", "pitch" (mean F0),
        "pitch_contour" (voiced F0 samples), "formants" (F1, F2 at the
        midpoint), "mfcc" (coefficients averaged over frames) and "timing"
        (duration).
    """
    sound = parselmouth.Sound(audio_path)
    tiers = _read_interval_tiers(textgrid_path)
    if not tiers:
        return []

    # Analyse the whole recording once. The interval times from the TextGrid
    # are absolute, whereas a part cut out with extract_part() has its time
    # axis restarted at 0, so querying it with start/end misses the data.
    pitch = sound.to_pitch()
    formant = sound.to_formant_burg()
    frame_times = np.asarray(pitch.xs())
    frame_f0 = np.asarray(pitch.selected_array["frequency"])

    def describe(start, end, label):
        midpoint = (start + end) / 2
        # Unvoiced frames are stored as 0 Hz; keep voiced frames only.
        voiced = (frame_times >= start) & (frame_times <= end) & (frame_f0 > 0)
        part = sound.extract_part(from_time=start, to_time=end)
        return {
            "label": label,
            "start": start,
            "end": end,
            "pitch": call(pitch, "Get mean", start, end, "Hertz"),
            "pitch_contour": frame_f0[voiced],
            "formants": (
                call(formant, "Get value at time", 1, midpoint, "Hertz", "Linear"),
                call(formant, "Get value at time", 2, midpoint, "Hertz", "Linear"),
            ),
            # Average over frames so segments of different length are comparable.
            "mfcc": part.to_mfcc().to_array().mean(axis=1),
            "timing": end - start,
        }

    segments = [describe(*interval) for interval in tiers[-1]]
    return [
        {
            "label": label,
            "start": start,
            "end": end,
            "segments": [
                seg for seg in segments
                if start <= (seg["start"] + seg["end"]) / 2 <= end
            ],
        }
        for start, end, label in tiers[0]
    ]


def _contour_correlation(contour1, contour2, n_points=20):
    """Return the Pearson correlation of two F0 contours after time-normalising.

    Both contours are linearly resampled to *n_points* samples. Returns NaN
    when a contour has fewer than two samples or is constant, since the
    correlation is undefined in those cases.
    """
    if len(contour1) < 2 or len(contour2) < 2:
        return float("nan")
    grid = np.linspace(0.0, 1.0, n_points)
    a = np.interp(grid, np.linspace(0.0, 1.0, len(contour1)), contour1)
    b = np.interp(grid, np.linspace(0.0, 1.0, len(contour2)), contour2)
    if a.std() == 0 or b.std() == 0:
        return float("nan")
    return float(np.corrcoef(a, b)[0, 1])


def _calculate_similarity(features1, features2):
    """Compare two feature lists word by word and segment by segment.

    Words and segments are paired positionally; if one side has more entries
    than the other, the surplus is ignored (zip truncation).
    """
    results = []
    for word1, word2 in zip(features1, features2):
        compared = []
        for phoneme1, phoneme2 in zip(word1["segments"], word2["segments"]):
            formant_gap = np.array(phoneme1["formants"]) - np.array(phoneme2["formants"])
            compared.append({
                "phoneme": phoneme1["label"],
                "pitch_similarity": _contour_correlation(
                    phoneme1["pitch_contour"], phoneme2["pitch_contour"]
                ),
                "formant_similarity": float(np.linalg.norm(formant_gap)),
                "mfcc_similarity": float(
                    np.linalg.norm(phoneme1["mfcc"] - phoneme2["mfcc"])
                ),
                "timing_difference": phoneme1["timing"] - phoneme2["timing"],
            })
        results.append({"word": word1["label"], "segments": compared})
    return results


def analyze_similarity(audio1_path, audio2_path):
    """Segment two recordings with Praat and compare them segment by segment.

    For each recording the Praat script "segment_and_analyze.praat" is run to
    produce a TextGrid, acoustic features are extracted for every labelled
    interval, and the two recordings are compared pairwise. The comparison is
    printed and also returned.

    Args:
        audio1_path: Path to the first audio file.
        audio2_path: Path to the second audio file.

    Returns:
        A list of ``{"word": label, "segments": [...]}`` dicts, labelled after
        the first recording. Each segment entry contains:
          - "phoneme": the segment label,
          - "pitch_similarity": Pearson correlation of the time-normalised F0
            contours (NaN when undefined),
          - "formant_similarity": Euclidean distance between the (F1, F2)
            pairs in Hz (lower means more similar),
          - "mfcc_similarity": Euclidean distance between the mean MFCC
            vectors (lower means more similar),
          - "timing_difference": duration of segment 1 minus segment 2, in
            seconds.

    Raises:
        subprocess.CalledProcessError: if a Praat run fails.
        FileNotFoundError: if the ``praat`` executable or a TextGrid is missing.
    """
    praat_script_path = "segment_and_analyze.praat"

    textgrid1 = _create_textgrid(audio1_path, praat_script_path)
    textgrid2 = _create_textgrid(audio2_path, praat_script_path)
    features1 = _extract_features(audio1_path, textgrid1)
    features2 = _extract_features(audio2_path, textgrid2)

    similarity_results = _calculate_similarity(features1, features2)

    # Print the results.
    for word in similarity_results:
        print(f"단어 '{word['word']}' 비교:")
        for segment in word["segments"]:
            print(f"  음소 '{segment['phoneme']}' 비교:")
            print(f"    피치 유사도: {segment['pitch_similarity']:.2f}")
            print(f"    포먼트 유사도: {segment['formant_similarity']:.2f}")
            print(f"    MFCC 유사도: {segment['mfcc_similarity']:.2f}")
            print(f"    타이밍 차이: {segment['timing_difference']:.2f} 초")
    return similarity_results

# Run the comparison on two recordings (the paths look like placeholders --
# point them at real .wav files before running).
audio1, audio2 = "path_to_audio1.wav", "path_to_audio2.wav"
analyze_similarity(audio1, audio2)
