In [6]:
import json
import string
from collections import defaultdict
import numpy as np

# Function to calculate WER
def wer(reference, hypothesis):
    """Return the word error rate of ``hypothesis`` against ``reference``.

    Both strings are passed through ``normalize_text`` and split on
    whitespace.  The word-level Levenshtein distance (substitutions,
    insertions and deletions each cost 1) is then divided by the number of
    reference words.

    Args:
        reference: Ground-truth transcript.
        hypothesis: ASR output to score.

    Returns:
        The WER as a float.  It can exceed 1.0 when the hypothesis contains
        many insertions.  When the reference has no words after
        normalization the ratio is undefined; in that case the number of
        hypothesis words is returned (i.e. the distance is divided by 1), so
        two empty transcripts score 0.0 and the result stays finite.
    """
    r = normalize_text(reference).split()
    h = normalize_text(hypothesis).split()

    # Guard the division below: with no reference words every hypothesis
    # word is an insertion.
    if not r:
        return float(len(h))

    # Plain Python ints are used for the DP table on purpose: a fixed-width
    # dtype such as uint8 wraps around once a distance exceeds 255, which
    # silently under-reports errors on long utterances.
    # Only the previous row is needed to compute the current one.
    previous_row = list(range(len(h) + 1))
    for i, ref_word in enumerate(r, start=1):
        current_row = [i] + [0] * len(h)
        for j, hyp_word in enumerate(h, start=1):
            if ref_word == hyp_word:
                current_row[j] = previous_row[j - 1]
            else:
                substitute = previous_row[j - 1]
                insert = current_row[j - 1]
                delete = previous_row[j]
                current_row[j] = 1 + min(substitute, insert, delete)
        previous_row = current_row

    return previous_row[-1] / float(len(r))

# Function to normalize text by removing punctuation and converting to lowercase
def normalize_text(text):
    """Return ``text`` lowercased with every ``string.punctuation`` character removed."""
    # A translation table that maps each ASCII punctuation mark to "deleted".
    punctuation_remover = str.maketrans('', '', string.punctuation)
    return text.lower().translate(punctuation_remover)

# Function to print sample WER calculations
def print_sample_calculations(data, asr_methods, num_samples=5):
    """Print the per-method WER for the first ``num_samples`` items of ``data``.

    Args:
        data: Iterable of dict-like items.  Each item is read through the
            keys ``"id"``, ``"groundtruth"``, ``"emotion"`` and one key per
            entry of ``asr_methods``.
        asr_methods: Names of the ASR systems; each name is used as the key
            holding that system's hypothesis in an item.
        num_samples: Maximum number of items to print.

    Returns:
        None.  Everything is written to standard output.
    """
    print("\nSample WER Calculations:\n")

    for index, item in enumerate(data):
        # Stop as soon as the quota is reached rather than walking the rest
        # of the dataset without printing anything.
        if index >= num_samples:
            break

        ground_truth = item["groundtruth"]
        emotion = item["emotion"]
        print(f"ID: {item['id']}")
        print(f"Ground Truth: {ground_truth}")
        print(f"Emotion: {emotion}")

        for method in asr_methods:
            hypothesis = item[method]
            wer_value = wer(ground_truth, hypothesis)
            print(f"  {method}: {hypothesis}")
            print(f"  WER: {wer_value:.2f}")

        print("\n")

# Load the training data
file_path = 'F:\\SLT_2024\\iemocap_script_allemo.json'
# Decode explicitly as UTF-8: without an encoding argument open() falls back
# to the platform locale (e.g. cp1252 on Windows), which can mis-decode or
# reject non-ASCII characters in the JSON file.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

# ASR systems to evaluate, in reporting order
asr_methods = [
    "hubertlarge",
    "w2v2100",
    "w2v2960",
    "w2v2960large",
    "w2v2960largeself",
    "wavlmplus",
    "whisperbase",
    "whisperlarge",
    "whispermedium",
    "whispersmall",
    "whispertiny",
]

# Emotion labels that are reported individually
major_emotions = {"ang", "hap", "neu", "sad"}

# Accumulators filled while scoring the dataset:
#   wer_results[method][emotion] -> list of WER values
#   overall_wer_results[method]  -> list of WER values across all emotions
#   emotion_counts[emotion]      -> number of items carrying that label
emotion_counts = defaultdict(int)
overall_wer_results = defaultdict(list)
wer_results = defaultdict(lambda: defaultdict(list))

# Score every item with every ASR method, bucketing results by emotion.
# Labels outside major_emotions are pooled under "other".
for item in data:
    ground_truth = item["groundtruth"]
    emotion = item["emotion"]
    if emotion not in major_emotions:
        emotion = "other"
    emotion_counts[emotion] += 1

    for method in asr_methods:
        hypothesis = item[method]
        wer_value = wer(ground_truth, hypothesis)
        # Record the score both overall and under the item's emotion bucket
        overall_wer_results[method].append(wer_value)
        wer_results[method][emotion].append(wer_value)

# Average the per-item WER values for each method: one entry per emotion
# bucket, followed by an "overall" entry. These are unweighted means of
# per-item WER, so every item counts equally regardless of its length.
average_wer_results = defaultdict(dict)
for method in asr_methods:
    method_averages = average_wer_results[method]
    for emotion, scores in wer_results[method].items():
        method_averages[emotion] = np.mean(scores)
    method_averages["overall"] = np.mean(overall_wer_results[method])

# Report the averaged WER per method, one line per emotion bucket
for method in asr_methods:
    print(f"WER for {method}:")
    for emotion, average in average_wer_results[method].items():
        print(f"  {emotion}: {average:.2f}")
    print()

# Report how many items fell into each emotion bucket
print("Number of samples for each emotion:")
for emotion in emotion_counts:
    count = emotion_counts[emotion]
    print(f"  {emotion}: {count}")

# Show a few worked examples for manual inspection
print_sample_calculations(data, asr_methods)


WER for hubertlarge:
  other: 0.19
  neu: 0.20
  ang: 0.18
  hap: 0.25
  sad: 0.18
  overall: 0.20

WER for w2v2100:
  other: 0.36
  neu: 0.41
  ang: 0.37
  hap: 0.43
  sad: 0.34
  overall: 0.38

WER for w2v2960:
  other: 0.28
  neu: 0.35
  ang: 0.26
  hap: 0.33
  sad: 0.28
  overall: 0.29

WER for w2v2960large:
  other: 0.24
  neu: 0.31
  ang: 0.22
  hap: 0.29
  sad: 0.24
  overall: 0.25

WER for w2v2960largeself:
  other: 0.18
  neu: 0.23
  ang: 0.15
  hap: 0.22
  sad: 0.18
  overall: 0.19

WER for wavlmplus:
  other: 0.37
  neu: 0.45
  ang: 0.32
  hap: 0.43
  sad: 0.39
  overall: 0.38

WER for whisperbase:
  other: 0.38
  neu: 0.36
  ang: 0.34
  hap: 0.48
  sad: 0.40
  overall: 0.39

WER for whisperlarge:
  other: 0.36
  neu: 0.36
  ang: 0.35
  hap: 0.46
  sad: 0.41
  overall: 0.38

WER for whispermedium:
  other: 0.35
  neu: 0.33
  ang: 0.34
  hap: 0.45
  sad: 0.37
  overall: 0.36

WER for whispersmall:
  other: 0.36
  neu: 0.35
  ang: 0.33
  hap: 0.47
  sad: 0.39
  overall: 0.37

