In [3]:
import os
import json
import numpy as np
from tensorflow.keras.models import load_model

# Load model
model = load_model("autodl-tmp/model/epoch7.h5")
print("Model loaded successfully.")

# Load label and genre mappings
label_path = "label_map.json"

with open(label_path, "r", encoding="utf-8") as f:
    label2id = json.load(f)
id2label = {v: k for k, v in label2id.items()}



# Folder with .npy cochleagram files
test_folder = "autodl-tmp/NPYSyllable(praat)/Syllable_Music1_V_Seg"

# List of .npy files
npy_files = sorted([
    os.path.join(test_folder, f)
    for f in os.listdir(test_folder)
    if f.endswith(".npy")
])

# Softmax with temperature
def softmax_with_temperature(logits, T=1.5):
    scaled = logits / T
    exp_logits = np.exp(scaled - np.max(scaled))  # Stability
    return exp_logits / np.sum(exp_logits)

# Iterate through test samples
for i, sample_path in enumerate(npy_files):
    sample = np.load(sample_path)

    # Reshape to (1, 256, 256, 1)
    if sample.ndim == 2:
        sample = np.expand_dims(sample, axis=-1)
    sample = sample.reshape(1, 256, 256, 1)

    # Predict
    pred_word, pred_genre = model.predict(sample)

    # Apply temperature softmax
    word_probs = softmax_with_temperature(pred_word[0], T=1.5)

    # Get top-5 predictions
    top5_word_indices = np.argsort(word_probs)[::-1][:5]

    # Output
    print(f"\n File: {os.path.basename(sample_path)}")

    print("Top-5 Word Predictions:")
    for rank, idx in enumerate(top5_word_indices):
        label = id2label.get(idx, "Unknown")
        prob = word_probs[idx]
        print(f"  Top-{rank+1}: Class ID = {idx}, Label = {label}, Probability = {prob:.4f}")




Model loaded successfully.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 632ms/step

 File: Music1_001.npy
Top-5 Word Predictions:
  Top-1: Class ID = 4, Label = 互, Probability = 0.0218
  Top-2: Class ID = 21, Label = 和, Probability = 0.0119
  Top-3: Class ID = 52, Label = 梯, Probability = 0.0119
  Top-4: Class ID = 44, Label = 明, Probability = 0.0119
  Top-5: Class ID = 59, Label = 画, Probability = 0.0119
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 44ms/step

 File: Music1_002.npy
Top-5 Word Predictions:
  Top-1: Class ID = 68, Label = 美, Probability = 0.0229
  Top-2: Class ID = 36, Label = 我, Probability = 0.0118
  Top-3: Class ID = 46, Label = 暖, Probability = 0.0118
  Top-4: Class ID = 83, Label = 首, Probability = 0.0118
  Top-5: Class ID = 80, Label = 阔, Probability = 0.0118
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step

 File: Music1_003.npy
Top-5 Word Predictions:
  Top-1: Class ID = 42, Label = 改, Probability = 0.0211


In [12]:
import os
import json
import numpy as np
from tensorflow.keras.models import load_model

# Load model
model = load_model("autodl-tmp/fine_tuned_model.h5")
print("Model loaded successfully.")

# Load label and genre mappings
label_path = "autodl-tmp/TrainSet/labels/label_map.json"

with open(label_path, "r", encoding="utf-8") as f:
    label2id = json.load(f)
id2label = {v: k for k, v in label2id.items()}



# Folder with .npy cochleagram files
test_folder = "autodl-tmp/TestSet/cochleagrams_npy/Music2_V_Seg"

# List of .npy files
npy_files = sorted([
    os.path.join(test_folder, f)
    for f in os.listdir(test_folder)
    if f.endswith(".npy")
])

# Softmax with temperature
def softmax_with_temperature(logits, T=1.5):
    scaled = logits / T
    exp_logits = np.exp(scaled - np.max(scaled))  # Stability
    return exp_logits / np.sum(exp_logits)

# Iterate through test samples
for i, sample_path in enumerate(npy_files):
    sample = np.load(sample_path)

    # Reshape to (1, 256, 256, 1)
    if sample.ndim == 2:
        sample = np.expand_dims(sample, axis=-1)
    sample = sample.reshape(1, 256, 256, 1)

    # Predict
    pred_word, pred_genre = model.predict(sample)

    # Apply temperature softmax
    word_probs = softmax_with_temperature(pred_word[0], T=1.5)

    # Get top-5 predictions
    top5_word_indices = np.argsort(word_probs)[::-1][:5]

    # Output
    print(f"\n File: {os.path.basename(sample_path)}")

    print("Top-5 Word Predictions:")
    for rank, idx in enumerate(top5_word_indices):
        label = id2label.get(idx, "Unknown")
        prob = word_probs[idx]
        print(f"  Top-{rank+1}: Class ID = {idx}, Label = {label}, Probability = {prob:.4f}")



Model loaded successfully.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 616ms/step

 File: Music2_001.npy
Top-5 Word Predictions:
  Top-1: Class ID = 48, Label = 爱, Probability = 0.0238
  Top-2: Class ID = 51, Label = 的, Probability = 0.0183
  Top-3: Class ID = 5, Label = 互联, Probability = 0.0162
  Top-4: Class ID = 10, Label = 你, Probability = 0.0160
  Top-5: Class ID = 4, Label = 中, Probability = 0.0158
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step

 File: Music2_002.npy
Top-5 Word Predictions:
  Top-1: Class ID = 31, Label = 我想, Probability = 0.0305
  Top-2: Class ID = 45, Label = 梦想, Probability = 0.0156
  Top-3: Class ID = 61, Label = 颜色, Probability = 0.0156
  Top-4: Class ID = 59, Label = 遥望, Probability = 0.0156
  Top-5: Class ID = 58, Label = 辽阔, Probability = 0.0156
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step

 File: Music2_003.npy
Top-5 Word Predictions:
  Top-1: Class ID = 30, Label = 我, Probability = 0.

In [13]:
import os
import json
import numpy as np
from tensorflow.keras.models import load_model

# Load model
model = load_model("autodl-tmp/fine_tuned_model.h5")
print("Model loaded successfully.")

# Load label and genre mappings
label_path = "autodl-tmp/TrainSet/labels/label_map.json"

with open(label_path, "r", encoding="utf-8") as f:
    label2id = json.load(f)
id2label = {v: k for k, v in label2id.items()}



# Folder with .npy cochleagram files
test_folder = "autodl-tmp/TestSet/cochleagrams_npy/Music3_V_Seg"

# List of .npy files
npy_files = sorted([
    os.path.join(test_folder, f)
    for f in os.listdir(test_folder)
    if f.endswith(".npy")
])

# Softmax with temperature
def softmax_with_temperature(logits, T=1.5):
    scaled = logits / T
    exp_logits = np.exp(scaled - np.max(scaled))  # Stability
    return exp_logits / np.sum(exp_logits)

# Iterate through test samples
for i, sample_path in enumerate(npy_files):
    sample = np.load(sample_path)

    # Reshape to (1, 256, 256, 1)
    if sample.ndim == 2:
        sample = np.expand_dims(sample, axis=-1)
    sample = sample.reshape(1, 256, 256, 1)

    # Predict
    pred_word, pred_genre = model.predict(sample)

    # Apply temperature softmax
    word_probs = softmax_with_temperature(pred_word[0], T=1.5)

    # Get top-5 predictions
    top5_word_indices = np.argsort(word_probs)[::-1][:5]

    # Output
    print(f"\n File: {os.path.basename(sample_path)}")

    print("Top-5 Word Predictions:")
    for rank, idx in enumerate(top5_word_indices):
        label = id2label.get(idx, "Unknown")
        prob = word_probs[idx]
        print(f"  Top-{rank+1}: Class ID = {idx}, Label = {label}, Probability = {prob:.4f}")



Model loaded successfully.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 619ms/step

 File: Music3_001.npy
Top-5 Word Predictions:
  Top-1: Class ID = 48, Label = 爱, Probability = 0.0240
  Top-2: Class ID = 51, Label = 的, Probability = 0.0173
  Top-3: Class ID = 5, Label = 互联, Probability = 0.0161
  Top-4: Class ID = 10, Label = 你, Probability = 0.0161
  Top-5: Class ID = 4, Label = 中, Probability = 0.0159
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 42ms/step

 File: Music3_002.npy
Top-5 Word Predictions:
  Top-1: Class ID = 31, Label = 我想, Probability = 0.0305
  Top-2: Class ID = 61, Label = 颜色, Probability = 0.0156
  Top-3: Class ID = 62, Label = 首, Probability = 0.0156
  Top-4: Class ID = 59, Label = 遥望, Probability = 0.0156
  Top-5: Class ID = 58, Label = 辽阔, Probability = 0.0156
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step

 File: Music3_003.npy
Top-5 Word Predictions:
  Top-1: Class ID = 30, Label = 我, Probability = 0.0

In [11]:
import os
import json
import numpy as np
from tensorflow.keras.models import load_model

# Load model
model = load_model("autodl-tmp/fine_tuned_model.h5")
print("Model loaded successfully.")

# Load label and genre mappings
label_path = "autodl-tmp/TrainSet/labels/label_map.json"

with open(label_path, "r", encoding="utf-8") as f:
    label2id = json.load(f)
id2label = {v: k for k, v in label2id.items()}



# Folder with .npy cochleagram files
test_folder = "autodl-tmp/TestSet/cochleagrams_npy/Music4_V_Seg"

# List of .npy files
npy_files = sorted([
    os.path.join(test_folder, f)
    for f in os.listdir(test_folder)
    if f.endswith(".npy")
])

# Softmax with temperature
def softmax_with_temperature(logits, T=1.5):
    scaled = logits / T
    exp_logits = np.exp(scaled - np.max(scaled))  # Stability
    return exp_logits / np.sum(exp_logits)

# Iterate through test samples
for i, sample_path in enumerate(npy_files):
    sample = np.load(sample_path)

    # Reshape to (1, 256, 256, 1)
    if sample.ndim == 2:
        sample = np.expand_dims(sample, axis=-1)
    sample = sample.reshape(1, 256, 256, 1)

    # Predict
    pred_word, pred_genre = model.predict(sample)

    # Apply temperature softmax
    word_probs = softmax_with_temperature(pred_word[0], T=1.5)

    # Get top-5 predictions
    top5_word_indices = np.argsort(word_probs)[::-1][:5]

    # Output
    print(f"\n File: {os.path.basename(sample_path)}")

    print("Top-5 Word Predictions:")
    for rank, idx in enumerate(top5_word_indices):
        label = id2label.get(idx, "Unknown")
        prob = word_probs[idx]
        print(f"  Top-{rank+1}: Class ID = {idx}, Label = {label}, Probability = {prob:.4f}")



Model loaded successfully.
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 622ms/step

 File: Music4_001.npy
Top-5 Word Predictions:
  Top-1: Class ID = 48, Label = 爱, Probability = 0.0244
  Top-2: Class ID = 51, Label = 的, Probability = 0.0175
  Top-3: Class ID = 5, Label = 互联, Probability = 0.0161
  Top-4: Class ID = 10, Label = 你, Probability = 0.0160
  Top-5: Class ID = 4, Label = 中, Probability = 0.0159
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step

 File: Music4_002.npy
Top-5 Word Predictions:
  Top-1: Class ID = 31, Label = 我想, Probability = 0.0305
  Top-2: Class ID = 61, Label = 颜色, Probability = 0.0156
  Top-3: Class ID = 62, Label = 首, Probability = 0.0156
  Top-4: Class ID = 59, Label = 遥望, Probability = 0.0156
  Top-5: Class ID = 58, Label = 辽阔, Probability = 0.0156
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step

 File: Music4_003.npy
Top-5 Word Predictions:
  Top-1: Class ID = 30, Label = 我, Probability = 0.0