In [1]:
from tqdm import tqdm
import numpy as np
import torch
import os


# Load Embeddings

In [2]:
# Input locations on Kaggle.
# NOTE(review): root_dir is not referenced by any other cell visible in this
# notebook; presumably it is the root of the private-test audio — confirm or drop.
root_dir = "/kaggle/input/private-test-vlsp/private_test/home4/vuhl/VSASV-Dataset/vlsp2025"
# Trial list: read line by line below, each line split into an (enroll, test) pair.
list_path = "/kaggle/input/private-test-vlsp/private_test_vlsp.txt"

In [3]:
# Parse the trial list into `trials`, a list of (enroll, test) path pairs.
# Each non-blank line must hold exactly two whitespace-separated fields.
trials = []
with open(list_path, "r") as f:
    for line_no, line in enumerate(f, start=1):
        fields = line.split()
        if not fields:
            # Blank line (e.g. a trailing newline at end of file): nothing to
            # parse, and unpacking it would fail with an opaque ValueError.
            continue
        if len(fields) != 2:
            # Same exception type the bare tuple-unpack would raise, but with
            # enough context to locate the offending row.
            raise ValueError(
                f"{list_path}:{line_no}: expected 2 fields (enroll, test), "
                f"got {len(fields)}"
            )
        enroll, test = fields
        trials.append((enroll, test))

In [4]:
from tqdm import tqdm
import numpy as np
import os


# Gather every distinct utterance ID referenced by the trials. An ID is the
# file name of a trial path with its directory and extension removed.
uids = set()
for p1, p2 in tqdm(trials, desc="Load Embedding"):
    uids.update(
        os.path.splitext(os.path.basename(p))[0] for p in (p1, p2)
    )

Load Embedding: 100%|██████████| 557516/557516 [00:01<00:00, 380812.23it/s]


In [5]:
# Preload the r48 embeddings: A maps utterance ID -> float32 array.
A = {}
miss = 0
base = "/kaggle/input/private-test-embed/r48_embedding_private"

for uid in tqdm(sorted(uids), desc="Preload embeddings"):
    npy_path = os.path.join(base, f"{uid}.npy")
    if os.path.exists(npy_path):
        # mmap_mode=None: read the whole array into RAM (faster lookups later).
        loaded = np.load(npy_path, mmap_mode=None)
        # Drop singleton axes and make sure the dtype is float32.
        A[uid] = np.squeeze(loaded).astype(np.float32, copy=False)
    else:
        # No file for this ID: count it and move on.
        miss += 1

if miss:
    print(f"⚠️ Missing {miss} embeddings")

Preload embeddings: 100%|██████████| 103417/103417 [12:54<00:00, 133.57it/s]


In [6]:
# Preload the AASIST embeddings: E maps utterance ID -> float32 array.
E = {}
miss = 0
base = "/kaggle/input/private-test-embed/aasist_embedding_private"
for uid in tqdm(sorted(uids), desc="Preload embeddings"):
    npy_file = os.path.join(base, f"{uid}.npy")
    if os.path.exists(npy_file):
        # Read fully into RAM (no memory-mapping), squeeze out singleton axes,
        # and store as float32.
        vec = np.squeeze(np.load(npy_file, mmap_mode=None))
        E[uid] = vec.astype(np.float32, copy=False)
    else:
        # Embedding file absent: tally it; reported once after the loop.
        miss += 1

if miss:
    print(f"⚠️ Missing {miss} embeddings")

Preload embeddings: 100%|██████████| 103417/103417 [11:40<00:00, 147.55it/s]


In [7]:
# Score every trial with the AASIST embeddings in E.
# Result rows are (enroll_path, test_path, score), in trial order.
aasist = []
for path1, path2 in tqdm(trials, desc="Compute cosine"):
    enroll_id = os.path.splitext(os.path.basename(path1))[0]
    test_id = os.path.splitext(os.path.basename(path2))[0]
    if enroll_id in E and test_id in E:
        # Plain dot product; it equals cosine similarity only if the stored
        # embeddings are L2-normalised — assumed, TODO confirm.
        similarity = float(np.dot(E[enroll_id], E[test_id]))
        aasist.append((path1, path2, similarity))
    # Trials with a missing embedding are skipped silently (best effort).

Compute cosine: 100%|██████████| 557516/557516 [00:02<00:00, 208140.06it/s]


In [8]:
# Dump the AASIST scores as tab-separated rows: enroll, test, score (5 decimals).
with open("aasist.txt", "w") as out:
    out.writelines(
        f"{enroll}\t{test}\t{score:.5f}\n" for enroll, test, score in aasist
    )

In [9]:
# Score every trial with the r48 embeddings in A.
# Result rows are (enroll_path, test_path, score), in trial order.
r48 = []
for path1, path2 in tqdm(trials, desc="Compute cosine"):
    uid1 = os.path.splitext(os.path.basename(path1))[0]
    uid2 = os.path.splitext(os.path.basename(path2))[0]
    # Guard on A, the dict that is actually indexed below. The previous guard
    # only looked at E, so an ID present in E but absent from A raised KeyError.
    if uid1 not in A or uid2 not in A:
        continue
    # Keep the pre-existing E filter as well: the `aasist` list is filtered on E
    # and the fusion step pairs r48[i] with aasist[i], so dropping this check
    # would shift the rows whenever an AASIST embedding is missing.
    # NOTE(review): rows can still drift apart if an r48 embedding is missing
    # while the AASIST one exists; pairing by (enroll, test) downstream is safer.
    if uid1 not in E or uid2 not in E:
        continue
    # Dot product; equals cosine only if the stored embeddings are
    # L2-normalised — assumed, TODO confirm.
    score = float(np.dot(A[uid1], A[uid2]))
    r48.append((path1, path2, score))

Compute cosine: 100%|██████████| 557516/557516 [00:02<00:00, 188047.87it/s]


In [10]:
# Dump the r48 scores as tab-separated rows: enroll, test, score (5 decimals).
with open("r48.txt", "w") as out_file:
    for row in r48:
        enroll, test, score = row
        out_file.write(f"{enroll}\t{test}\t{score:.5f}\n")

In [12]:
import json

# Load the precomputed countermeasure scores. The fusion cell looks entries up
# by test path, so the JSON is expected to be an object keyed by test path.
# encoding is pinned to UTF-8 (the JSON interchange encoding) so decoding does
# not depend on the platform's locale default.
with open(
    "/kaggle/input/private-test-embed/aasist_score_private.json",
    "r",
    encoding="utf-8",
) as f:
    cm_score = json.load(f)


## Compute Final Score

In [16]:
# Fuse the three per-trial scores into one final score.
# Fusion weights (they sum to 1.0).
W_ASV = 0.2        # weight of the r48 (ASV) embedding similarity
W_CM = 0.5         # weight of the AASIST (CM) embedding similarity
W_BONAFIDE = 0.3   # weight of (1 - spoof probability)

# Pair the two score lists by (enroll, test) rather than by list position, so a
# trial skipped in only one of them cannot shift every later row onto the wrong
# partner (or run off the end of the shorter list).
aasist_by_trial = {(enroll, test): score for enroll, test, score in aasist}

final = []
unpaired = 0
for enroll, test, asv in r48:
    trial_key = (enroll, test)
    if trial_key not in aasist_by_trial:
        # No AASIST score for this trial: skip it, matching the best-effort
        # handling used when the individual scores were computed.
        unpaired += 1
        continue

    score_asv = float(asv)                          # similarity from ASV embeddings
    score_cm = float(aasist_by_trial[trial_key])    # similarity from CM embeddings
    # Per the original comment this is a spoof probability; it is looked up by
    # test path and a missing key raises KeyError, as before.
    score_prob = float(cm_score[test])

    # Fusion
    score = W_ASV * score_asv + W_CM * score_cm + W_BONAFIDE * (1 - score_prob)

    # Stored as (enroll, test, fused_score), in r48 order.
    final.append((enroll, test, score))

if unpaired:
    print(f"⚠️ {unpaired} trials skipped: no matching AASIST score")

In [17]:
# Write the fused scores as tab-separated rows: enroll, test, score (5 decimals).
with open("prediction.txt", "w") as sink:
    sink.writelines(
        f"{enroll}\t{test}\t{score:.5f}\n" for enroll, test, score in final
    )

In [18]:
# Package prediction.txt into submission.zip (shell command run via IPython's `!`).
!zip submission.zip prediction.txt

  adding: prediction.txt (deflated 84%)
