In [1]:
import numpy as np

A_PATH = "../list/nalist.npy"          # 레포 nalist
B_PATH = "../list/nalist_i3d.npy"      # 네 nalist

a = np.load(A_PATH, allow_pickle=True)
b = np.load(B_PATH, allow_pickle=True)

print("shape:", a.shape, b.shape)
print("exactly equal:", np.array_equal(a, b))


shape: (1609, 2) (1609, 2)
exactly equal: False


In [2]:

la = (a[:,1] - a[:,0]).astype(int)
lb = (b[:,1] - b[:,0]).astype(int)

def is_partition(x):
    return (x[0,0] == 0) and np.all(x[1:,0] == x[:-1,1])

print("A partition:", is_partition(a), "end:", int(a[-1,1]))
print("B partition:", is_partition(b), "end:", int(b[-1,1]))
print("same N:", len(la) == len(lb))
print("same total:", int(a[-1,1]) == int(b[-1,1]))
print("same order lens:", np.array_equal(la, lb))
print("same multiset lens:", np.array_equal(np.sort(la), np.sort(lb)))
print("lens changed positions:", int(np.sum(la != lb)), "/", len(la))


A partition: True end: 779951
B partition: True end: 779951
same N: True
same total: True
same order lens: False
same multiset lens: True
lens changed positions: 1603 / 1609


In [4]:
import os
import numpy as np
from collections import Counter

OLD_LIST = "../list/ucf-i3d.list"                   # 레포 list (로컬 경로라도 OK)
NEW_LIST = "../list/ucf-i3d_train_fixed_local.list" # 네가 만든 train list

OLD_NALIST = "../list/nalist.npy"
NEW_NALIST = "../list/nalist_i3d.npy"

def read_list(p):
    return [l.strip() for l in open(p) if l.strip()]

def key(p):
    # 파일명에서 확장자 제거 (필요하면 여기서 규칙 더 다듬으면 됨)
    base = os.path.basename(p)
    return os.path.splitext(base)[0]

old_paths = read_list(OLD_LIST)
new_paths = read_list(NEW_LIST)

old_keys = [key(p) for p in old_paths]
new_keys = [key(p) for p in new_paths]

print("same N list:", len(old_keys), len(new_keys))

# 키 집합 차이 확인
c_old, c_new = Counter(old_keys), Counter(new_keys)
missing = [k for k in new_keys if c_old[k] == 0]
extra   = [k for k in old_keys if c_new[k] == 0]
print("missing (new not in old) sample:", missing[:10])
print("extra (old not in new) sample:", extra[:10])

# permutation: perm[j] = old index corresponding to new index j
pos = {}
for i,k in enumerate(old_keys):
    pos.setdefault(k, []).append(i)

used = Counter()
perm = np.empty(len(new_keys), dtype=int)
for j,k in enumerate(new_keys):
    perm[j] = pos[k][used[k]]
    used[k] += 1

a = np.load(OLD_NALIST, allow_pickle=True)
b = np.load(NEW_NALIST, allow_pickle=True)

# old nalist를 new 순서로 재배치해서 new nalist의 lens와 같은지 확인
la = (a[:,1] - a[:,0]).astype(int)
lb = (b[:,1] - b[:,0]).astype(int)

print("lens match after perm:", np.array_equal(la[perm], lb))


same N list: 1609 1609
missing (new not in old) sample: []
extra (old not in new) sample: []
lens match after perm: False


In [5]:
import os
from collections import Counter

def read_list(p):
    return [l.strip() for l in open(p) if l.strip()]

def key_base(p):
    b = os.path.basename(p)
    return os.path.splitext(b)[0]

old_keys = [key_base(p) for p in read_list("../list/ucf-i3d.list")]
new_keys = [key_base(p) for p in read_list("../list/ucf-i3d_train_fixed_local.list")]

dup_old = [k for k,v in Counter(old_keys).items() if v>1]
dup_new = [k for k,v in Counter(new_keys).items() if v>1]

print("dup count old:", len(dup_old))
print("dup count new:", len(dup_new))
print("sample dup:", dup_old[:10])


dup count old: 0
dup count new: 0
sample dup: []


In [6]:
import numpy as np
import os
from collections import Counter

def read_list(p):
    return [l.strip() for l in open(p) if l.strip()]

def key(p):
    b = os.path.basename(p)
    return os.path.splitext(b)[0]

old_paths = read_list("../list/ucf-i3d.list")
new_paths = read_list("../list/ucf-i3d_train_fixed_local.list")

old_keys = [key(p) for p in old_paths]
new_keys = [key(p) for p in new_paths]

pos = {}
for i,k in enumerate(old_keys):
    pos.setdefault(k, []).append(i)

used = Counter()
perm = np.empty(len(new_keys), dtype=int)
for j,k in enumerate(new_keys):
    perm[j] = pos[k][used[k]]
    used[k] += 1

a = np.load("../list/nalist.npy", allow_pickle=True)
b = np.load("../list/nalist_i3d.npy", allow_pickle=True)
la = (a[:,1]-a[:,0]).astype(int)
lb = (b[:,1]-b[:,0]).astype(int)

bad = np.where(la[perm] != lb)[0]
print("mismatch count:", len(bad), "/", len(lb))
for j in bad[:20]:
    i = perm[j]
    print(j, new_keys[j], "lb(new)=", lb[j], "la(old@mapped)=", la[i], "old_idx=", i)


mismatch count: 1603 / 1609
0 Abuse001_x264_i3d lb(new)= 171 la(old@mapped)= 34 old_idx= 0
1 Abuse002_x264_i3d lb(new)= 55 la(old@mapped)= 104 old_idx= 1
2 Abuse003_x264_i3d lb(new)= 232 la(old@mapped)= 58 old_idx= 2
3 Abuse004_x264_i3d lb(new)= 1050 la(old@mapped)= 25 old_idx= 3
4 Abuse005_x264_i3d lb(new)= 60 la(old@mapped)= 62 old_idx= 4
5 Abuse006_x264_i3d lb(new)= 274 la(old@mapped)= 161 old_idx= 5
6 Abuse007_x264_i3d lb(new)= 72 la(old@mapped)= 47 old_idx= 6
7 Abuse008_x264_i3d lb(new)= 526 la(old@mapped)= 57 old_idx= 7
8 Abuse009_x264_i3d lb(new)= 63 la(old@mapped)= 147 old_idx= 8
9 Abuse010_x264_i3d lb(new)= 71 la(old@mapped)= 76 old_idx= 9
10 Abuse011_x264_i3d lb(new)= 228 la(old@mapped)= 227 old_idx= 10
11 Abuse012_x264_i3d lb(new)= 321 la(old@mapped)= 54 old_idx= 11
12 Abuse013_x264_i3d lb(new)= 305 la(old@mapped)= 31 old_idx= 12
13 Abuse014_x264_i3d lb(new)= 162 la(old@mapped)= 123 old_idx= 13
14 Abuse015_x264_i3d lb(new)= 493 la(old@mapped)= 25 old_idx= 14
15 Abuse016_x264

## RTFM에서 받은 피처랑 MGCN에서 받은 피처가 같은 전처리로 I3D 피처를 추출했는지 점검

In [23]:
import numpy as np

def quick_stats_any(path, max_t=200):
    x = np.load(path, mmap_mode="r", allow_pickle=True)
    info = {"shape": x.shape, "dtype": str(x.dtype)}

    # 자주 나오는 케이스 처리
    if x.ndim == 2:              # (T, D)
        T, D = x.shape
        xs = np.array(x[:min(T, max_t)])
        info.update({
            "format": "(T,D)",
            "T": T, "D": D,
            "mean": float(xs.mean()),
            "std": float(xs.std()),
            "l2_mean": float(np.mean(np.linalg.norm(xs, axis=1))),
        })
        return info

    if x.ndim == 3:              # 보통 (Crops, T, D) or (T, ?, ?)
        # 가정 1: (10, T, D)
        if x.shape[0] in (5, 10) and x.shape[2] > 1:
            C, T, D = x.shape
            xs = np.array(x[:, :min(T, max_t), :])  # (C, t, D)
            # crop 평균낸 버전 통계도 같이
            x_avg = np.mean(xs, axis=0)             # (t, D)
            info.update({
                "format": "(C,T,D)",
                "C": C, "T": T, "D": D,
                "mean": float(xs.mean()),
                "std": float(xs.std()),
                "l2_mean_avgcrop": float(np.mean(np.linalg.norm(x_avg, axis=1))),
            })
            return info

        # 가정 2: (T, D, something) 같은 기타
        info["format"] = "ndim=3 (unknown layout)"
        return info

    info["format"] = f"ndim={x.ndim} (unknown)"
    return info

print(quick_stats_any("../../Downloads/Vandalism048_x264_i3d.npy"))
print(quick_stats_any("../../Downloads/UCF_train_feature/UCF_Train_ten_crop_i3d/Vandalism048_x264_i3d.npy"))


{'shape': (449, 10, 2048), 'dtype': 'float32', 'format': 'ndim=3 (unknown layout)'}
{'shape': (449, 10, 2048), 'dtype': 'float32', 'format': 'ndim=3 (unknown layout)'}


In [26]:
import numpy as np

a = np.load("../../Downloads/Vandalism049_x264_i3d.npy", mmap_mode="r")
b = np.load("../../Downloads/UCF_train_feature/UCF_Train_ten_crop_i3d/Vandalism049_x264_i3d.npy", mmap_mode="r")

print("shape same?", a.shape == b.shape, "dtype same?", a.dtype == b.dtype)
print("allclose?", np.allclose(a, b, atol=1e-6))
print("max abs diff:", float(np.max(np.abs(a - b))))
print("mean abs diff:", float(np.mean(np.abs(a - b))))


shape same? True dtype same? True
allclose? True
max abs diff: 0.0
mean abs diff: 0.0


In [11]:
import os, numpy as np

LIST = "../list/ucf-i3d_test_fixed_local.list"

paths = [l.strip() for l in open(LIST) if l.strip()]
print("num in list:", len(paths))

missing = [p for p in paths if not os.path.exists(p)]
print("missing files:", len(missing))
print("missing sample:", missing[:5])

Ds = set()
bad_shape = 0
for p in paths[:50]:  # 50개만 빠르게
    x = np.load(p, mmap_mode="r")
    if x.ndim != 2:
        bad_shape += 1
        print("bad ndim:", p, x.shape)
        continue
    Ds.add(x.shape[1])
print("unique D (first 50):", sorted(Ds), "bad_shape:", bad_shape)


num in list: 290
missing files: 0
missing sample: []
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\UCF_Test_ten_i3d\Abuse028_x264_i3d.npy (89, 10, 2048)
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\UCF_Test_ten_i3d\Abuse030_x264_i3d.npy (97, 10, 2048)
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\UCF_Test_ten_i3d\Arrest001_x264_i3d.npy (149, 10, 2048)
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\UCF_Test_ten_i3d\Arrest007_x264_i3d.npy (197, 10, 2048)
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\UCF_Test_ten_i3d\Arrest024_x264_i3d.npy (227, 10, 2048)
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\UCF_Test_ten_i3d\Arrest030_x264_i3d.npy (541, 10, 2048)
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\UCF_Test_ten_i3d\Arrest039_x264_i3d.npy (990, 10, 2048)
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\UCF_Test_ten_i3d\Arson007_x264_i3d.npy (391, 10, 2048)
bad ndim: C:\Users\jplabuser\Downloads\UCF_Test_ten_i3d\

In [18]:
import numpy as np

GT  = "../list/gt-ucf-RTFM.npy"
NAL = "../list/nalist_test_i3d.npy"

gt = np.load(GT, allow_pickle=True)
nal = np.load(NAL, allow_pickle=True)

# 값 범위/유니크(너무 크면 샘플만)
uniq = np.unique(gt[:min(len(gt), 200000)])
print("unique (sample):", uniq[:20], "...", "count:", len(uniq))

print("min/max:", float(np.min(gt)), float(np.max(gt)))
print("mean positive rate overall:", float(np.mean(gt)))

# 비디오별 양성 비율
ratios = []
for i in range(nal.shape[0]):
    s,e = nal[i]
    seg = gt[s:e]
    ratios.append(float(np.mean(seg)) if (e-s)>0 else 0.0)

print("per-video positive rate min/mean/max:",
      float(np.min(ratios)), float(np.mean(ratios)), float(np.max(ratios)))

# 극단 케이스 개수
all_zero = sum(r == 0.0 for r in ratios)
all_one  = sum(r == 1.0 for r in ratios)
print("videos all-zero:", all_zero, "all-one:", all_one, "total:", len(ratios))


unique (sample): [0. 1.] ... count: 2
min/max: 0.0 1.0
mean positive rate overall: 0.07574335094924893
per-video positive rate min/mean/max: 0.0 0.3120738101517141 1.0
videos all-zero: 187 all-one: 78 total: 290


In [19]:
import numpy as np

GT  = "../list/gt-ucf-RTFM.npy"
NAL = "../list/nalist_test_i3d.npy"

gt = np.load(GT, allow_pickle=True)
nal = np.load(NAL, allow_pickle=True)

def transitions(x):
    if len(x) <= 1: return 0
    return int(np.sum(x[1:] != x[:-1]))

trs = []
for i in range(nal.shape[0]):
    s,e = nal[i]
    seg = gt[s:e]
    trs.append(transitions(seg))

print("GT transitions per video min/mean/max:",
      int(np.min(trs)), float(np.mean(trs)), int(np.max(trs)))

# 전환이 너무 많은 상위 10개(이상한 케이스 후보)
top = np.argsort(trs)[-10:][::-1]
print("top 10 transition videos:", [(int(i), int(trs[i])) for i in top])


GT transitions per video min/mean/max: 0 0.1206896551724138 5
top 10 transition videos: [(281, 5), (240, 2), (270, 2), (269, 2), (24, 2), (230, 2), (16, 2), (251, 1), (214, 1), (239, 1)]


In [20]:
import numpy as np

GT  = "../list/gt-ucf-RTFM.npy"
NAL = "../list/nalist_test_i3d.npy"

gt = np.load(GT, allow_pickle=True).astype(np.float32)
nal = np.load(NAL, allow_pickle=True)

T_clip_total = int(nal[-1,1])
k = gt.shape[0] // T_clip_total
print("scale k:", k, " (should be 16)")
assert gt.shape[0] == T_clip_total * k

# frame->clip: max pooling (권장)
gt_clip = gt.reshape(T_clip_total, k).max(axis=1)

print("gt_clip len:", len(gt_clip), "expected:", T_clip_total)
print("gt_clip unique:", np.unique(gt_clip))


scale k: 16  (should be 16)
gt_clip len: 69634 expected: 69634
gt_clip unique: [0. 1.]


In [21]:
ratios = []
trs = []

def transitions(x):
    if len(x) <= 1: return 0
    return int(np.sum(x[1:] != x[:-1]))

for i in range(nal.shape[0]):
    s,e = nal[i]
    seg = gt_clip[s:e]
    ratios.append(float(seg.mean()) if (e-s)>0 else 0.0)
    trs.append(transitions(seg))

print("per-video clip-GT positive rate min/mean/max:",
      float(np.min(ratios)), float(np.mean(ratios)), float(np.max(ratios)))
print("GT transitions per video min/mean/max:",
      int(np.min(trs)), float(np.mean(trs)), int(np.max(trs)))


per-video clip-GT positive rate min/mean/max: 0.0 0.10336522131130613 0.8537549376487732
GT transitions per video min/mean/max: 0 1.0482758620689656 4


In [28]:
import numpy as np
concat = np.load("../../C2FPL/concat_UCF.npy", mmap_mode="r")
pseudo = np.load("../Unsup_label/UCF_unsup_labels_i3d_varT.npy", allow_pickle=True)

print("concat:", concat.shape)
print("pseudo:", getattr(pseudo, "shape", type(pseudo)))


ValueError: Cannot load file containing pickled data when allow_pickle=False