In [1]:
import numpy as np
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt

from momaapi import MOMA
from sentence_transformers import SentenceTransformer
from tqdm import tqdm

In [2]:
pattern = {}

In [3]:
def DTW(p, q, sim):
    D = np.zeros((len(p) + 1, len(q) + 1))

    for i in range(1, len(p)+1):
        for j in range(1, len(q)+1):
            s = sim[p[i-1], q[j-1]]
            D[i,j] = max(D[i-1,j], D[i,j-1], D[i-1,j-1] + s)

    backtrack(p, q, sim, D)

    return D

def getstr(path):
    xs = [str(p[0]) for p in path]
    xs = " ".join(xs)
    ys = [str(p[1]) for p in path]
    ys = " ".join(ys)
    return xs + "\n" + ys

def backtrack(p, q, sim, D):
    path = []
    i, j = len(p), len(q)

    while i >= 1 and j >= 1:
        s = sim[p[i-1], q[j-1]]
        if D[i,j] == D[i-1,j]:
            i = i -1
            if path:
                s = getstr(path[::-1])
                if s not in pattern:
                    pattern[s] = 1
                else:
                    pattern[s] += 1
                path = []
        elif D[i,j] == D[i,j-1]:
            j = j - 1
            if path:
                s = getstr(path[::-1])
                if s not in pattern:
                    pattern[s] = 1
                else:
                    pattern[s] += 1
                path = []
        elif D[i,j] == D[i-1,j-1] + s:
            i, j = i - 1, j - 1
            path.append((p[i-1], q[j-1]))
        else:
            raise ValueError

In [4]:
# PREPROCESSING

moma = MOMA(dir_moma="/data/dir_moma/", paradigm="standard")

vid2seq = {}   # activity id -> sub-activity sequence
sid2cname = {} # sub-activity id -> sub-activity class name
for split in ["train", "val", "test"]:
    ids_act = moma.get_ids_act(split=split)
    for act in tqdm(moma.get_anns_act(ids_act=ids_act), desc=f"PREPROCESSING ({split})"):
        sact_seq = []
        for sact in moma.get_anns_sact(ids_sact=act.ids_sact):
            sid2cname[sact.cid] = sact.cname
            sact_seq.append(sact.cid)
        vid2seq[act.id] = np.array(sact_seq)

sid2cemb = {} # sub-activity id -> sub-activity caption embedding
sbert = SentenceTransformer("all-MiniLM-L6-v2")
for vid, cname in sid2cname.items():
    cemb = sbert.encode(cname)
    cemb = torch.from_numpy(cemb).float()
    sid2cemb[vid] = cemb
    
cembs = torch.zeros(len(sid2cemb), 384)
for idx, emb in sid2cemb.items():
    cembs[idx] = emb

cembs = F.normalize(cembs, dim=-1)
sim = torch.mm(cembs, cembs.t())
sim = sim.numpy()

PREPROCESSING (train): 100%|██████████| 904/904 [00:16<00:00, 54.73it/s] 
PREPROCESSING (val): 100%|██████████| 226/226 [00:03<00:00, 57.61it/s] 
PREPROCESSING (test): 100%|██████████| 282/282 [00:04<00:00, 60.21it/s] 


In [5]:
# VERSION: DO NOT ALLOW GAP

# ids_act_train = moma.get_ids_act(split="train")
# ids_act_val = moma.get_ids_act(split="val")
# ids_act_test = moma.get_ids_act(split="test")
# ids_act = ids_act_train + ids_act_val + ids_act_test
ids_act = moma.get_ids_act(cnames_act=["basketball game"])

for vid_i in tqdm(ids_act):
    for vid_j in ids_act:
        if vid_i == vid_j:
            continue
        p = vid2seq[vid_i]
        q = vid2seq[vid_j]

        D = DTW(p, q, sim)

100%|██████████| 143/143 [00:10<00:00, 14.04it/s]


In [14]:
key, count = [], []
for k, v in pattern.items():
    if len(k) > 9:
        key.append(k)
        count.append(v)

count = np.array(count)
prob = count / count.sum()

sorted_idx = np.argsort(prob)[::-1]

for idx in sorted_idx[:10]:
    print("============================")
    print(key[idx])
    print(f"prob: {prob[idx]}")
    print("============================")

62 62
22 62
prob: 0.02224039247751431
22 62
62 62
prob: 0.02224039247751431
24 62
62 62
prob: 0.020768601798855275
62 62
24 62
prob: 0.020768601798855275
22 62
22 62
prob: 0.01256473153447806
22 22
24 22
prob: 0.012292177705096757
24 22
22 22
prob: 0.012264922322158627
62 62 62
22 62 62
prob: 0.011174707004633416
22 62 62
62 62 62
prob: 0.011174707004633416
62 22
62 22
prob: 0.010902153175252113
