In [None]:
from scipy.io import loadmat
import numpy as np
import os
from tensorflow import keras
import tensorflow as tf
import math
import matplotlib.pyplot as plt 

from matplotlib.colors import ListedColormap

In [None]:
# Load the MATLAB training file; later cells read the 'DLC_sequences' entry
# from the resulting dictionary.
filePath = '../lab/LSTM_training.mat'
data = loadmat(filePath)

In [None]:
# Shape of the sequence stored at row 2219, column 0 of DLC_sequences.
data['DLC_sequences'][2219][0].shape

In [None]:
# The first 2219 rows are treated as the left-leg recordings by this notebook.
data['DLC_sequences'][:2219].shape # left leg

In [None]:
# Everything from row 2219 onward is treated as the right-leg recordings.
data['DLC_sequences'][2219:].shape # right leg

In [63]:
# Unwrap the left-leg rows (first 2219 entries) once, then bucket the
# sequences by their frame count (second axis).
_left_leg_sequences = [row[0] for row in data['DLC_sequences'][:2219]]
left_leg_90 = [seq for seq in _left_leg_sequences if seq.shape[1] == 90]
left_leg_75 = [seq for seq in _left_leg_sequences if seq.shape[1] == 75]

In [64]:
# Turn both lists of equally sized sequences into numpy arrays.
left_leg_90, left_leg_75 = (
    np.asarray(group) for group in (left_leg_90, left_leg_75)
)

In [68]:
# Shape of a single 75-frame sequence: (coordinate rows, frames).
left_leg_75[1].shape

(6, 75)

In [131]:
# Sequence class that serves keypoint sequences encoded as per-frame motion features
class Keypoints(keras.utils.Sequence):
    """Keras ``Sequence`` yielding one encoded keypoint sequence per index.

    Each stored sequence is a 2-D array whose rows alternate x / y
    coordinates per joint (row ``2*j`` is x of joint ``j``, row ``2*j + 1`` is
    its y) and whose columns are frames. Joint 0 is used as the reference
    joint (spine base / hip in the original comments).

    Args:
        keypoint_array: Indexable collection of such sequences.
        clip_seconds: Duration of every sequence in seconds; the frame rate is
            derived as ``frames / clip_seconds``. Defaults to 3.0, the value
            previously hard-coded.
    """

    # Features per joint: displacement (dx, dy, norm), speed (3),
    # acceleration (3) and position relative to joint 0 (dx, dy, norm).
    N_FEATURES = 12

    def __init__(self, keypoint_array, clip_seconds=3.0):
        super().__init__()
        self.keypoints = keypoint_array
        self.clip_seconds = clip_seconds

    def __len__(self):
        """Number of sequences (one sequence is served per index)."""
        return len(self.keypoints)

    def encode(self, batch):
        """Encode one keypoint sequence into motion features.

        Args:
            batch: Array of shape ``(2 * joints, frames)``.

        Returns:
            ``float`` array of shape ``(frames, joints, 12)``. Displacement,
            speed and acceleration are zero for frame 0 because there is no
            previous frame; the position relative to joint 0 is filled for
            every frame, including frame 0.
        """
        coords = np.asarray(batch, dtype=float)
        n_frames = coords.shape[1]
        n_joints = coords.shape[0] // 2
        fps = n_frames / self.clip_seconds

        # (frames, joints) views of the x and y coordinates.
        xs = coords[0:2 * n_joints:2].T
        ys = coords[1:2 * n_joints:2].T

        A = np.zeros((n_frames, n_joints, self.N_FEATURES))

        # displacement between consecutive frames
        dx = np.diff(xs, axis=0)
        dy = np.diff(ys, axis=0)
        A[1:, :, 0] = dx
        A[1:, :, 1] = dy
        A[1:, :, 2] = np.sqrt(dx ** 2 + dy ** 2)

        # speed
        A[1:, :, 3:6] = A[1:, :, 0:3] * fps

        # acceleration (frame 1 is differenced against the zero speed of frame 0)
        A[1:, :, 6:9] = (A[1:, :, 3:6] - A[:-1, :, 3:6]) * fps

        # offset of every joint from joint 0, defined for all frames
        A[:, :, 9] = xs - xs[:, :1]
        A[:, :, 10] = ys - ys[:, :1]
        A[:, :, 11] = np.sqrt(A[:, :, 9] ** 2 + A[:, :, 10] ** 2)

        return A

    def __getitem__(self, index):
        """Return the encoded features of sequence ``index``."""
        return self.encode(self.keypoints[index])

In [132]:
# Instantiate the data Sequence over the 75-frame left-leg sequences
data_generator = Keypoints(left_leg_75)

In [133]:
# Shape of one encoded sequence: (frames, joints, features).
data_generator[0].shape

(75, 3, 12)

In [75]:
class DotLayer(tf.keras.layers.Layer):
    """Bias-free linear projection ``inputs @ w``.

    The weight matrix has shape ``(input_dim, units)``; the model uses its
    columns as the learnable cluster prototypes.

    Args:
        units: Number of output columns (prototypes).
        **kwargs: Standard Keras layer arguments (``name``, ``dtype``, ...),
            accepted so the layer can be rebuilt from its config.
    """

    def __init__(self, units=8, **kwargs):
        super(DotLayer, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # initialize the prototype
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                               initializer='random_normal',
                               trainable=True,
                               name='dot_layer')

    def call(self, inputs):
        return tf.matmul(inputs, self.w)

    def get_config(self):
        # Merge with the base config so name/dtype survive a round-trip.
        config = super(DotLayer, self).get_config()
        config.update({"units": self.units})
        return config

In [None]:
class MyModel(tf.keras.Model):
    """Frame embedding model trained with temporal optimal-transport pseudo-labels.

    A 2-layer MLP embeds every frame, a ``DotLayer`` scores the embedding
    against ``K`` learnable prototypes and a temperature softmax turns the
    scores into cluster probabilities ``P``. Training minimises a
    cross-entropy between ``P`` and Sinkhorn-Knopp pseudo-labels ``Q`` plus a
    temporal-coherence (N-pair) loss on the embeddings.

    Args:
        frame_batch: Number of frames in one sequence (one training batch).
        features: Number of features per joint.
        joints: Number of joints. Only ``joints * features`` is used, to
            flatten each frame.
        D: Dimension of the embedding features.
        K: Number of clusters / prototypes.
        KL_weight: Balance weight between the similarity term and the
            temporal order-preserving prior.
        SK_inter: Number of Sinkhorn-Knopp iterations.
        alpha: Weight of the temporal coherence loss.
        T: Softmax temperature applied to the prototype scores.
    """

    def __init__(self,frame_batch,features,joints, D=30, K=8, KL_weight=0.1, SK_inter=5, alpha=1.0, T=10.0):
        super(MyModel, self).__init__()
        self.D = D
        self.K = K
        self.T = T
        self.klw = KL_weight
        self.nit = SK_inter

        self.frame_batch = frame_batch

        self.features = features
        self.joints = joints

        self.alpha = alpha

        # 2-layer MLP
        self.dense1 = tf.keras.layers.Dense(units=2*D, activation='sigmoid')
        self.dense2 = tf.keras.layers.Dense(units=D, activation='sigmoid')
        # One prototype column per cluster (previously always 8, ignoring K).
        self.dot1 = DotLayer(units=K)
        self.softmax1 = tf.keras.layers.Softmax()

    def call(self, inputs):
        """Return per-frame cluster probabilities of shape ``(frames, K)``.

        The frame embeddings are kept in ``self.Z`` because ``temporal_ot``
        and ``loss_func`` read them from there.
        """
        x = tf.reshape(inputs, shape=(-1, self.joints * self.features))

        x = self.dense1(x)
        x = self.dense2(x)

        self.Z = x

        x = self.dot1(x)
        return self.softmax1(x / self.T)

    def compile(self, optimizer=None, **kwargs):
        """Compile the model; ``model.compile()`` keeps the previous default.

        The default is Adam with a base rate of 1e-3 and inverse-time decay of
        1e-4 per step. It is expressed as a schedule because the ``decay``
        keyword is rejected by the newer Keras optimizers.
        """
        if optimizer is None:
            learning_rate = tf.keras.optimizers.schedules.InverseTimeDecay(
                initial_learning_rate=1e-3, decay_steps=1, decay_rate=1e-4)
            optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        super(MyModel, self).compile(optimizer=optimizer, **kwargs)

    def get_prior(self, x):
        """Gaussian temporal prior of shape ``(frame_batch, K)``.

        Entry ``(i, j)`` is a Gaussian density of the normalised distance
        between frame ``i`` and cluster ``j`` from the diagonal.

        NOTE(review): the Gaussian variance is taken from the variance of the
        input ``x`` (as in the original code) rather than from a fixed
        hyper-parameter - confirm this is intended.
        """
        n_frames = self.frame_batch
        n_clusters = self.K

        x = tf.cast(x, tf.float32)
        # Guard against a constant input, which would divide by zero below.
        var = tf.maximum(tf.math.reduce_variance(x), 1e-8)
        std = tf.math.sqrt(var)

        frame_pos = tf.range(n_frames, dtype=tf.float32) / n_frames
        cluster_pos = tf.range(n_clusters, dtype=tf.float32) / n_clusters
        diag_norm = math.sqrt(1.0 / n_frames ** 2 + 1.0 / n_clusters ** 2)
        d = tf.abs(frame_pos[:, tf.newaxis] - cluster_pos[tf.newaxis, :]) / diag_norm

        # Gaussian density: exp(-d^2 / (2 var)) / (std * sqrt(2 pi))
        return tf.math.exp(-tf.square(d) / (2.0 * var)) / (std * math.sqrt(2.0 * math.pi))

    def temporal_ot(self, x):
        """Compute the soft assignment ``Q`` with Sinkhorn-Knopp.

        Uses the embeddings stored in ``self.Z`` by the latest forward pass,
        so ``self(x)`` must have been called first.

        Returns:
            Tensor of shape ``(frame_batch, K)`` equal to
            ``diag(u) @ kernel @ diag(v)``.
        """
        Z = self.Z
        # C: learnable prototypes of the K clusters
        C = self.dot1.w
        prior = self.get_prior(x)

        n_frames = self.frame_batch
        n_clusters = self.K
        # balance weight of the similarity term and the temporal order-preserving term
        klw = self.klw

        # kernel = exp((Z C + klw * log(prior)) / klw), built in the log domain.
        # Subtracting the maximum only rescales the kernel, which the Sinkhorn
        # normalisation below cancels (for at least one iteration), and it
        # keeps exp() from overflowing.
        log_kernel = tf.matmul(Z, C) / klw + tf.math.log(prior)
        log_kernel = log_kernel - tf.reduce_max(log_kernel)
        kernel = tf.math.exp(log_kernel)

        # Sinkhorn-Knopp Algorithm with uniform marginals
        u = tf.ones((n_frames, 1), dtype=kernel.dtype)
        v = tf.ones((n_clusters, 1), dtype=kernel.dtype)
        a = u / n_frames
        b = v / n_clusters
        # nit: the number of Sinkhorn-Knopp iterations
        for _ in range(self.nit):
            v = b / tf.matmul(kernel, u, transpose_a=True)
            u = a / tf.matmul(kernel, v)

        # Broadcasting form of diag(u) @ kernel @ diag(v).
        return u * kernel * tf.transpose(v)

    def _cross_entropy(self, P, Q):
        """Cross-entropy between pseudo-labels ``Q`` and predictions ``P``."""
        # The small epsilon keeps log() finite when a probability underflows to 0.
        return -tf.math.reduce_sum(Q * tf.math.log(P + 1e-8)) / self.frame_batch

    def _temporal_coherence(self, Z, n_anchors=8, window_size=4):
        """N-pair temporal coherence loss on the frame embeddings ``Z``.

        One anchor frame is drawn from each of ``n_anchors`` consecutive
        segments and one positive frame from a window of ``window_size``
        frames around it. For anchor ``i`` the loss is the negative
        log-softmax of its similarity to its own positive against the
        positives of all anchors.

        Sampling uses ``tf.random`` so new frames are drawn on every training
        step; NumPy sampling would be evaluated once when Keras traces
        ``train_step`` and then stay fixed.
        """
        n_frames = self.frame_batch
        n_anchors = max(1, min(n_anchors, n_frames))
        seg_len = max(1, n_frames // n_anchors)

        # sample z_i: one anchor per segment
        offsets = tf.random.uniform((n_anchors,), minval=0, maxval=seg_len, dtype=tf.int32)
        anchor_idx = tf.range(n_anchors, dtype=tf.int32) * seg_len + offsets

        # sample z_positive uniformly inside the window, clipped to the sequence
        lo = tf.maximum(anchor_idx - window_size, 0)
        hi = tf.minimum(anchor_idx + window_size, n_frames - 1)
        span = tf.cast(hi - lo + 1, tf.float32)
        frac = tf.random.uniform((n_anchors,), dtype=tf.float32)
        pos_idx = tf.minimum(lo + tf.cast(tf.floor(frac * span), tf.int32), hi)

        anchors = tf.gather(Z, anchor_idx, axis=0)
        positives = tf.gather(Z, pos_idx, axis=0)

        # logits[i, j] = z_i . z_j^+ ; the matching pair sits on the diagonal.
        logits = tf.matmul(anchors, positives, transpose_b=True)
        log_prob = tf.linalg.diag_part(tf.nn.log_softmax(logits, axis=1))
        return -tf.reduce_mean(log_prob)

    def loss_func(self, P, Q):
        """Total loss: cross-entropy plus ``alpha`` times temporal coherence.

        Reads the embeddings from ``self.Z`` (set by the latest forward pass).
        """
        l_ce = self._cross_entropy(P, Q)
        l_tc = self._temporal_coherence(self.Z)
        return l_ce + self.alpha*l_tc

    def train_step(self, data):
        """Run one optimisation step on a single sequence.

        Returns:
            Dict with the total ``loss`` and its two components.
        """
        # Accept both a bare input tensor and an ``(x, ...)`` tuple.
        x = data[0] if isinstance(data, (tuple, list)) else data

        with tf.GradientTape() as tape:
            P = self(x, training=True)
            # Q acts as a fixed target: without stop_gradient the loss could
            # be lowered by changing the pseudo-labels instead of the
            # predictions.
            Q = tf.stop_gradient(self.temporal_ot(x))
            l_ce = self._cross_entropy(P, Q)
            l_tc = self._temporal_coherence(self.Z)
            loss = l_ce + self.alpha * l_tc
        # compute gradients
        gradients = tape.gradient(loss, self.trainable_variables)
        # update weights using the optimizer
        self.optimizer.apply_gradients(zip(gradients, self.trainable_variables))

        return {'loss': loss, 'l_ce': l_ce, 'l_tc': l_tc}

    def get_config(self):
        """Constructor arguments, sufficient for ``from_config`` to rebuild the model."""
        return {
            "frame_batch": self.frame_batch,
            "features": self.features,
            "joints": self.joints,
            "D": self.D,
            "K": self.K,
            "KL_weight": self.klw,
            "SK_inter": self.nit,
            "alpha": self.alpha,
            "T": self.T,
        }

    @classmethod
    def from_config(cls, config):
        return cls(**config)

In [None]:
# Take the dimensions from the encoded data, which is laid out as
# (frames, joints, features), and pass them by keyword so that joints and
# features cannot be swapped.
n_frames, n_joints, n_features = data_generator[0].shape
model = MyModel(frame_batch=n_frames, features=n_features, joints=n_joints)
model.compile()
# Only a training loss is reported (there is no validation data), so monitor
# 'loss'. Weights-only checkpoints are used because this is a subclassed
# model, which the HDF5 format cannot store as a full model.
callbacks = [
    keras.callbacks.ModelCheckpoint(
        "motion_tracking.h5",
        monitor="loss",
        save_best_only=True,
        save_weights_only=True,
    )
]
model.build(input_shape=(None, n_joints, n_features))
model.summary()

In [None]:
# Shape of the first encoded sequence, i.e. of one training batch.
data_generator[0].shape

In [None]:
# Train for 5 epochs; every item of the generator (one sequence) is one step.
history = model.fit(data_generator, epochs=5, callbacks=callbacks)

In [None]:
# Plot the training loss per epoch. The epoch axis follows the recorded
# history, so it stays correct when the number of epochs changes.
loss = history.history['loss']

epochs = range(1, len(loss) + 1)

fig, ax = plt.subplots()
ax.plot(epochs, loss, 'bo', label='Training loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
# Only the training loss is available; no validation data is used.
ax.set_title('Training loss')
ax.legend()
plt.show()

In [None]:
def draw_segment(P):
    """Draw per-frame cluster labels as a colour-coded bar, one bar per frame.

    Args:
        P: 1-D sequence of integer cluster labels, one per frame. Labels index
            into an 8-colour palette (0 -> red, ..., 7 -> pink). Any length is
            supported.
    """
    labels = np.asarray(P)
    n_frames = len(labels)

    # Create a color map for the segments
    colors = ['red', 'green', 'blue', 'orange', 'yellow', 'purple', 'brown', 'pink']
    cmap = ListedColormap(colors)

    frame_indices = np.arange(n_frames)
    # Every frame gets a bar of unit height; only the colour carries information.
    bar_heights = np.ones(n_frames)

    # Plot the segmented bar chart
    fig, ax = plt.subplots(figsize=(12, 2))
    ax.bar(frame_indices, bar_heights, color=cmap(labels), width=1)

    # Customize the chart
    ax.set_xlabel('Frame')
    ax.set_ylabel('Segment')
    ax.set_title('Segmented Bar Chart')

    fig.subplots_adjust(left=0, right=1, bottom=0, top=1)
    # Display the chart
    plt.show()


In [None]:
# Predict cluster probabilities for the first sequence, keep the most likely
# cluster per frame and show the mean label.
cluster_probs = model.predict(data_generator[0])
P = np.argmax(cluster_probs, axis=1)
P.mean()

In [None]:
# Visualise the per-frame cluster labels of the first sequence.
draw_segment(P)

In [None]:
# Mean predicted cluster label of every sequence in the generator.
y = np.array([
    np.argmax(model.predict(sequence, verbose=0), axis=1).mean()
    for sequence in data_generator
])
# Take the x-axis from the results instead of a hard-coded sequence count.
x = range(len(y))

plt.scatter(x, y)
plt.xlabel('Sequence index')
plt.ylabel('Mean predicted cluster label')
plt.show()

In [5]:
import json
import cv2
import os
import logging

In [24]:
# Locations of the annotation file, the source video and the output folder
# for the exported segments.
gma_segment = "../action/gma"
gma_json = "../action/gma/baseline.json"
gma_video = "../action/gma/baseline.mp4"

In [11]:
# Load the annotation JSON. On failure the error is logged and re-raised:
# swallowing it would leave `data` bound to whatever an earlier cell stored
# there, and the following cells would silently work on the wrong object.
try:
    with open(gma_json, "r") as f:
        data = json.load(f)
except Exception as e:
    logging.error("Failed to load annotation file:{}".format(e))
    raise

In [17]:
# The first key of the annotation dictionary identifies the video.
video_id = list(data)[0]

In [15]:
# Open the source video. VideoCapture does not raise on a missing or
# unreadable file, so check explicitly instead of reading zero frames later.
cap = cv2.VideoCapture(gma_video)
if not cap.isOpened():
    raise IOError("Could not open video: {}".format(gma_video))

In [20]:
# Query the frame size and frame rate of the capture; the segment writers
# are created with the same values.
width, height = (
    int(cap.get(prop))
    for prop in (cv2.CAP_PROP_FRAME_WIDTH, cv2.CAP_PROP_FRAME_HEIGHT)
)
fps = cap.get(cv2.CAP_PROP_FPS)
print(width, height, fps)

492 360 25.0


In [25]:
# Cut the source video into one clip per annotated segment.
# VideoWriter fails silently if the target folder is missing, so create it.
os.makedirs(gma_segment, exist_ok=True)
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

try:
    for idx, annotation in enumerate(data.get(video_id, [])):
        start_frame = annotation["segment"][0]
        end_frame = annotation["segment"][1]
        label = annotation.get("label", None)

        # Jump to the first frame of the segment.
        cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)

        output_file = "{}.mp4".format(idx) if label is None else "{}_{}.mp4".format(label, idx)
        output_path = os.path.join(gma_segment, output_file)
        output_video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

        try:
            # The segment bounds are inclusive.
            for _ in range(start_frame, end_frame + 1):
                ret, frame = cap.read()
                if not ret:
                    break

                output_video.write(frame)
        finally:
            # Finalise the clip even if reading or writing fails.
            output_video.release()
finally:
    cap.release()



In [26]:
# Scratch check: membership test of a value in a list (unrelated to the
# analysis above).
a = [1,3,4,5]
b = 4
b in a

True