# 實驗一

- 使用 `Taylor Videos` 的 video representation 對兒童肢體診斷影片進行去識別化處理，以避免隱私洩漏。
- 使用 `Taylor Videos` 的 action recognition model 進行肢體診斷影片的動作識別。

In [1]:
import cv2
import os
import torch
import math
import numpy as np
import torchvision
import glob
import time
from tqdm import tqdm

In [2]:
def taylorvideo(video_path, terms, window_size, step_size):
    """Open a video and read frames until one temporal block is buffered.

    NOTE(review): this looks like an unfinished draft. Frames are read but
    never processed, ``step_size`` is unused, and nothing is returned.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        terms: Number of Taylor terms (K) that the block must support.
        window_size: Temporal block length in frames; must be at least
            ``terms + 3``.
        step_size: Currently unused.

    Returns:
        None.
    """
    if window_size - 3 < terms:
        print("The given temporal block length is not enough to compute K terms.")
        return

    vidcap = cv2.VideoCapture(video_path)
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        vlen = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
        print("Video length: %d" % vlen, " |  FPS: %d" % fps)

        success, image = vidcap.read()
        count = 1
        # Stop as soon as the block is full. Previously the loop kept
        # spinning once count reached window_size, because nothing inside
        # it could change `success` any more.
        while success and count < window_size:
            success, image = vidcap.read()
            count += 1
    finally:
        # Always give the decoder handle back, even if a read raises.
        vidcap.release()

In [3]:
def preprocess_tensor(tensor):
    """Convert a tensor into a ``torch.uint8`` image plane.

    Negative values are replaced by 0. If the largest remaining value is
    positive, the result is rescaled so that this maximum maps to 255;
    otherwise it is only cast to ``torch.uint8``. The input tensor is left
    untouched.

    Args:
        tensor: A ``torch.Tensor`` of any shape.

    Returns:
        A new ``torch.uint8`` tensor with the same shape as ``tensor``.
    """
    # clamp() allocates a new tensor, so the caller's data is not modified
    # (the previous masked assignment wrote zeros into the argument itself).
    clipped = torch.clamp(tensor, min=0)

    # max() is undefined for an empty tensor; there is nothing to scale.
    if clipped.numel() == 0:
        return clipped.to(torch.uint8)

    max_val = clipped.max()
    if max_val > 0:
        # Scale the values to fit in the range [0, 255].
        return (clipped * 255 / max_val).to(torch.uint8)
    return clipped.to(torch.uint8)

In [4]:
def _read_gray_frame(cap):
    """Read the next frame from ``cap`` as a grayscale tensor divided by 255.

    Raises:
        RuntimeError: If the capture cannot deliver another frame.
    """
    ret, frame = cap.read()
    if not ret:
        raise RuntimeError("Could not read the next frame from the video.")
    gray = torch.from_numpy(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
    return torch.div(gray, 255.0)


def videoConvert(vid_path, o, terms, tPrime):
    """Convert a video into a three-channel video of temporal differences.

    A sliding window of ``terms + 3`` grayscale frames is kept together with
    its forward differences along the frame axis (orders 1 to ``terms + 2``).
    For every window position three planes are accumulated: each is a sum
    over ``incB`` in ``range(terms)`` of a per-pixel weight multiplied by the
    difference of order ``incB + 1`` / ``incB + 2`` / ``incB + 3`` taken at
    the first frame of the window. Each plane is divided by ``tPrime``,
    passed through ``preprocess_tensor`` and stored as one colour channel.

    Args:
        vid_path: Path of the input video.
        o: Path of the output video passed to ``torchvision.io.write_video``.
        terms: Number of terms in the sum.
        tPrime: Temporal block length. Values up to ``terms + 3`` are raised
            to ``terms + 3``. It determines the number of output frames
            (``frame count - tPrime + 1``) and the divisor applied to every
            channel. The sliding window itself always holds ``terms + 3``
            frames.

    Returns:
        None. The result is written to ``o``.

    Raises:
        RuntimeError: If the video cannot be opened or a frame cannot be read.
        ValueError: If the video reports fewer than ``tPrime`` frames.
    """
    ts = time.time()
    length = terms + 3
    if tPrime <= length:
        tPrime = length

    cap = cv2.VideoCapture(vid_path)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Could not open video: {vid_path}")

        fpsT = cap.get(cv2.CAP_PROP_FPS)
        vidlength = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        print("length = ", vidlength)
        print("fps = ", fpsT)
        print(f"Check point 1: {time.time()-ts}")

        # Fail early with a clear message instead of a negative-size
        # allocation or a decode error further down.
        if vidlength < tPrime:
            raise ValueError(
                f"Video has {vidlength} frames but at least {tPrime} are required."
            )

        norm_g = _read_gray_frame(cap)
        h, w = norm_g.shape
        print(f"Check point 2: {time.time()-ts}")

        # full_difference_list[k, j] is the k-th order forward difference
        # starting at window frame j; row 0 holds the frames themselves.
        full_difference_list = torch.zeros((length, length, h, w), dtype=torch.float64)
        full_difference_list[0, 0, :, :] = norm_g
        for initialInc in range(1, length):
            full_difference_list[0, initialInc, :, :] = _read_gray_frame(cap)
        print(f"Check point 3: {time.time()-ts}")

        frame_total = vidlength - tPrime + 1
        img = torch.zeros((frame_total, h, w, 3), dtype=torch.uint8)

        # Cumulative per-iteration timings, shown on the progress bar.
        cp31 = []
        cp32 = []
        cp33 = []
        pbar = tqdm(range(0, frame_total))
        for sequences in pbar:
            local_ts = time.time()
            if sequences == 0:
                # Build every difference order once for the initial window.
                # The pad appends one zero slot at the end of the frame axis
                # so each row keeps `length` entries.
                for listInc in range(1, length):
                    full_difference_list[listInc, :, :, :] = torch.nn.functional.pad(
                        full_difference_list[listInc - 1, 1:, :, :]
                        - full_difference_list[listInc - 1, :-1, :, :],
                        (0, 0, 0, 0, 0, 1),
                    )
            else:
                norm_g = _read_gray_frame(cap)

                # Shift the window one frame to the left, then fill in only
                # the entries that depend on the newly appended frame.
                full_difference_list = torch.roll(full_difference_list, -1, 1)
                full_difference_list[0, length - 1, :, :] = norm_g
                for listInc in range(1, length):
                    slot = length - 1 - listInc
                    full_difference_list[listInc, slot, :, :] = (
                        full_difference_list[listInc - 1, slot + 1, :, :]
                        - full_difference_list[listInc - 1, slot, :, :]
                    )

            cp31.append(time.time()-local_ts)

            t1Sum = 0
            t2Sum = 0
            t3Sum = 0

            # Offset of every window frame from the first one; broadcasting
            # replaces the explicit repeat() copy.
            xa_Tensor = full_difference_list[0, :, :, :] - full_difference_list[0, 0, :, :]

            for incB in range(0, terms):
                part = torch.div(torch.pow(xa_Tensor, incB), math.factorial(int(incB)))
                # The same weight feeds all three channels; compute it once.
                weight = torch.sum(part, 0)
                t1Sum += torch.mul(weight, full_difference_list[incB + 1, 0, :, :])
                t2Sum += torch.mul(weight, full_difference_list[incB + 2, 0, :, :])
                t3Sum += torch.mul(weight, full_difference_list[incB + 3, 0, :, :])

            cp32.append(time.time()-local_ts)

            # R channel
            img[sequences, :, :, 0] = preprocess_tensor(t1Sum / tPrime)
            # G channel
            img[sequences, :, :, 1] = preprocess_tensor(t2Sum / tPrime)
            # B channel
            img[sequences, :, :, 2] = preprocess_tensor(t3Sum / tPrime)

            cp33.append(time.time()-local_ts)

            pbar.set_postfix({'cp31': np.mean(cp31), 'cp32': np.mean(cp32), 'cp33': np.mean(cp33)})

        print(f"Check point 4: {time.time()-ts}")
    finally:
        # Release the decoder even when opening or reading fails.
        cap.release()

    print(type(img))
    print(img.shape)
    print(img.dtype)
    fpsT = int(fpsT)
    print(type(fpsT))
    # NOTE: according to the original author, torchvision's write_video had
    # to be patched locally for this call to work (around line 140 of that
    # function: change `frame.pict_type = "NONE"` to `frame.pict_type = 0`).
    # Whether this is needed presumably depends on the installed versions.
    torchvision.io.write_video(filename=o, video_array=img, fps=fpsT, video_codec='mpeg4')

In [None]:
# Convert new_test.mp4 into test_taylor.mp4 with terms=3. The tPrime argument
# of 0 is below terms + 3, so videoConvert raises it to 6.
videoConvert("new_test.mp4","test_taylor.mp4",3,0)
# videoConvert("./brush.mp4","./brush-taylor.mp4",3,0)

length =  116
fps =  24.0
Check point 1: 0.12832355499267578
Check point 2: 0.17293572425842285
Check point 3: 0.21001148223876953


100%|██████████| 111/111 [00:12<00:00,  9.03it/s, cp31=0.0332, cp32=0.0975, cp33=0.109]


Check point 4: 12.532719850540161
<class 'torch.Tensor'>
torch.Size([111, 720, 1280, 3])
torch.uint8
<class 'int'>
