In [11]:
import argparse
import os
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import wandb
import yaml
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm import tqdm

from data_proc.emotionmocap_dataset import EmotionDataset
from data_proc.utils import increment_path
from model.model import TCN, PURE1D
import torch.nn.functional as F
import argparse
import cv2
import numpy as np
import torch
from torchvision import models
from CAM.eigen_cam import EigenCAM

from CAM.guided_backprop import GuidedBackpropReLUModel
from CAM.utils.image import show_cam_on_image, deprocess_image, preprocess_image
from CAM.utils.model_targets import ClassifierOutputTarget


# --- Configuration -----------------------------------------------------------
device = torch.device("cpu")
parser = argparse.ArgumentParser()  # not used in this cell; kept in case other cells reference it
project = 'runs/train'
weight = 'latest'  # 'latest' -> newest file in the weights dir, otherwise an epoch tag for 'train-<tag>.pt'
exp_name = 'exp130'
# NOTE(review): absolute, user-specific path; consider a configurable DATA_DIR.
data_path = '/home/taehyun/workspace/childtoy/MotionReasoning/dataset/mocap_emotion_rig'
window = 80
batch_size = 1

processed_data_dir = 'processed_data_mocam/'

# --- Locate and load the checkpoint -------------------------------------------
save_dir = Path(project) / exp_name
wdir = save_dir / 'weights'
weights = os.listdir(wdir)

if weight == 'latest':
    weights_paths = [wdir / name for name in weights]
    if not weights_paths:
        # max() on an empty list would raise an opaque ValueError.
        raise FileNotFoundError(f"No checkpoint files found in {wdir}")
    weight_path = max(weights_paths, key=os.path.getctime)
else:
    weight_path = wdir / ('train-' + weight + '.pt')
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
ckpt = torch.load(weight_path, map_location=device)
print(f"Loaded weight: {weight_path}")

# --- Dataset -----------------------------------------------------------------
# Evaluation split (train=False) of the emotion mocap dataset.
Path(processed_data_dir).mkdir(parents=True, exist_ok=True)
emotion_dataset = EmotionDataset(data_dir=data_path, processed_data_dir=processed_data_dir, train=False, device=device, window=window)
emotion_data_loader = DataLoader(emotion_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

# --- Model -------------------------------------------------------------------
# Every hyper-parameter is assigned BEFORE the model is constructed. The
# original cell built the model first and therefore only worked when
# `input_channels` / `n_classes` were left over from an earlier kernel run.
n_hid = ckpt['n_hid']
n_level = ckpt['n_level']
channel_sizes = [n_hid] * n_level  # now derived from the checkpoint values rather than placeholders
kernel_size = 5
seq_length = 40
n_classes = 7
# Matches the channel axis of `data` built below (local_q, q_vel and q_acc
# concatenated) -- presumably 3 x 35 features; TODO confirm.
input_channels = 105

model = PURE1D(input_channels, n_classes, kernel_size=kernel_size, dropout=0)
model.load_state_dict(ckpt['TCN'])
model.eval()
correct = 0

# --- One batch through the model ----------------------------------------------
# Built-in next() instead of the Python-2 style `.next()` method, which
# Python 3 iterators are not required to provide.
origin_data = next(iter(emotion_data_loader))
local_q = origin_data["local_q"].to(device)
q_vel = origin_data["q_vel"].to(device)
q_acc = origin_data["q_acc"].to(device)
labels = origin_data["labels"].to(device)
# Concatenate along the last (feature) axis, then move features to dim 1 so the
# tensor is laid out as (batch, channels, time) for the 1-D model.
data = torch.cat([local_q, q_vel, q_acc], dim=2)
data = data.permute(0, 2, 1)
output = model(data)


# 

Loaded weight: runs/train/exp130/weights/train-200.pt
torch.Size([1, 105, 7])


  return F.log_softmax(output)


In [17]:
# Display the shape of the model input tensor `data`.
data.shape

torch.Size([1, 105, 80])

In [14]:
# Build an EigenCAM explainer for the `net3` sub-module of the loaded model (CPU only).
# `target_layers` is passed as a list: grad-cam style explainers iterate over it to
# register hooks, so a bare module would be iterated child-by-child or fail outright.
# NOTE(review): `CAM` is a project-local package -- confirm it follows this convention.
cam = EigenCAM(model=model, target_layers=[model.net.net3], use_cuda=False)


In [16]:
# Run the CAM explainer as a context manager and turn the first heat-map of the
# batch into a BGR overlay image.
# NOTE(review): `input_tensor`, `targets`, `args` and `rgb_img` are not assigned in
# any cell above this one; they only appear in the `__main__` example further down,
# so this cell depends on leftover kernel state. The recorded output of this cell
# is an AttributeError.
with cam :

    # AblationCAM and ScoreCAM have batched implementations.
    # You can override the internal batch size for faster computation.
    cam.batch_size = 1
    grayscale_cam = cam(input_tensor=input_tensor,
                        targets=targets,
                        aug_smooth=args.aug_smooth,
                        eigen_smooth=args.eigen_smooth)

    # Here grayscale_cam has only one image in the batch
    grayscale_cam = grayscale_cam[0, :]

    cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

    # cam_image is RGB encoded whereas "cv2.imwrite" requires BGR encoding.
    cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)

AttributeError: 'EigenCAM' object has no attribute 'layer'

In [8]:
from CAM.utils.find_layer import replace_all_layer_type_recursive


In [161]:


def slerp(x, y, a):
    """
    Performs spherical linear interpolation (SLERP) between x and y, with proportion a.

    The last dimension is treated as the vector (e.g. quaternion) axis; all
    leading dimensions are batch dimensions. Neither input is modified.

    :param x: start tensor of shape (..., D)
    :param y: end tensor with the same shape as x
    :param a: completion of the interpolation (between 0 and 1); a scalar, or a
              tensor with shape x.shape[:-1]
    :return: tensor of shape (..., D) with the interpolation results
    """
    dot = torch.sum(x * y, dim=-1)

    # Take the shorter arc: where the dot product is negative, interpolate
    # towards -y instead. Done out-of-place so the caller's `y` (often a view
    # into a larger pose tensor) is never overwritten.
    flip = dot < 0.0
    dot = torch.where(flip, -dot, dot)
    y = torch.where(flip.unsqueeze(-1), -y, y)

    # Broadcast `a` to one weight per vector; the weight buffers inherit its
    # dtype/device so non-float32 inputs work with the masked assignments below.
    a = torch.zeros_like(dot) + a
    amount0 = torch.zeros_like(a)
    amount1 = torch.zeros_like(a)

    # Nearly parallel vectors: sin(omega) is close to 0, so fall back to plain
    # linear interpolation there to avoid dividing by ~0.
    linear = (1.0 - dot) < 0.01
    curved = ~linear
    omegas = torch.arccos(dot[curved])
    sinoms = torch.sin(omegas)

    amount0[linear] = 1.0 - a[linear]
    amount0[curved] = torch.sin((1.0 - a[curved]) * omegas) / sinoms

    amount1[linear] = a[linear]
    amount1[curved] = torch.sin(a[curved] * omegas) / sinoms

    # unsqueeze(-1) (rather than a fixed axis index) supports any number of
    # leading batch dimensions.
    return amount0.unsqueeze(-1) * x + amount1.unsqueeze(-1) * y


In [162]:

# Evaluate slerp between frame 0 and frame 1 of `minibatch_pose_input` at a = 1.
# NOTE(review): `minibatch_pose_input` is not assigned in any cell above this one;
# the cell relies on leftover kernel state.
slerp(minibatch_pose_input[:,0:1], minibatch_pose_input[:,1:2], 1)

tensor([[[ 0.3482, -0.6039,  0.3385, -0.5345,  1.0330,  0.3379,  0.7086,
          -1.2253, -0.4736, -0.1716,  0.5938, -0.5642,  0.0935, -0.0975,
           1.0120,  0.2344,  0.1243, -0.3263, -0.1054,  0.3421,  0.1136,
          -0.5162,  0.0650, -0.1728, -0.3562, -0.2133,  0.3854, -0.1702,
          -0.6525,  0.2746,  0.2509,  0.4100, -0.0539, -0.1555, -0.0955]]])

In [62]:
# Display frame 1 of `minibatch_pose_input` (sliced as 1:2, so dim 1 is kept with length 1).
minibatch_pose_input[:,1:2]

tensor([[[ 0.3208, -0.6984,  0.1608, -0.6784,  0.8908,  0.4103,  0.6880,
          -1.2213, -0.5218,  0.1570,  0.4645, -0.5792,  0.1383,  0.0084,
           1.0896,  0.0180,  0.1263, -0.3861, -0.0862,  0.3160,  0.1249,
          -0.5277,  0.0879, -0.1222, -0.4131, -0.1709,  0.3516,  0.0086,
          -0.6398,  0.2707,  0.3412, -0.2118,  0.0266, -0.2267, -0.0706]]])

In [60]:
# Linear combination of frames 0 and 1, displayed for comparison.
# NOTE(review): this evaluates f0 + (f0 - f1) / 2, which moves AWAY from frame 1;
# the midpoint between the two frames would be f0 + (f1 - f0) / 2 -- confirm intent.
(minibatch_pose_input[:,0:1] - minibatch_pose_input[:,1:2])/2  + minibatch_pose_input[:,0:1]

tensor([[[ 0.3753, -0.7690,  0.4045, -0.3956,  1.0158,  0.3112,  0.6439,
          -1.2066, -0.4249, -0.2364,  0.5893, -0.5288,  0.0688,  0.0719,
           0.9805,  0.0739,  0.1063, -0.2881, -0.0945,  0.3488,  0.3066,
          -0.5396,  0.1073,  0.4202, -0.9112,  0.0515,  0.3812,  1.7749,
          -0.6355,  0.3403,  1.3667, -6.6347, -0.0482, -0.1574, -0.0926]]])

In [None]:
# NOTE(review): unfinished draft of the interpolation loop; it cannot run as written
# (see the notes on the lines below).
mask_start_frame = 0 
seq_len = int(local_q.size(1))
expand_rate = int(teacher_len/student_len)
for i in range(student_len):
    interpolated = torcch  # NOTE(review): `torcch` looks like a typo for `torch`, and the statement is incomplete
for i in range(expand_rate):
        dt = 1 / expand_rate
        interpolated[:,i:i+1,:] = slerp()  # NOTE(review): slerp(x, y, a) takes three required arguments

In [64]:
# Check the shape of a zero buffer sized (batch_size, teacher_len, num_joint).
batch_size = 1
teacher_len = 60
num_joint = 35
torch.zeros([batch_size, teacher_len, num_joint]).shape

torch.Size([1, 60, 35])

In [126]:
# Display `dt_list`.
# NOTE(review): `dt_list` is not assigned in any visible cell; it relies on leftover kernel state.
dt_list

array([ 0.        ,  0.33898305,  0.6779661 ,  1.01694915,  1.3559322 ,
        1.69491525,  2.03389831,  2.37288136,  2.71186441,  3.05084746,
        3.38983051,  3.72881356,  4.06779661,  4.40677966,  4.74576271,
        5.08474576,  5.42372881,  5.76271186,  6.10169492,  6.44067797,
        6.77966102,  7.11864407,  7.45762712,  7.79661017,  8.13559322,
        8.47457627,  8.81355932,  9.15254237,  9.49152542,  9.83050847,
       10.16949153, 10.50847458, 10.84745763, 11.18644068, 11.52542373,
       11.86440678, 12.20338983, 12.54237288, 12.88135593, 13.22033898,
       13.55932203, 13.89830508, 14.23728814, 14.57627119, 14.91525424,
       15.25423729, 15.59322034, 15.93220339, 16.27118644, 16.61016949,
       16.94915254, 17.28813559, 17.62711864, 17.96610169, 18.30508475,
       18.6440678 , 18.98305085, 19.3220339 , 19.66101695, 20.        ])

In [159]:
# Draw 8 random integers in [0, teacher_len - student_len) and print them.
# NOTE(review): `student_len` is first assigned in a cell further down, and no random seed is set.
print(np.random.randint(0, teacher_len - student_len,8))

[ 7 36 25 19  4 24 30 32]


In [163]:
# Build `data_seq`: two "global" clips of teacher_len frames taken straight from
# `local_q`, followed by several "local" clips of student_len frames that are
# time-stretched to teacher_len frames with slerp.
mask_start_frame = 0
batch_size = 1
teacher_len = 60
student_len = 20
num_joint = 35
num_local_clips = 8
# NOTE(review): no random seed is set, so the sampled start indices differ per run.
local_start_idx = np.random.randint(0, teacher_len - student_len, num_local_clips)
global_start_idx = [10, 20]
seq_len = int(local_q.size(1))
expand_rate = int(teacher_len/student_len)
print('expand_rate', expand_rate)

data_seq = [local_q[:, start:start + teacher_len] for start in global_start_idx]

# Fractional source-frame position for every output frame: the integer part
# selects the left key frame, the fractional part is the slerp weight.
# (The original indexed with `dt_list`, which this cell never defines.)
local_dt_list = np.linspace(0, student_len - 1, teacher_len)
for start_idx in local_start_idx:
    minibatch_pose_input = local_q[:, start_idx:start_idx + student_len]
    # A fresh buffer per clip: reusing one tensor would make every appended
    # entry alias the same storage and end up holding only the last clip.
    interpolated = torch.zeros([batch_size, teacher_len, num_joint],
                               dtype=local_q.dtype, device=local_q.device)
    for i in range(teacher_len - 1):
        left = int(local_dt_list[i])
        frac = local_dt_list[i] - left
        interpolate_start = minibatch_pose_input[:, left:left + 1]
        # clone(): this slice is a view into local_q, so a slerp implementation
        # that flips its second argument in place would corrupt the source data.
        interpolate_end = minibatch_pose_input[:, left + 1:left + 2].clone()
        interpolated[:, i:i + 1] = slerp(interpolate_start, interpolate_end, frac)
    # The final output frame coincides with the final key frame.
    interpolated[:, -1] = minibatch_pose_input[:, -1]
    data_seq.append(interpolated)


expand_rate 3


In [165]:
# Look up the attribute named by `args.arch` on `vits` and call it with the configured patch size.
# NOTE(review): neither `vits` nor `args` is assigned in any cell above this one.
vits.__dict__[args.arch](patch_size=args.patch_size)

10

In [142]:
# Load the `dino_vits16` entry point from the `facebookresearch/dino:main` torch.hub repo.
# The first call downloads the repo and weights, so it needs network access.
# NOTE(review): `torch` is already imported in the first cell; this re-import is redundant.
import torch
vits16 = torch.hub.load('facebookresearch/dino:main', 'dino_vits16')

Downloading: "https://github.com/facebookresearch/dino/archive/main.zip" to /home/taehyun/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dino/dino_deitsmall16_pretrain/dino_deitsmall16_pretrain.pth" to /home/taehyun/.cache/torch/hub/checkpoints/dino_deitsmall16_pretrain.pth


  0%|          | 0.00/82.7M [00:00<?, ?B/s]

In [144]:
# Display the module tree of the loaded `vits16` model.
vits16

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 384, kernel_size=(16, 16), stride=(16, 16))
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (blocks): ModuleList(
    (0): Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=384, out_features=1152, bias=True)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=384, out_features=384, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=384, out_features=1536, bias=True)
        (act): GELU()
        (fc2): Linear(in_features=1536, out_features=384, bias=True)
        (drop): Dropout(p=0.0, inplace=False)
      )
    )
    (1): Block(
      (norm1): LayerNorm((384,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (

In [101]:
# Up-sample the first `student_len` frames of `local_q` by an integer factor
# (`expand_rate`), inserting slerp-interpolated frames between consecutive key frames.
mask_start_frame = 0
batch_size = 1
teacher_len = 60
student_len = 20
num_joint = 35
seq_len = int(local_q.size(1))
expand_rate = int(teacher_len/student_len)
print('expand_rate', expand_rate)
interpolated = torch.zeros([batch_size, teacher_len, num_joint])
minibatch_pose_input = local_q[:,0:student_len]
dt = 1 / expand_rate  # slerp weight step between two key frames
for i in range(student_len-1):
    interpolate_start = minibatch_pose_input[:, i:i+1]
    # clone(): guards the source tensor against a slerp that flips its second
    # argument in place (the slice is a view into local_q).
    interpolate_end = minibatch_pose_input[:, i+1:i+2].clone()
    for j in range(expand_rate):
        # Output frame for key-frame pair i, sub-step j. The original slice
        # `expand_rate*(i-1) : expand_rate*(i-1)+j` started at a negative index
        # for i == 0 and was empty for j == 0, so frames were dropped or misplaced.
        frame = expand_rate * i + j
        interpolated[:, frame:frame+1] = slerp(interpolate_start, interpolate_end, dt * j)
# The key-frame pairs cover output frames [0, expand_rate * (student_len - 1)).
# NOTE(review): the remaining frames are filled by holding the final key pose
# (they were previously left as zeros) -- confirm this is the desired padding.
tail_start = expand_rate * (student_len - 1)
interpolated[:, tail_start:] = minibatch_pose_input[:, -1:]
# print(intte)    
# print('seq_len', seq_len)
# minibatch_pose_input = local_q.reshape(local_q.size(0), seq_len, -1)
# interpolated = torch.zeros_like(minibatch_pose_input)
# for i in range(target_seq_len):
#     interpolate_start = minibatch_pose_input[:,0:1]
#     interpolate_end = minibatch_pose_input[:,seq_len-1:]
#     for i in range(seq_len):
#         dt = 1 / (seq_len-1)
#         interpolated[:,i:i+1,:] = slerp(interpolate_start, interpolate_end, dt * i)

#     assert torch.allclose(interpolated[:,0:1], interpolate_start)
#     assert torch.allclose(interpolated[:,seq_len-1:], interpolate_end)
# else:
#     interpolate_start1 = minibatch_pose_input[:,0:1]
#     interpolate_end1 = minibatch_pose_input[:,mask_start_frame:mask_start_frame+1]

#     interpolate_start2 = minibatch_pose_input[:, mask_start_frame:mask_start_frame+1]
#     interpolate_end2 = minibatch_pose_input[:,seq_len-1:]

#     for i in range(mask_start_frame+1):
#         dt = 1 / mask_start_frame
#         interpolated[:,i:i+1,:] = slerp(interpolate_start1, interpolate_end1, dt * i)

#     assert torch.allclose(interpolated[:,0:1], interpolate_start1)
#     assert torch.allclose(interpolated[:,mask_start_frame:mask_start_frame+1], interpolate_end1)

#     for i in range(mask_start_frame, seq_len):
#         dt = 1 / (seq_len - mask_start_frame - 1)
#         interpolated[:,i:i+1,:] = slerp(interpolate_start2, interpolate_end2, dt * (i - mask_start_frame))

#     assert torch.allclose(interpolated[:,mask_start_frame:mask_start_frame+1], interpolate_start2)
#     assert torch.allclose(interpolated[:,seq_len-1:], interpolate_end2)

# interpolated = torch.nn.functional.normalize(interpolated, p=2.0, dim=3)

expand_rate 3
torch.Size([1, 1])
torch.Size([1, 1])
i 0
0.0
start 0
end 0
torch.Size([1, 1])
torch.Size([1, 1])
i 0
0.3333333333333333
start 0
end 1
torch.Size([1, 1])
torch.Size([1, 1])
i 0
0.6666666666666666
start 0
end 2
torch.Size([1, 1])
torch.Size([1, 1])
i 1
0.0
start 3
end 3
torch.Size([1, 1])
torch.Size([1, 1])
i 1
0.3333333333333333
start 3
end 4
torch.Size([1, 1])
torch.Size([1, 1])
i 1
0.6666666666666666
start 3
end 5
torch.Size([1, 1])
torch.Size([1, 1])
i 2
0.0
start 6
end 6
torch.Size([1, 1])
torch.Size([1, 1])
i 2
0.3333333333333333
start 6
end 7
torch.Size([1, 1])
torch.Size([1, 1])
i 2
0.6666666666666666
start 6
end 8
torch.Size([1, 1])
torch.Size([1, 1])
i 3
0.0
start 9
end 9
torch.Size([1, 1])
torch.Size([1, 1])
i 3
0.3333333333333333
start 9
end 10
torch.Size([1, 1])
torch.Size([1, 1])
i 3
0.6666666666666666
start 9
end 11
torch.Size([1, 1])
torch.Size([1, 1])
i 4
0.0
start 12
end 12
torch.Size([1, 1])
torch.Size([1, 1])
i 4
0.3333333333333333
start 12
end 13
torch

In [97]:
# Display the fifth-from-last frame of the first batch element of `interpolated`.
interpolated[0,-5]

tensor([ 0.7014, -0.3513,  0.2116, -0.3492,  0.1766, -0.9525, -0.5637,  0.1699,
         0.4342, -0.2971, -0.8326,  0.5925,  0.2770, -0.1130, -0.3524,  0.9334,
         0.2243, -0.0978, -0.1088, -0.1055, -0.1362,  0.6335, -1.0102,  0.0710,
         0.4224, -0.1058, -0.0804,  0.0507,  0.5528, -0.9270, -0.0478, -0.4164,
        -0.1885, -0.0339,  0.1089])

In [29]:
# Display the shape of `interpolated`.
interpolated.shape

torch.Size([1, 80, 35])

In [18]:
# Display the shape of `local_q`.
local_q.shape

torch.Size([1, 80, 35])

In [None]:
# Image-model example: build a torchvision ResNet-50 with pretrained weights and
# select its `layer4` block as the CAM target layer (wrapped in a list).
# NOTE(review): this rebinds `model`, replacing the PURE1D model loaded in the first cell.
from torchvision import models
model = models.resnet50(pretrained=True)
target_layers = [model.layer4]


In [None]:


def get_args(argv=None):
    """
    Parse the command-line options of the CAM example.

    :param argv: optional list of argument strings. When None (the default),
                 argparse reads ``sys.argv[1:]`` exactly as before. Pass an
                 explicit list (e.g. ``[]``) when calling from a notebook,
                 where ``sys.argv`` holds the kernel's own arguments.
    :return: argparse.Namespace with ``use_cuda``, ``image_path``,
             ``aug_smooth``, ``eigen_smooth`` and ``method``. ``use_cuda`` is
             forced to False when CUDA is not available.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--use-cuda', action='store_true', default=False,
                        help='Use NVIDIA GPU acceleration')
    parser.add_argument(
        '--image-path',
        type=str,
        default='./examples/both.png',
        help='Input image path')
    parser.add_argument('--aug_smooth', action='store_true',
                        help='Apply test time augmentation to smooth the CAM')
    parser.add_argument(
        '--eigen_smooth',
        action='store_true',
        # Adjacent literals are concatenated verbatim, so the separating space
        # has to be part of one of them.
        help='Reduce noise by taking the first principal component '
        'of cam_weights*activations')
    parser.add_argument('--method', type=str, default='gradcam',
                        choices=['gradcam', 'gradcam++',
                                 'scorecam', 'xgradcam',
                                 'ablationcam', 'eigencam',
                                 'eigengradcam', 'layercam', 'fullgrad'],
                        help='Can be gradcam/gradcam++/scorecam/xgradcam'
                             '/ablationcam/eigencam/eigengradcam/layercam'
                             '/fullgrad')

    args = parser.parse_args(argv)
    # Only honour --use-cuda when a CUDA device is actually available.
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    if args.use_cuda:
        print('Using GPU for acceleration')
    else:
        print('Using CPU for computation')

    return args


if __name__ == '__main__':
    # Usage: python cam.py --image-path <path_to_image>
    # Example of loading an image and computing:
    #     1. CAM
    #     2. Guided Back Propagation
    #     3. Combining both

    args = get_args()
    # NOTE(review): the dict is built eagerly, so every class named here must be
    # importable. Only EigenCAM is imported in the visible part of this file --
    # confirm the remaining classes are imported from the CAM package.
    methods = {
        "gradcam": GradCAM,
        "scorecam": ScoreCAM,
        "gradcam++": GradCAMPlusPlus,
        "ablationcam": AblationCAM,
        "xgradcam": XGradCAM,
        "eigencam": EigenCAM,
        "eigengradcam": EigenGradCAM,
        "layercam": LayerCAM,
        "fullgrad": FullGrad,
    }

    model = models.resnet50(pretrained=True)

    # Choose the target layer you want to compute the visualization for.
    # Usually this will be the last convolutional layer in the model.
    # Some common choices can be:
    # Resnet18 and 50: model.layer4
    # VGG, densenet161: model.features[-1]
    # mnasnet1_0: model.layers[-1]
    # You can print the model to help choose the layer.
    # You can pass a list with several target layers,
    # in that case the CAMs will be computed per layer and then aggregated.
    # You can also try selecting all layers of a certain type, with e.g:
    # from pytorch_grad_cam.utils.find_layers import find_layer_types_recursive
    # find_layer_types_recursive(model, [torch.nn.ReLU])
    target_layers = [model.layer4]

    # cv2 loads images as BGR; reverse the channel axis to get RGB, then scale to [0, 1].
    rgb_img = cv2.imread(args.image_path, 1)[:, :, ::-1]
    rgb_img = np.float32(rgb_img) / 255
    input_tensor = preprocess_image(rgb_img,
                                    mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])

    # We have to specify the target we want to generate
    # the Class Activation Maps for.
    # If targets is None, the highest scoring category (for every member in the batch) will be used.
    # You can target specific categories by
    # targets = [e.g ClassifierOutputTarget(281)]
    targets = None

    # Using the with statement ensures the context is freed, and you can
    # recreate different CAM objects in a loop.
    # (A stray `cam = EigenCAM(model=model, )` used to sit here; it was
    # immediately shadowed by the `with ... as cam` below and has been removed.)
    cam_algorithm = methods[args.method]
    with cam_algorithm(model=model,
                       target_layers=target_layers,
                       use_cuda=args.use_cuda) as cam:

        # AblationCAM and ScoreCAM have batched implementations.
        # You can override the internal batch size for faster computation.
        cam.batch_size = 32
        grayscale_cam = cam(input_tensor=input_tensor,
                            targets=targets,
                            aug_smooth=args.aug_smooth,
                            eigen_smooth=args.eigen_smooth)

        # Here grayscale_cam has only one image in the batch
        grayscale_cam = grayscale_cam[0, :]

        cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

        # cam_image is RGB encoded whereas "cv2.imwrite" requires BGR encoding.
        cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)

    gb_model = GuidedBackpropReLUModel(model=model, use_cuda=args.use_cuda)
    gb = gb_model(input_tensor, target_category=None)

    # Replicate the single-channel CAM to 3 channels so it can mask the
    # guided-backprop result element-wise.
    cam_mask = cv2.merge([grayscale_cam, grayscale_cam, grayscale_cam])
    cam_gb = deprocess_image(cam_mask * gb)
    gb = deprocess_image(gb)

    cv2.imwrite(f'{args.method}_cam.jpg', cam_image)
    cv2.imwrite(f'{args.method}_gb.jpg', gb)
    cv2.imwrite(f'{args.method}_cam_gb.jpg', cam_gb)