In [1]:
# from collections import defaultdict
import cv2
import numpy as np
import os
from copy import deepcopy

from datetime import datetime

import torch
# import torch.nn as nn
# import torch.nn.functional as F

import albumentations as A

import matplotlib.pyplot as plt

# from datetime import datetime

import models
from timm.models import create_model

In [2]:
# Directory that holds one sub-folder of videos per abnormal-behaviour class.
root = "/data/ephemeral/home/datasets/abnormal/val/"

# Directory where the extracted feature arrays (.npy) are written.
npy_root = "./npy/"

# exist_ok=True replaces the racy exists()-then-makedirs() pattern and raises
# FileExistsError if the path already exists as a regular file, instead of
# silently continuing and failing later at save time.
os.makedirs(npy_root, exist_ok=True)


In [3]:
# Entries found directly under `root`, in sorted order so runs are reproducible.
folder_list = sorted(os.listdir(root))
print(f"==>> folder_list: {folder_list}")

==>> folder_list: ['VS_03.이상행동_07.전도', 'VS_03.이상행동_08.파손', 'VS_03.이상행동_09.방화', 'VS_03.이상행동_10.흡연', 'VS_03.이상행동_11.유기', 'VS_03.이상행동_12.절도', 'VS_03.이상행동_13.폭행', 'VS_03.이상행동_14.교통약자']


In [4]:
# The model receives 16 * segments_num frames per clip.
segments_num = 1

# Backbone variant and the path of its backbone .pth checkpoint.
# The base variant is "vit_base_patch16_224" with vit_b_k710_dl_from_giant.pth.
model_name = "vit_small_patch16_224"
checkpoint_path = "/data/ephemeral/home/level2-3-cv-finalproject-cv-06/datapreprocess/vit_s_k710_dl_from_giant.pth"

# pretrained=False: the weights come from the checkpoint loaded below.
# Other constructor options (tubelet_size, drop_rate, drop_path_rate,
# attn_drop_rate, head_drop_rate, drop_block_rate, use_mean_pooling,
# init_scale, with_cp) are not passed, so the model's own defaults apply.
model = create_model(
    model_name,
    img_size=224,
    pretrained=False,
    num_classes=710,
    all_frames=16 * segments_num,
)

# map_location="cpu" restores the tensors on the CPU regardless of the device
# they were saved from; the weights are copied into `model`, which is moved to
# the GPU afterwards. Without it the checkpoint is materialised on its original
# device, which can fail or duplicate GPU memory.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
load_dict = torch.load(checkpoint_path, map_location="cpu")

# The state dict is stored under the "module" key of the checkpoint.
model.load_state_dict(load_dict["module"])

model.to("cuda")
model.eval()

# Resize transform producing 224x224 images, matching img_size above.
tf = A.Resize(224, 224)

In [5]:
# Helpers are defined in this cell so that the cell is self-contained.


def _frames_to_clip(frames):
    """Stack the frames of one clip into a single model input tensor.

    Args:
        frames: list of T arrays shaped (H, W, 3), as produced by the resize
            transform.

    Returns:
        Float tensor shaped (1, 3, T, H, W).
    """
    # NOTE(review): frames come straight from cv2.VideoCapture.read() and are
    # only resized - no BGR->RGB conversion or value scaling happens anywhere
    # in this pipeline. Confirm this matches what the checkpoint expects.
    clip = np.stack(frames)  # (T, H, W, 3)
    clip = clip.transpose(3, 0, 1, 2)  # (3, T, H, W)
    clip = np.expand_dims(clip, axis=0)  # (1, 3, T, H, W)
    return torch.from_numpy(clip).float()


def _run_batch(model, clips):
    """Run the model on a list of (1, 3, T, H, W) clips.

    Returns:
        numpy array shaped (len(clips), num_outputs).
    """
    input_batch = torch.cat(clips, dim=0).to("cuda")
    with torch.no_grad():
        output = model(input_batch)
    return output.cpu().numpy()


def _extract_video_features(path, model, tf, clip_len, batch_size):
    """Extract one output row per clip of `clip_len` frames from a video.

    Frames are resized with `tf`, grouped into consecutive clips of `clip_len`
    frames and pushed through `model` in batches of at most `batch_size` clips.
    A trailing partial clip is padded by repeating its last frame.

    Returns:
        numpy array shaped (num_clips, num_outputs), or None when no frame
        could be read from `path`.
    """
    cap = cv2.VideoCapture(path)
    features = []  # one array per processed batch
    frames = []  # frames of the clip currently being filled
    clips = []  # clips waiting to be batched
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of the video (or a read error).
                break

            frames.append(tf(image=frame)["image"])
            if len(frames) == clip_len:
                clips.append(_frames_to_clip(frames))
                frames = []
                if len(clips) == batch_size:
                    features.append(_run_batch(model, clips))
                    clips = []

        if frames:
            # Pad the last, incomplete clip by repeating its final frame.
            frames.extend([frames[-1]] * (clip_len - len(frames)))
            clips.append(_frames_to_clip(frames))

        if clips:
            # Flush whatever is still queued. This must not depend on a partial
            # clip being present: previously, videos whose frame count was an
            # exact multiple of clip_len lost every clip still waiting here.
            features.append(_run_batch(model, clips))
    finally:
        # Release the capture even if the transform or the model raises.
        cap.release()

    if not features:
        return None
    return np.concatenate(features)


# Frames per clip fed to the model, and clips per forward pass.
clip_len = 16 * segments_num
batch_size = 16

for folder_name in folder_list:
    time_start = datetime.now()

    print(f"{folder_name} feature extracting starts")

    out_dir = os.path.join(npy_root, folder_name)
    os.makedirs(out_dir, exist_ok=True)

    folder_path = os.path.join(root, folder_name)

    file_list = sorted(os.listdir(folder_path))
    print(f"==>> file_list: {file_list}")

    for file_name in file_list:
        path = os.path.join(folder_path, file_name)

        file_outputs = _extract_video_features(path, model, tf, clip_len, batch_size)
        if file_outputs is None:
            # Nothing could be read from this file; keep the existing log text.
            print(f"{file_name} ValueError: need at least one array to concatenate")
            continue

        # np.save appends ".npy", so the result is "<video file name>.npy".
        np.save(os.path.join(out_dir, file_name), file_outputs)

    time_end = datetime.now()
    total_time = time_end - time_start
    # Drop the fractional seconds from the printed duration.
    total_time = str(total_time).split(".")[0]

    print(f"{folder_name} feature extracting ended. Elapsed time: {total_time}")

VS_03.이상행동_07.전도 feature extracting starts
==>> file_list: ['C_3_7_48_BU_DYB_10-17_10-23-53_CB_RGB_DF2_M2.mp4', 'C_3_7_48_BU_DYB_10-17_10-23-53_CC_RGB_DF2_M2.mp4', 'C_3_7_48_BU_DYB_10-17_10-23-53_CD_RGB_DF2_M2.mp4', 'C_3_7_48_BU_DYB_10-17_10-23-53_CE_RGB_DF2_M2.mp4', 'C_3_7_48_BU_SMC_10-14_09-52-07_CB_RGB_DF2_M2.mp4', 'C_3_7_48_BU_SMC_10-14_09-52-07_CD_RGB_DF2_M2.mp4', 'C_3_7_48_BU_SMC_10-14_09-52-07_CE_RGB_DF2_M2.mp4', 'C_3_7_49_BU_DYB_10-17_10-25-31_CB_RGB_DF2_M2.mp4', 'C_3_7_49_BU_DYB_10-17_10-25-31_CC_RGB_DF2_M2.mp4', 'C_3_7_49_BU_DYB_10-17_10-25-31_CD_RGB_DF2_M2.mp4', 'C_3_7_49_BU_DYB_10-17_10-25-31_CE_RGB_DF2_M2.mp4', 'C_3_7_49_BU_SMC_10-14_09-59-53_CA_RGB_DF2_F2.mp4', 'C_3_7_49_BU_SMC_10-14_09-59-53_CC_RGB_DF2_F2.mp4', 'C_3_7_49_BU_SMC_10-14_09-59-53_CD_RGB_DF2_F2.mp4', 'C_3_7_50_BU_DYB_10-17_10-28-37_CA_RGB_DF2_F2.mp4', 'C_3_7_50_BU_DYB_10-17_10-28-37_CB_RGB_DF2_F2.mp4', 'C_3_7_50_BU_DYB_10-17_10-28-37_CC_RGB_DF2_F2.mp4', 'C_3_7_50_BU_DYB_10-17_10-28-37_CD_RGB_DF2_F2.mp4', 'C_3

In [6]:
# The model receives 16 * segments_num frames per clip.
segments_num = 1

# Backbone variant and the path of its backbone .pth checkpoint.
# The small variant is "vit_small_patch16_224" with vit_s_k710_dl_from_giant.pth.
model_name = "vit_base_patch16_224"
checkpoint_path = "/data/ephemeral/home/level2-3-cv-finalproject-cv-06/datapreprocess/vit_b_k710_dl_from_giant.pth"

# pretrained=False: the weights come from the checkpoint loaded below.
# Other constructor options (tubelet_size, drop_rate, drop_path_rate,
# attn_drop_rate, head_drop_rate, drop_block_rate, use_mean_pooling,
# init_scale, with_cp) are not passed, so the model's own defaults apply.
model = create_model(
    model_name,
    img_size=224,
    pretrained=False,
    num_classes=710,
    all_frames=16 * segments_num,
)

# map_location="cpu" restores the tensors on the CPU regardless of the device
# they were saved from; the weights are copied into `model`, which is moved to
# the GPU afterwards. Without it the checkpoint is materialised on its original
# device, which can fail or duplicate GPU memory.
# NOTE(review): torch.load unpickles the file - only load trusted checkpoints.
load_dict = torch.load(checkpoint_path, map_location="cpu")

# The state dict is stored under the "module" key of the checkpoint.
model.load_state_dict(load_dict["module"])

model.to("cuda")
model.eval()

# Resize transform producing 224x224 images, matching img_size above.
tf = A.Resize(224, 224)

In [7]:
# Helpers are defined in this cell so that the cell is self-contained.


def _frames_to_clip(frames):
    """Stack the frames of one clip into a single model input tensor.

    Args:
        frames: list of T arrays shaped (H, W, 3), as produced by the resize
            transform.

    Returns:
        Float tensor shaped (1, 3, T, H, W).
    """
    # NOTE(review): frames come straight from cv2.VideoCapture.read() and are
    # only resized - no BGR->RGB conversion or value scaling happens anywhere
    # in this pipeline. Confirm this matches what the checkpoint expects.
    clip = np.stack(frames)  # (T, H, W, 3)
    clip = clip.transpose(3, 0, 1, 2)  # (3, T, H, W)
    clip = np.expand_dims(clip, axis=0)  # (1, 3, T, H, W)
    return torch.from_numpy(clip).float()


def _run_batch(model, clips):
    """Run the model on a list of (1, 3, T, H, W) clips.

    Returns:
        numpy array shaped (len(clips), num_outputs).
    """
    input_batch = torch.cat(clips, dim=0).to("cuda")
    with torch.no_grad():
        output = model(input_batch)
    return output.cpu().numpy()


def _extract_video_features(path, model, tf, clip_len, batch_size):
    """Extract one output row per clip of `clip_len` frames from a video.

    Frames are resized with `tf`, grouped into consecutive clips of `clip_len`
    frames and pushed through `model` in batches of at most `batch_size` clips.
    A trailing partial clip is padded by repeating its last frame.

    Returns:
        numpy array shaped (num_clips, num_outputs), or None when no frame
        could be read from `path`.
    """
    cap = cv2.VideoCapture(path)
    features = []  # one array per processed batch
    frames = []  # frames of the clip currently being filled
    clips = []  # clips waiting to be batched
    try:
        while cap.isOpened():
            success, frame = cap.read()
            if not success:
                # End of the video (or a read error).
                break

            frames.append(tf(image=frame)["image"])
            if len(frames) == clip_len:
                clips.append(_frames_to_clip(frames))
                frames = []
                if len(clips) == batch_size:
                    features.append(_run_batch(model, clips))
                    clips = []

        if frames:
            # Pad the last, incomplete clip by repeating its final frame.
            frames.extend([frames[-1]] * (clip_len - len(frames)))
            clips.append(_frames_to_clip(frames))

        if clips:
            # Flush whatever is still queued. This must not depend on a partial
            # clip being present: previously, videos whose frame count was an
            # exact multiple of clip_len lost every clip still waiting here.
            features.append(_run_batch(model, clips))
    finally:
        # Release the capture even if the transform or the model raises.
        cap.release()

    if not features:
        return None
    return np.concatenate(features)


# Frames per clip fed to the model, and clips per forward pass.
clip_len = 16 * segments_num
batch_size = 16

for folder_name in folder_list:
    time_start = datetime.now()

    print(f"{folder_name} feature extracting starts")

    # Output goes to a separate "<folder>_base" directory under npy_root.
    out_dir = os.path.join(npy_root, folder_name + "_base")
    os.makedirs(out_dir, exist_ok=True)

    folder_path = os.path.join(root, folder_name)

    file_list = sorted(os.listdir(folder_path))
    print(f"==>> file_list: {file_list}")

    for file_name in file_list:
        path = os.path.join(folder_path, file_name)

        file_outputs = _extract_video_features(path, model, tf, clip_len, batch_size)
        if file_outputs is None:
            # Nothing could be read from this file; keep the existing log text.
            print(f"{file_name} ValueError: need at least one array to concatenate")
            continue

        # np.save appends ".npy", so the result is "<video file name>.npy".
        np.save(os.path.join(out_dir, file_name), file_outputs)

    time_end = datetime.now()
    total_time = time_end - time_start
    # Drop the fractional seconds from the printed duration.
    total_time = str(total_time).split(".")[0]

    print(f"{folder_name} feature extracting ended. Elapsed time: {total_time}")

Abuse feature extracting starts
==>> file_list: ['Abuse001_x264.mp4', 'Abuse002_x264.mp4', 'Abuse003_x264.mp4', 'Abuse004_x264.mp4', 'Abuse005_x264.mp4', 'Abuse006_x264.mp4', 'Abuse007_x264.mp4', 'Abuse008_x264.mp4', 'Abuse009_x264.mp4', 'Abuse010_x264.mp4', 'Abuse011_x264.mp4', 'Abuse012_x264.mp4', 'Abuse013_x264.mp4', 'Abuse014_x264.mp4', 'Abuse015_x264.mp4', 'Abuse016_x264.mp4', 'Abuse017_x264.mp4', 'Abuse018_x264.mp4', 'Abuse019_x264.mp4', 'Abuse020_x264.mp4', 'Abuse021_x264.mp4', 'Abuse022_x264.mp4', 'Abuse023_x264.mp4', 'Abuse024_x264.mp4', 'Abuse025_x264.mp4', 'Abuse026_x264.mp4', 'Abuse027_x264.mp4', 'Abuse028_x264.mp4', 'Abuse029_x264.mp4', 'Abuse030_x264.mp4', 'Abuse031_x264.mp4', 'Abuse032_x264.mp4', 'Abuse033_x264.mp4', 'Abuse034_x264.mp4', 'Abuse035_x264.mp4', 'Abuse036_x264.mp4', 'Abuse037_x264.mp4', 'Abuse038_x264.mp4', 'Abuse039_x264.mp4', 'Abuse040_x264.mp4', 'Abuse041_x264.mp4', 'Abuse042_x264.mp4', 'Abuse043_x264.mp4', 'Abuse044_x264.mp4', 'Abuse045_x264.mp4', 'Abuse