In [4]:
import os
import json
from tqdm import tqdm

from process import compress_video

import tensorflow as tf
from keras.models import Model
from keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense

### 비디오 데이터 전처리 및 npy로 저장

In [5]:
import cv2
import numpy as np

def preprocess_video_every_3_seconds(video_path:str, frame_size:tuple, block_nums:int, frame_rate=3):
    """
    Split a video into consecutive 3-second blocks of normalised grayscale frames.

    The video is read in chunks of ``int(fps * 3)`` frames. From each chunk the
    first ``frame_rate * 3`` frames are kept (consecutive frames from the start
    of the chunk, not frames spread evenly over the 3 seconds). Every frame is
    resized, converted to grayscale, given a trailing channel axis and divided
    by 255 as float32.

    Args:
    video_path (str): Path to the video file.
    frame_size (tuple): Target size handed to ``cv2.resize`` as ``dsize``, i.e. (width, height).
    block_nums (int): Number of blocks to collect before reading stops.
    frame_rate (int): A block keeps at most ``frame_rate * 3`` frames; a chunk with fewer than ``frame_rate`` frames is discarded.

    Returns:
    numpy.ndarray: Array of shape (num_blocks, frames_per_block, height, width, 1).
        The last collected block is always left out, so at most ``block_nums - 1``
        blocks are returned. An empty array is returned when no block survives
        (for example when no frame could be read).
    """

    frames_per_block = frame_rate * 3

    vidcap = cv2.VideoCapture(video_path)
    sequences = []
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        interval = int(fps * 3)  # number of source frames in one 3-second chunk

        while True:
            frames = []
            for _ in range(interval):
                success, frame = vidcap.read()
                if not success:
                    # End of stream (or decode failure): keep what was read so far.
                    break
                frame = cv2.resize(frame, frame_size, interpolation=cv2.INTER_AREA)
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                gray_frame = np.expand_dims(gray_frame, axis=-1)  # add the channel axis
                gray_frame = gray_frame.astype(np.float32) / 255.0
                frames.append(gray_frame)

            if len(frames) == 0:
                break

            # NOTE(review): the acceptance threshold is frame_rate while the slice
            # keeps frame_rate * 3 frames, so a short trailing chunk can be appended
            # here. It is always the last entry and is removed by the final [:-1].
            if len(frames) >= frame_rate:
                sequences.append(np.array(frames[:frames_per_block]))

            if len(sequences) == block_nums:
                break
    finally:
        # Release the capture handle even when decoding or resizing raises.
        vidcap.release()

    # NOTE(review): the last block is dropped unconditionally, including a complete
    # one when block_nums was reached. Kept as-is because callers may rely on the
    # resulting block count; confirm whether block_nums blocks were intended.
    return np.array(sequences[:-1])


In [6]:
# Duration buckets to process; add '5~20분' to the list to also cover the longer clips.
video_length = [
    '2~5분',
]

# Directory that receives the preprocessed .npy arrays.
output_video_dir = 'processed/video/'

In [9]:
# Preprocess every labelled video of each duration bucket, save the result as
# .npy and write a new label file that points at the saved arrays.

# Single source of truth for the frame size given to the preprocessing function.
target_frame_size = (256, 256)

# np.save and open(..., 'w') do not create missing directories.
os.makedirs(output_video_dir, exist_ok=True)

for i, leng in enumerate(video_length):

    output_json_path = f'processed/label/processed_video_data_{i}.json'
    json_path = f'data/라벨링데이터/video_summary_validation_data({leng}).json'
    video_path = f'data/원천데이터/{leng}/'

    os.makedirs(os.path.dirname(output_json_path), exist_ok=True)

    with open(json_path, 'r', encoding='utf-8') as f:
        label_data = json.load(f)

    new_video_data = []
    video_idx = 1  # only advanced for videos that were actually written

    for item in tqdm(label_data):
        # NOTE(review): the raw input is looked up with a '.npy' extension although
        # it is opened as a video file; confirm this matches the files on disk.
        input_video_name = item['filename'] + '.npy'
        input_video_path = os.path.join(video_path, input_video_name)
        output_video_name = f"processed_video_{video_idx}.npy"
        output_video_path = os.path.join(output_video_dir, output_video_name)

        if not os.path.exists(input_video_path):
            print(f"Not Found : {input_video_path}")
            continue

        # The last entry of "three_secs" is used as the number of 3-second blocks
        # to collect for this video.
        blocks_num = item["three_secs"][-1]

        output = preprocess_video_every_3_seconds(input_video_path, target_frame_size, blocks_num)

        # An unreadable video yields an empty array; do not save or label it.
        if output.size == 0:
            print(f"Empty Result : {input_video_path}")
            continue

        np.save(output_video_path, output)

        # Point the label entry at the preprocessed array ('category' is kept unchanged).
        item['filename'] = output_video_name
        item['path'] = output_video_path
        # Space-separated so it can be recovered later with str.split().
        item['quality'] = f"{target_frame_size[0]} {target_frame_size[1]}"

        video_idx += 1
        new_video_data.append(item)

    # Write the labels that belong to the preprocessed data.
    with open(output_json_path, 'w', encoding='utf-8') as f:
        json.dump(new_video_data, f, ensure_ascii=False, indent=2)

    print(f"Process Finish :: {leng}")

FileNotFoundError: [Errno 2] No such file or directory: '/Users/idaeho/Documents/GitHub/project_shorts/data/라벨링데이터/2~5분/SUMVID_SHORT_TRAIN/video_summary_training_data(2~5분).json'

### BASELINE 모델

In [28]:
# Model Definition
class CNNLSTM(Model):
    """Baseline CNN + LSTM binary classifier for frame sequences.

    Each time step goes through a 3x3 convolution, 2x2 max pooling and a
    flatten (all wrapped in ``TimeDistributed``); the resulting sequence is
    fed to an LSTM and a single sigmoid unit.

    Args:
        filters: Number of convolution filters.
        units: Number of LSTM units.
        activation: Activation used by the convolution.
        input_shape: Expected per-sample input shape. It is stored on the
            instance for reference only; the layers infer their shapes from
            the first batch they see.
        **kwargs: Forwarded to ``Model.__init__``.
    """

    def __init__(self, filters=32, units=50, activation='relu', input_shape=(None, 256, 256, 1), **kwargs):
        super().__init__(**kwargs)
        # Not forwarded to the first layer: in a subclassed model the argument has
        # no effect there and Keras warns about it.
        self.expected_input_shape = input_shape
        self.conv1 = TimeDistributed(Conv2D(filters, (3, 3), activation=activation))
        self.maxpool = TimeDistributed(MaxPooling2D((2, 2)))
        self.flatten = TimeDistributed(Flatten())
        self.lstm = LSTM(units)
        self.output_layer = Dense(1, activation='sigmoid')

    def call(self, inputs):
        """Run the forward pass.

        Presumably ``inputs`` is (batch, time, height, width, channels) and the
        result is (batch, 1) — verify against the caller.
        """
        x = self.conv1(inputs)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.lstm(x)
        x = self.output_layer(x)
        return x

In [29]:
# Instantiate the baseline network with its default hyper-parameters.
model = CNNLSTM()

# Binary classification setup: Adam optimizer, cross-entropy loss, accuracy metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

  super().__init__(**kwargs)


In [30]:
# Display the model's layer summary.
model.summary()

In [31]:
# Placeholder binary labels (one per sample) for smoke-testing the training loop.
# A seeded generator keeps the run reproducible.
# NOTE(review): `test` is not defined in any visible cell — confirm where it comes from.
label_rng = np.random.default_rng(42)
labels = label_rng.integers(0, 2, size=(test.shape[0], 1))

In [32]:
# Fit the baseline on the `test` array against the placeholder labels.
model.fit(
    x=test,
    y=labels,
    batch_size=5,
    epochs=10,
)

# Show the layer summary once training has finished.
model.summary()

Epoch 1/10


2024-04-25 21:52:26.385779: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 6s/step - accuracy: 0.4327 - loss: 1.4731
Epoch 2/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 5s/step - accuracy: 0.6003 - loss: 1.2597
Epoch 3/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 4s/step - accuracy: 0.5675 - loss: 1.2722
Epoch 4/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 6s/step - accuracy: 0.5870 - loss: 1.0582
Epoch 5/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 6s/step - accuracy: 0.6152 - loss: 0.9935
Epoch 6/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 6s/step - accuracy: 0.5701 - loss: 1.0379
Epoch 7/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 5s/step - accuracy: 0.6791 - loss: 0.7524
Epoch 8/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 6s/step - accuracy: 0.6089 - loss: 0.8424
Epoch 9/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

## Data Load

In [70]:
import numpy as np

# Load a saved array from the processed video directory (presumably the first
# file written by the preprocessing loop — verify).
# NOTE(review): `sample` is reassigned in the next cell from a raw .mp4 file.
sample = np.load("./processed/video/processed_video_1.npy")

In [11]:
# Run the preprocessing directly on a single raw clip as a quick check.
sample_path = 'data/원천데이터/2~5분/SUMVID_SHORT_TRAIN_01/test.mp4'

sample = preprocess_video_every_3_seconds(
    video_path=sample_path,
    frame_size=(256, 256),
    block_nums=100,
)

In [12]:
# (number of blocks in `sample`, number of frames in the first block)
len(sample), len(sample[0])

(65, 9)

In [55]:
# Shape of the first block; presumably (frames, height, width, channels) — verify.
sample[0].shape

(9, 256, 256, 1)