In [2]:
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense

### 비디오 데이터 전처리 및 npy로 저장

In [21]:
import cv2
import numpy as np

def preprocess_video_every_3_seconds(video_path, frame_size, frame_rate=3):
    """
    Extracts frames every 3 seconds from a video file, resizing them to frame_size and converting to grayscale.
    
    Args:
    video_path (str): Path to the video file.
    frame_size (tuple): Size (height, width) to resize frames.
    frame_rate (int): Number of frames to extract per second within the 3-second window.

    Returns:
    List[numpy.ndarray]: List of sequences, where each sequence is a numpy array of shape (num_frames, height, width, 1).
    """

    vidcap = cv2.VideoCapture(video_path)
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    interval = int(fps * 3)

    sequences = []
    while True:
        frames = []
        for _ in range(interval):
            success, frame = vidcap.read()
            if not success:
                break
            frame = cv2.resize(frame, frame_size, interpolation=cv2.INTER_AREA)
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray_frame = np.expand_dims(gray_frame, axis=-1)  # Add channel dimension
            gray_frame = gray_frame.astype(np.float32) / 255.0  # Convert to float and normalize
            frames.append(gray_frame)

        if len(frames) == 0:
            break
        sequences.append(np.array(frames[:frame_rate * 3]))

    vidcap.release()
    return np.array(sequences)

In [22]:
# Example usage:
video_sequences = preprocess_video_every_3_seconds("/Users/idaeho/Documents/GitHub/project_shorts/New_sample/원천데이터/2~5분/SUMVID_SHORT_TRAIN_01/test.mp4", (256, 256))

In [23]:
np.array(video_sequences[0]).shape

(9, 256, 256, 1)

In [24]:
np.save('./test.npy', video_sequences)

In [25]:
test = np.load('./test.npy')

In [26]:
test

array([[[[[0.9529412 ],
          [0.9647059 ],
          [0.9647059 ],
          ...,
          [0.95686275],
          [0.9607843 ],
          [0.9490196 ]],

         [[0.75686276],
          [0.7137255 ],
          [0.70980394],
          ...,
          [0.91764706],
          [0.80784315],
          [0.6901961 ]],

         [[0.6156863 ],
          [0.5647059 ],
          [0.56078434],
          ...,
          [0.6       ],
          [0.5019608 ],
          [0.46666667]],

         ...,

         [[0.21568628],
          [0.21568628],
          [0.21960784],
          ...,
          [0.8235294 ],
          [0.84313726],
          [0.54509807]],

         [[0.16078432],
          [0.17254902],
          [0.17254902],
          ...,
          [0.39607844],
          [0.3647059 ],
          [0.31764707]],

         [[0.3764706 ],
          [0.42352942],
          [0.42745098],
          ...,
          [0.38431373],
          [0.3882353 ],
          [0.37254903]]],


        [[[0.9529

### 모델 구축

In [28]:
# Model Definition
class CNNLSTM(Model):
    def __init__(self, filters=32, units=50, activation='relu', input_shape=(None, 256, 256, 1), **kwargs):
        super().__init__(**kwargs)
        self.conv1 = TimeDistributed(Conv2D(filters, (3, 3), activation=activation), input_shape=input_shape)
        self.maxpool = TimeDistributed(MaxPooling2D((2, 2)))
        self.flatten = TimeDistributed(Flatten())
        self.lstm = LSTM(units)
        self.output_layer = Dense(1, activation='sigmoid')
        
    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.maxpool(x)
        x = self.flatten(x)
        x = self.lstm(x)
        x = self.output_layer(x)
        return x

In [29]:
# Example usage
model = CNNLSTM()
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  super().__init__(**kwargs)


In [30]:
model.summary()

In [31]:
labels = np.random.randint(0, 2, size=(test.shape[0], 1))

In [32]:
# Train the model
model.fit(test, labels, epochs=10, batch_size=5)

# Print the model summary
model.summary()

Epoch 1/10


2024-04-25 21:52:26.385779: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 6s/step - accuracy: 0.4327 - loss: 1.4731
Epoch 2/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 5s/step - accuracy: 0.6003 - loss: 1.2597
Epoch 3/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 4s/step - accuracy: 0.5675 - loss: 1.2722
Epoch 4/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 6s/step - accuracy: 0.5870 - loss: 1.0582
Epoch 5/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 6s/step - accuracy: 0.6152 - loss: 0.9935
Epoch 6/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 6s/step - accuracy: 0.5701 - loss: 1.0379
Epoch 7/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m72s[0m 5s/step - accuracy: 0.6791 - loss: 0.7524
Epoch 8/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 6s/step - accuracy: 0.6089 - loss: 0.8424
Epoch 9/10
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1