In [13]:
import os
import json
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

from process import compress_video

import tensorflow as tf
from keras.models import Model
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense

%matplotlib inline

## Label Load 및 추출 함수

In [14]:
import numpy as np

def quadrant_diff(arr, highlight_map):
    """
    Relabel blocks by how strongly each spatial quadrant changes between consecutive blocks.

    For every pair of consecutive elements ``arr[i]`` / ``arr[i + 1]`` the block is cut
    into four quadrants along axes 1 and 2, and the summed absolute difference of each
    quadrant is computed. The standard deviation of all those sums is used as the threshold.

    Labels are then updated in place for ``i`` in ``range(len(arr) - 1)``:
      * all four quadrants above the threshold -> ``highlight_map[i] += 1``
      * one to three quadrants above           -> ``highlight_map[i] = 2``
      * no quadrant above                      -> ``highlight_map[i] = 0``
    Entries at index ``len(arr) - 1`` and beyond are left untouched.

    Args:
    arr (sequence of np.ndarray): Blocks to compare. Each element is indexed with four
        axes (the notebook feeds 9x256x256x1 blocks); all elements are expected to share
        one shape.
    highlight_map (List[int]): Initial per-block labels. Mutated in place. If it is
        shorter than ``len(arr) - 1`` only the existing entries are updated.

    Returns:
    List: The same ``highlight_map`` object with updated labels.
    """
    n_pairs = len(arr) - 1
    if n_pairs < 1:
        # Fewer than two blocks: nothing to compare, labels stay as they are.
        return highlight_map

    # One row per consecutive pair, one column per quadrant. Computed once and reused
    # for both the threshold and the labelling pass.
    quadrant_diffs = np.zeros((n_pairs, 4), dtype=np.float64)

    # Cast to float64 before subtracting: on unsigned data (e.g. uint8 frames) a plain
    # `a - b` wraps around, turning a small negative change into a huge positive one.
    current = np.asarray(arr[0], dtype=np.float64)
    for i in range(n_pairs):
        following = np.asarray(arr[i + 1], dtype=np.float64)
        half_x = current.shape[1] // 2
        half_y = current.shape[2] // 2
        delta = np.abs(current - following)

        for quadrant in range(4):
            x_start = (quadrant % 2) * half_x
            y_start = (quadrant // 2) * half_y
            quadrant_diffs[i, quadrant] = delta[
                :, x_start:x_start + half_x, y_start:y_start + half_y, :
            ].sum()

        current = following

    # The standard deviation of every quadrant difference is the decision threshold.
    threshold = np.std(quadrant_diffs)
    counts_above = (quadrant_diffs > threshold).sum(axis=1)

    # Only touch labels that exist; the caller trims frames and labels to a common length.
    for i in range(min(n_pairs, len(highlight_map))):
        count_above_threshold = int(counts_above[i])

        if count_above_threshold == 4:
            highlight_map[i] += 1
        elif count_above_threshold >= 1:
            highlight_map[i] = 2
        else:
            highlight_map[i] = 0

    return highlight_map


In [15]:
# Load JSON data from 'test.json' into the notebook-level name `json_data`.
# NOTE(review): no function visible in this notebook reads this global; trainer() opens
# its own label file into a local variable of the same name. Confirm it is still needed.
with open('test.json', 'r') as file:
    json_data = json.load(file)

def parse_annotations(annotation:dict, block_num:int):
    """
    Load the frame array of one video entry and build its per-block labels.

    Every index listed under ``annot['highlight']`` in ``annotation['annots']`` is marked
    with 1 in a list of ``block_num`` zeros. The array stored at
    ``annotation['video_path']`` is loaded with ``np.load`` and the labels are then
    refined by ``quadrant_diff``.

    Args:
    annotation (dict): One entry of the label file. Reads the keys ``"video_path"`` and
        ``"annots"`` (a list of dicts that each carry a ``"highlight"`` list of indices).
    block_num (int): Initial number of blocks. The label list is padded with zeros when a
        highlight index lies beyond it, so the highlight lands on its own index.

    Returns:
    tuple: ``(video_frames, labels)`` - the loaded array and the label list returned by
        ``quadrant_diff``.

    Raises:
    ValueError: If a highlight index is negative.
    """
    # Kept for backward compatibility: the path of the last parsed entry stays visible
    # at notebook level, exactly as before.
    global video_path

    video_path = annotation["video_path"]
    annotations = annotation["annots"]

    # Collect every highlighted block index once, however many annotators listed it.
    highlighted = set()
    for annot in annotations:
        highlighted.update(annot['highlight'])

    ret = [0] * block_num

    for index in highlighted:
        if index < 0:
            raise ValueError(f"Negative highlight index {index!r} in annotation for {video_path!r}")
        if index >= len(ret):
            # Previously a bare `except:` appended a single 1 here, which put the label
            # at the wrong position for any index other than `block_num` and also hid
            # unrelated errors. Pad with zeros instead so the index itself is labelled.
            ret.extend([0] * (index + 1 - len(ret)))
        ret[index] = 1

    video_frames = np.load(video_path)
    ret = quadrant_diff(video_frames, ret)

    return video_frames, ret

In [22]:
# Query the physical GPUs TensorFlow can see and, when there is at least one, switch
# every GPU to on-demand memory growth before reporting physical vs. logical GPU counts.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # A RuntimeError is reported but not re-raised, so the notebook keeps running.
        # NOTE(review): presumably raised when the GPUs were already initialized in this
        # kernel - confirm against the TensorFlow docs.
        print(e)


1 Physical GPUs, 1 Logical GPUs


## Model - Video

In [17]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, TimeDistributed, Conv2D, MaxPooling2D, Flatten, LSTM, Dense

def create_cnn_lstm_model(input_shape=(9, 256, 256, 1), num_classes=3, conv_filters=32, lstm_units=50):
    """
    Build an (uncompiled) CNN + LSTM classifier for blocks of frames.

    Each time step goes through one 3x3 convolution and a 2x2 max-pool, is flattened,
    and the resulting sequence is fed to a single LSTM followed by a softmax layer.
    Calling the function without arguments builds the same network as before.

    Memory note: with the default input the flattened per-step feature vector has
    516,128 values, which makes the LSTM alone hold about 103M weights (see the
    model summary in this notebook). Training it with the default Keras batch size ran
    out of GPU memory in the recorded run; pass a smaller ``input_shape`` /
    ``conv_filters`` or train with a smaller batch size if that happens.

    Args:
    input_shape (tuple): Shape of one sample as (time steps, height, width, channels).
    num_classes (int): Number of softmax outputs.
    conv_filters (int): Number of filters in the convolution layer.
    lstm_units (int): Number of units in the LSTM layer.

    Returns:
    Model: The assembled Keras functional model.
    """
    # Define the input layer
    inputs = Input(shape=input_shape)

    # CNN layers, applied independently to every time step
    x = TimeDistributed(Conv2D(conv_filters, (3, 3), activation='relu'))(inputs)
    x = TimeDistributed(MaxPooling2D((2, 2)))(x)
    x = TimeDistributed(Flatten())(x)

    # LSTM layer over the sequence of per-step feature vectors
    x = LSTM(lstm_units)(x)

    # Output layer
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs=inputs, outputs=outputs)
    return model

# Instantiate and compile the model
# Build the video model under the '/GPU:0' device scope and compile it with the Adam
# optimizer, categorical cross-entropy loss and an accuracy metric.
with tf.device('/GPU:0'):  # use the first GPU
    model = create_cnn_lstm_model()
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [18]:
# Print the layer-by-layer summary (output shapes and parameter counts) of the video model.
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 9, 256, 256, 1)]  0         
                                                                 
 time_distributed_3 (TimeDis  (None, 9, 254, 254, 32)  320       
 tributed)                                                       
                                                                 
 time_distributed_4 (TimeDis  (None, 9, 127, 127, 32)  0         
 tributed)                                                       
                                                                 
 time_distributed_5 (TimeDis  (None, 9, 516128)        0         
 tributed)                                                       
                                                                 
 lstm_1 (LSTM)               (None, 50)                103235800 
                                                           

## Model Trainer 함수

In [19]:
def trainer(model, epochs=10, batch_size=None):
    """
    Train the video model one video at a time and checkpoint it after every video.

    If './video_model.h5' already exists, it is loaded and REPLACES the ``model``
    argument, so training resumes from the checkpoint. The first 70% of the entries in
    the label file are used for training; the remaining 30% are not evaluated here.

    Args:
    model: Compiled Keras model to train when no checkpoint exists.
    epochs (int): Epochs to run on each video (default 10, as before).
    batch_size (int | None): Batch size passed to ``model.fit``. ``None`` keeps the
        Keras default. The recorded run of this notebook hit a GPU out-of-memory error
        with the default, so pass a smaller value if that happens.

    Returns:
    tuple: ``(model, all_histories)`` where ``all_histories`` holds one Keras
        ``history.history`` dict per training video.
    """
    # Built with os.path.join: the former backslash literal contained the invalid escape
    # sequences "\l" and "\p" and only worked as a path on Windows.
    json_path = os.path.join('processed', 'label', 'processed_video_data.json')
    model_path = './video_model.h5'

    if os.path.exists(model_path):
        model = load_model(model_path)
        print("Loaded existing model.")
    else:
        print("No existing model found, starting training new model.")

    with open(json_path, 'r') as file:
        json_data = json.load(file)

    # Training uses the first 70% of the videos.
    train_length = int(len(json_data) * 0.7)
    train_data = json_data[:train_length]
    all_histories = []

    ## Training
    for json_dict in train_data:
        video, label = parse_annotations(json_dict, json_dict['three_secs'][-1])

        X = video            # loaded .npy array of this video
        y = np.array(label)  # label of every block of this video

        # Frames and labels can differ in length; keep only the common prefix.
        min_length = min(len(X), len(y))
        X, y = X[:min_length], y[:min_length]

        y = to_categorical(y, num_classes=3)

        history = model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=0)
        all_histories.append(history.history)  # Save history
        # Checkpoint after every video so an interrupted run can be resumed.
        model.save(model_path)

    return model, all_histories

In [20]:
def plot_training_history(histories):
    """
    Plot loss and accuracy curves, one line per training video.

    Args:
    histories (list of dict): Keras ``history.history`` dicts, each holding a 'loss'
        and an 'accuracy' list. Lists of different lengths are supported because every
        curve gets its own epoch axis.

    Returns:
    None. Shows the figure; prints a message and draws nothing when ``histories`` is
    empty (previously an IndexError).
    """
    if not histories:
        print("No training history to plot.")
        return

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(14, 5))

    for i, run in enumerate(histories):
        label = f'Training {i+1}'
        loss = run['loss']
        acc = run['accuracy']
        # Epochs are numbered from 1 and derived per curve, not from the first run only.
        loss_ax.plot(range(1, len(loss) + 1), loss, label=label)
        acc_ax.plot(range(1, len(acc) + 1), acc, label=label)

    loss_ax.set_title('Loss over training videos')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    acc_ax.set_title('Accuracy over training videos')
    acc_ax.set_xlabel('Epochs')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend()

    fig.tight_layout()
    plt.show()

In [21]:
# Run training for the video model. trainer() returns the trained model together with
# the list of per-video history dicts that plot_training_history() consumes.
model_traied, history = trainer(model)

Loaded existing model.


ResourceExhaustedError: Graph execution error:

Detected at node 'gradient_tape/model/time_distributed_1/max_pooling2d/MaxPool/MaxPoolGrad' defined at (most recent call last):
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
      app.start()
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
      self.io_loop.start()
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\asyncio\base_events.py", line 601, in run_forever
      self._run_once()
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\asyncio\base_events.py", line 1905, in _run_once
      handle._run()
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
      await result
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel\ipkernel.py", line 359, in execute_request
      await super().execute_request(stream, ident, parent)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel\ipkernel.py", line 446, in do_execute
      res = shell.run_cell(
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\IPython\core\interactiveshell.py", line 3006, in run_cell
      result = self._run_cell(
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\IPython\core\interactiveshell.py", line 3061, in _run_cell
      result = runner(coro)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\IPython\core\interactiveshell.py", line 3266, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\IPython\core\interactiveshell.py", line 3445, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\IPython\core\interactiveshell.py", line 3505, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\daeho\AppData\Local\Temp\ipykernel_40680\2640403355.py", line 1, in <module>
      model_traied, history = trainer(model)
    File "C:\Users\daeho\AppData\Local\Temp\ipykernel_40680\504081248.py", line 33, in trainer
      history = model.fit(X, y, epochs=10, verbose=0)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\engine\training.py", line 997, in train_step
      self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 576, in minimize
      grads_and_vars = self._compute_gradients(
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 634, in _compute_gradients
      grads_and_vars = self._get_gradients(
    File "c:\Users\daeho\anaconda3\envs\tf_ds\lib\site-packages\keras\optimizers\optimizer_v2\optimizer_v2.py", line 510, in _get_gradients
      grads = tape.gradient(loss, var_list, grad_loss)
Node: 'gradient_tape/model/time_distributed_1/max_pooling2d/MaxPool/MaxPoolGrad'
OOM when allocating tensor with shape[288,254,254,32] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node gradient_tape/model/time_distributed_1/max_pooling2d/MaxPool/MaxPoolGrad}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_7633]

In [None]:
# Plot the per-video loss/accuracy curves. Requires `history` from the trainer(model)
# cell, so that cell has to finish successfully first.
plot_training_history(history)

## Train Audio model

In [29]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

def create_simple_cnn():
    """
    Build and compile the small CNN used as the audio classifier.

    Architecture: three Conv2D(3x3, relu) + MaxPooling2D(2x2) stages with 32, 64 and 128
    filters on a 64x64x3 input, then Flatten, Dense(128, relu), Dropout(0.5) and a
    3-way softmax output. Compiled with Adam, categorical cross-entropy and accuracy.

    Returns:
    Sequential: The compiled Keras model.
    """
    cnn = Sequential()

    # First convolution stage declares the input shape.
    cnn.add(Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)))
    cnn.add(MaxPooling2D((2, 2)))

    # Remaining convolution stages only differ in their filter count.
    for n_filters in (64, 128):
        cnn.add(Conv2D(n_filters, (3, 3), activation='relu'))
        cnn.add(MaxPooling2D((2, 2)))

    # Classification head.
    cnn.add(Flatten())
    cnn.add(Dense(128, activation='relu'))
    cnn.add(Dropout(0.5))
    cnn.add(Dense(3, activation='softmax'))

    cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return cnn


In [31]:
# Build the compiled audio CNN and print its layer-by-layer summary.
model_audio = create_simple_cnn()
model_audio.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_5 (Conv2D)           (None, 62, 62, 32)        896       
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 31, 31, 32)       0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 29, 29, 64)        18496     
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 14, 14, 64)       0         
 2D)                                                             
                                                                 
 conv2d_7 (Conv2D)           (None, 12, 12, 128)       73856     
                                                                 
 max_pooling2d_7 (MaxPooling  (None, 6, 6, 128)       

In [32]:
# Load JSON data from 'test.json' into the notebook-level name `json_data`.
# NOTE(review): this repeats an earlier cell, and no function visible in this notebook
# reads the global; trainer() opens its own label file. Confirm it is still needed.
with open('test.json', 'r') as file:
    json_data = json.load(file)

def parse_annotations(annotation:dict, block_num:int):
    """
    Load the audio array of one entry and build its per-block labels.

    Audio counterpart of the video loader: it reads ``annotation['audio_path']`` instead
    of the video path. Note that defining it under the same name replaces the earlier
    ``parse_annotations`` for every cell that runs afterwards.

    Every index listed under ``annot['highlight']`` in ``annotation['annots']`` is marked
    with 1 in a list of ``block_num`` zeros. The array stored at the audio path is
    loaded with ``np.load`` and the labels are then refined by ``quadrant_diff``.

    Args:
    annotation (dict): One entry of the label file. Reads the keys ``"audio_path"`` and
        ``"annots"`` (a list of dicts that each carry a ``"highlight"`` list of indices).
    block_num (int): Initial number of blocks. The label list is padded with zeros when a
        highlight index lies beyond it, so the highlight lands on its own index.

    Returns:
    tuple: ``(audio_features, labels)`` - the loaded array and the label list returned by
        ``quadrant_diff``.

    Raises:
    ValueError: If a highlight index is negative.
    """
    # Kept for backward compatibility: the original publishes the last loaded path under
    # the notebook-level name `video_path`, even though it is an audio path here.
    global video_path

    video_path = annotation["audio_path"]
    annotations = annotation["annots"]

    # Collect every highlighted block index once, however many annotators listed it.
    highlighted = set()
    for annot in annotations:
        highlighted.update(annot['highlight'])

    ret = [0] * block_num

    for index in highlighted:
        if index < 0:
            raise ValueError(f"Negative highlight index {index!r} in annotation for {video_path!r}")
        if index >= len(ret):
            # Previously a bare `except:` appended a single 1 here, which put the label
            # at the wrong position for any index other than `block_num` and also hid
            # unrelated errors. Pad with zeros instead so the index itself is labelled.
            ret.extend([0] * (index + 1 - len(ret)))
        ret[index] = 1

    audio_features = np.load(video_path)
    # NOTE(review): quadrant_diff indexes every element with four axes; confirm the
    # stored audio array has that layout (the audio CNN expects 64x64x3 samples).
    ret = quadrant_diff(audio_features, ret)

    return audio_features, ret

In [33]:
def trainer(model, epochs=10, batch_size=None):
    """
    Train the audio model one entry at a time and checkpoint it after every entry.

    Defining this function replaces the earlier video ``trainer`` for every cell that
    runs afterwards.

    If './audio_model.h5' already exists, it is loaded and REPLACES the ``model``
    argument, so training resumes from the checkpoint. The first 70% of the entries in
    the label file are used for training.

    Args:
    model: Compiled Keras model to train when no checkpoint exists.
    epochs (int): Epochs to run on each entry (default 10, as before).
    batch_size (int | None): Batch size passed to ``model.fit``. ``None`` keeps the
        Keras default.

    Returns:
    tuple: ``(model, all_histories)`` where ``all_histories`` holds one Keras
        ``history.history`` dict per training entry.
    """
    # Built with os.path.join: the former backslash literal contained the invalid escape
    # sequences "\l" and "\p" and only worked as a path on Windows.
    json_path = os.path.join('processed', 'label', 'processed_video_data.json')
    # Resume from the same file this function saves to. The previous code checked and
    # loaded './video_model.h5', which silently swapped the audio model for the video one.
    model_path = './audio_model.h5'

    if os.path.exists(model_path):
        model = load_model(model_path)
        print("Loaded existing model.")
    else:
        print("No existing model found, starting training new model.")

    with open(json_path, 'r') as file:
        json_data = json.load(file)

    # Training uses the first 70% of the entries.
    train_length = int(len(json_data) * 0.7)
    train_data = json_data[:train_length]
    all_histories = []

    ## Training
    for json_dict in train_data:
        video, label = parse_annotations(json_dict, json_dict['three_secs'][-1])

        X = video            # loaded .npy array of this entry
        y = np.array(label)  # label of every block of this entry

        # Features and labels can differ in length; keep only the common prefix.
        min_length = min(len(X), len(y))
        X, y = X[:min_length], y[:min_length]

        y = to_categorical(y, num_classes=3)

        history = model.fit(X, y, epochs=epochs, batch_size=batch_size, verbose=0)
        all_histories.append(history.history)  # Save history
        # Checkpoint after every entry so an interrupted run can be resumed.
        model.save(model_path)

    return model, all_histories

In [34]:
# Run training for the audio model. The returned (model, histories) tuple is not
# assigned to a name here.
trainer(model_audio)

KeyboardInterrupt: 