In [None]:
import numpy as np
import os, cv2

import tensorflow as tf
from tensorflow.keras.applications import vgg16
from tensorflow.keras.applications.vgg16 import preprocess_input

## Mounting Google Drive

In [None]:
# Mount Google Drive at /content/drive so that the /content/drive/MyDrive/...
# paths used by the rest of the notebook are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Drive folder that holds the video clips to process.
video_path = '/content/drive/MyDrive/Video_Captioning/filtered_YT_clips'
# One full path per directory entry, ordered by entry name.
video_file_paths = [f'{video_path}/{clip_name}' for clip_name in sorted(os.listdir(video_path))]


## Extracting Frames from video

In [None]:
def extract_frames(video_file_path): # Single video file
  """Write every frame of one video to JPEG files and return their paths.

  Frames go to ``<video dir>/../filtered_extracted_frames/<video name>/`` and
  are named ``<video name>_<index>.jpg``, where ``<video name>`` is the file
  name without its extension and ``<index>`` counts frames from 0. The output
  folder (and any missing parent) is created when the first frame is read.

  Args:
    video_file_path: Path to a single video file.

  Returns:
    List of the written frame paths, in the order the frames were read. The
    list is empty when the capture is not opened or yields no frame.

  Raises:
    OSError: If ``cv2.imwrite`` reports that a frame could not be written, or
      if the output folder cannot be created.
  """
  # Derive the folder and the clip name from the path itself instead of fixed
  # character offsets, so any directory and any extension length works.
  video_dir = os.path.dirname(video_file_path)
  video_name = os.path.splitext(os.path.basename(video_file_path))[0]
  ext_path = os.path.join(video_dir, '..', 'filtered_extracted_frames', video_name)

  frames_path = []
  count = 0 # number of frames written so far; also the index of the next frame
  video_cap = cv2.VideoCapture(video_file_path)
  try:
    while video_cap.isOpened():
      flag, frame = video_cap.read() # flag is False once no further frame can be read
      if not flag:
        break
      if count == 0:
        # Created lazily so an unreadable video leaves no empty folder behind.
        os.makedirs(ext_path, exist_ok=True)
      extracting_path = os.path.join(ext_path, video_name + '_' + str(count) + '.jpg')
      # imwrite signals failure through its return value, not an exception.
      if not cv2.imwrite(extracting_path, frame):
        raise OSError('Could not write frame to ' + extracting_path)
      frames_path.append(extracting_path)
      count += 1
  finally:
    # Always free the capture handle, even when writing a frame failed.
    # (No cv2.destroyAllWindows() here: this function never opens a window.)
    video_cap.release()

  return frames_path

## Extracting features with VGG16

In [None]:
# VGG16 with ImageNet weights; include_top=True keeps the layers on top of the
# convolutional base, which the truncated feature model is built from.
base_model = vgg16.VGG16(weights='imagenet', include_top=True, input_shape=(224, 224, 3))
base_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# Cut the network at its second-to-last layer: `model` takes the same input as
# `base_model` but outputs that layer's activations instead of the final layer's.
# NOTE(review): for Keras VGG16 with include_top=True this is presumably the
# 4096-unit `fc2` layer — confirm against the printed summary.
out = base_model.layers[-2].output
model = tf.keras.Model(inputs=base_model.input, outputs=out)
model.summary()

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [None]:
def load_image(frame_path):
  """Read one image from disk and resize it to 224x224.

  Args:
    frame_path: Path to an image file.

  Returns:
    The result of ``cv2.resize`` on the loaded image with target size
    (224, 224). OpenCV's ``imread`` loads colour images in BGR channel order,
    so callers that need RGB must reverse the last axis themselves.

  Raises:
    OSError: If OpenCV cannot read an image from ``frame_path``.
  """
  img = cv2.imread(frame_path)
  # imread returns None instead of raising when the file is missing or
  # unreadable; fail here with the offending path rather than inside resize.
  if img is None:
    raise OSError('Could not read image: ' + str(frame_path))
  return cv2.resize(img, (224, 224))

In [None]:
def extract_features(video_file_path, model, num_frames=80, preprocess=True):
  """Extract per-frame features for one video.

  The video is dumped to frames with ``extract_frames``, ``num_frames`` frames
  are picked at evenly spaced positions (a clip with fewer frames repeats
  some), each is loaded with ``load_image`` and the batch is passed to
  ``model.predict``.

  Args:
    video_file_path: Path to a single video file.
    model: Object with a ``predict(batch)`` method taking a
      (num_frames, 224, 224, 3) float array.
    num_frames: How many frames to sample from the video. Defaults to 80.
    preprocess: When True (default) the batch is converted from OpenCV's BGR
      order to RGB and passed through the VGG16 ``preprocess_input`` before
      prediction, as the Keras VGG16 documentation requires. Pass False to
      feed the raw resized frames, which reproduces features computed before
      this step was added.

  Returns:
    ``np.array(model.predict(batch))`` — one row per sampled frame, i.e. the
    leading axis has length ``num_frames``.
    NOTE(review): with the truncated VGG16 of this notebook the shape is
    presumably (num_frames, 4096) — confirm.

  Raises:
    ValueError: If ``num_frames`` is smaller than 1 or no frame could be
      extracted from the video.
  """
  if num_frames < 1:
    raise ValueError('num_frames must be at least 1, got ' + str(num_frames))

  frames_path = extract_frames(video_file_path)
  if not frames_path:
    # Without this guard the sampling below fails with an opaque IndexError.
    raise ValueError('No frames could be extracted from ' + str(video_file_path))

  # Evenly spaced frame indices from the first to the last frame.
  samples = np.round(np.linspace(0, len(frames_path) - 1, num_frames))
  # float32 is what the network consumes; float64 would only double the memory.
  frames = np.zeros((num_frames, 224, 224, 3), dtype=np.float32)
  for i, sample in enumerate(samples):
    frames[i] = load_image(frames_path[int(sample)])

  if preprocess:
    # load_image yields BGR (OpenCV); preprocess_input expects RGB input.
    # Copy so the preprocessing never works on a reversed view of `frames`.
    frames = preprocess_input(frames[..., ::-1].copy())

  features = np.array(model.predict(frames)) # (num_frames, feature_dim)

  return features


In [None]:
def extract_features_and_save_numpy(video_file_paths, model,
                                    features_save_path='/content/drive/MyDrive/Video_Captioning/filtered_features',
                                    skip_existing=False):
  """Extract features for each video and save them as ``.npy`` files.

  For every path, ``extract_features(path, model)`` is called and its result
  is stored as ``<features_save_path>/<video name>.npy``, where
  ``<video name>`` is the file name without its extension.

  Args:
    video_file_paths: Iterable of video file paths.
    model: Forwarded unchanged to ``extract_features``.
    features_save_path: Output folder; created (with missing parents) if it
      does not exist. Defaults to the notebook's Drive folder.
    skip_existing: When True, a video whose ``.npy`` file already exists is
      skipped, so an interrupted run can be resumed. Defaults to False, which
      recomputes and overwrites every file.
  """
  os.makedirs(features_save_path, exist_ok=True)

  for video_file_path in video_file_paths:
    # Name the output after the clip, whatever its folder or extension length.
    video_name = os.path.splitext(os.path.basename(video_file_path))[0]
    save_path = os.path.join(features_save_path, video_name + '.npy')
    if skip_existing and os.path.isfile(save_path):
      continue
    features = extract_features(video_file_path, model) # one row per sampled frame
    np.save(save_path, features)

In [None]:
# Run the whole pipeline: extract features for every listed video and save
# one .npy file per video.
extract_features_and_save_numpy(video_file_paths, model)

