In [1]:
# Ask the driver which GPU this runtime has; IPython's `!` runs the shell
# command and the assignment captures its output lines.
session = !nvidia-smi
# Join the captured lines back together so the table prints as one block.
print('\n'.join(session))

Sat Apr 24 15:19:42 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   53C    P8    10W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Imports

In [2]:
import cv2
import numpy as np
import pandas as pd
from PIL import Image
from random import randint
import matplotlib.pyplot as plt

from keras.models import Model
from keras.layers import Input
from keras.applications.inception_v3 import InceptionV3, preprocess_input

In [3]:
# Mount Google Drive into the Colab filesystem; the dataset directory used by
# this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive/')

Mounted at /content/gdrive/


In [4]:
# Root of the project dataset on the mounted Drive.
dataset_dir = "/content/gdrive/My Drive/Colab Notebooks/project/dataset"
# Locations derived from the dataset root.
samples_dir = dataset_dir + "/raw"
output_dir = dataset_dir + "/extracted"
labels_path = dataset_dir + "/raw_labels.csv"

# One row per sample; the printed column list documents the row layout.
labels = pd.read_csv(labels_path)
print("label dataframe object structure: " + str(labels.columns.tolist()))

label dataframe object structure: ['Unnamed: 0', 'directory', 'file_name', 'extension', 'frames', 'ed', 'es', 'diff']


In [5]:
# Sequence-sampling settings read by calc_range():
#   length - number of frames kept per sample (ed->es span plus padding)
#   buffer - smallest padding initially drawn for either side of that span
length, buffer = 50, 5

In [6]:
# turn a string path to terminal-readable
def _ter(path):
  return path.replace(' ', "\ ")

In [7]:
# scratch directory layout (plain strings, for use from Python)
temp_path = "/content/temporary"
temp_path_ = f"{temp_path}/"
temp_frames_path = f"{temp_path_}frames"

# shell-escaped twins of the three paths above (leading underscore = escaped)
_temp_path, _temp_path_, _temp_frames_path = map(
    _ter, (temp_path, temp_path_, temp_frames_path))

# initialisation of the temporary directory where frames are generated
def init_temp():
  """Reset the scratch area: delete `temp_path`, then recreate `temp_frames_path`.

  Done with the standard library rather than shell commands, so the plain
  (unescaped) path strings are used directly and no subprocess is spawned on
  each call.
  """
  import os
  import shutil
  # removing temporary folder (a missing folder is not an error, like `rm -rf`)
  shutil.rmtree(temp_path, ignore_errors=True)
  # making temporary folder for frames (parents included, like `mkdir -p`)
  os.makedirs(temp_frames_path, exist_ok=True)

# show the shell-escaped paths, one per line
print(_temp_path, _temp_path_, _temp_frames_path, sep="\n")

/content/temporary
/content/temporary/
/content/temporary/frames


In [8]:
# Input size fed to InceptionV3: 299x299 pixels, 3 channels.
# Named `input_shape` so that the builtin `input()` is not shadowed for the
# rest of the kernel session.
input_shape = (299, 299, 3)
# ImageNet-pretrained InceptionV3 built on an explicit input tensor.
iv3 = InceptionV3(input_tensor=Input(input_shape), weights='imagenet', include_top=True)
# Feature extractor: same input, but the output is taken at the 'avg_pool'
# layer, so predict() yields that layer's activations rather than the
# network's final output.
cnn = Model(inputs=iv3.input, outputs=iv3.get_layer('avg_pool').output)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5


In [9]:
def seq2frame(file_path, start, end):
  """Dump frames `start` .. `end - 1` of a video as PNG files.

  Files are written to the temporary frames directory and are named
  "frame_<index>.png", where <index> is the frame's position in the video.

  Args:
    file_path: path of the video handed to cv2.VideoCapture.
    start: index of the first frame to save.
    end: index at which reading stops; this frame itself is not saved.

  Returns:
    (frame_file_path, count): the path prefix shared by all saved frames
    (append "<index>.png" to get a file name), and the index reached when
    reading stopped (equal to `end` unless the video ran out earlier).
  """
  cap = cv2.VideoCapture(file_path)
  # beginning of the name of frame file
  frame_name = 'frame'
  # frame file path prefix; the unescaped directory is used because these
  # paths are consumed by Python, not by a shell
  frame_file_path = "%s/%s_" % (temp_frames_path, frame_name)
  # index of the frame about to be read, also used to name output frames
  count = 0
  try:
    # stop before decoding frame `end`: it would be discarded anyway
    while count != end:
      success, image = cap.read()
      if not success:
        # ran out of frames (or the video could not be opened)
        break
      if start <= count < end:
        # name of the frame file
        frame_file_name = "%s%d.png" % (frame_file_path, count)
        # saves images to temporary frame folder
        # NOTE(review): cv2 delivers frames in BGR order while imsave treats
        # 3-channel arrays as RGB and ignores `cmap` for them - harmless if
        # the videos are greyscale; confirm for colour input.
        plt.imsave(frame_file_name, image, cmap = plt.cm.gray)
      count += 1
  finally:
    # always hand the decoder / file handle back, even if saving fails
    cap.release()
  return frame_file_path, count

def prepare_frame(frame_path):
  """Load one saved frame as an array ready for the InceptionV3 extractor.

  Args:
    frame_path: path of an image file readable by PIL.

  Returns:
    Array of shape (299, 299, 3): the RGB image after `preprocess_input`.
  """
  # open and convert image to RGB 299x299; the context manager closes the file
  with Image.open(frame_path) as img_file:
    img_obj = img_file.convert('RGB').resize((299, 299))
  # preprocess_input performs the normalisation itself and expects raw 0-255
  # pixel values, so the array is passed unscaled. Dividing by 255 beforehand
  # would squash every pixel into a narrow band next to -1.
  # NOTE: features saved with the earlier double-scaled input are not
  # comparable with the ones produced now; re-extract them.
  img_arr = np.array(img_obj).astype(np.float32)
  img_arr = preprocess_input(img_arr)
  return img_arr

def calc_range(frames, ed, es, diff, seq_length=None, min_buffer=None):
  """Pick how many extra frames to keep before `ed` and after `es`.

  The ed->es span (`diff` frames) is padded up to `seq_length` frames. The
  split of the padding between the two sides is drawn at random and then
  shifted so that both sides fit inside the video.

  Args:
    frames: total number of frames in the video.
    ed: index of the ed frame (also the number of frames available before it).
    es: index of the es frame.
    diff: number of frames from ed to es.
    seq_length: wanted sequence length; defaults to the module-level `length`.
    min_buffer: smallest padding initially drawn for either side; defaults to
      the module-level `buffer`. Relaxed automatically when the required
      padding is too small to honour it on both sides.

  Returns:
    (acc_before, acc_after), with acc_before + acc_after == seq_length - diff.

  Raises:
    ValueError: if `diff` exceeds `seq_length`, or if the video does not have
      enough frames around the ed->es span to reach `seq_length`.
  """
  if seq_length is None:
    seq_length = length
  if min_buffer is None:
    min_buffer = buffer
  # available padding before and after the ed and es frames respectively
  ava_before = ed
  ava_after = frames - es
  # required padding from either before or after the ed to es phase
  req_padding = seq_length - diff
  if req_padding < 0:
    raise ValueError(
        "ed->es span of %s frames is longer than the sequence length %s"
        % (diff, seq_length))
  if ava_before + ava_after < req_padding:
    # no split can fit; the previous search loop never terminated here
    raise ValueError(
        "video too short: %s frames of padding required, only %s available"
        % (req_padding, ava_before + ava_after))
  # random split; each side gets at least min_buffer when the padding allows
  low = min(min_buffer, req_padding // 2)
  acc_before = randint(low, req_padding - low)
  acc_after = req_padding - acc_before
  # move the excess to the other side if one side overshoots the video; at
  # most one side can overshoot because the total is known to fit
  if acc_before > ava_before:
    acc_before = ava_before
    acc_after = req_padding - acc_before
  elif acc_after > ava_after:
    acc_after = ava_after
    acc_before = req_padding - acc_after
  return acc_before, acc_after

def feature_extract_video(video_path, frames, ed, es, diff):
  """Turn one video into CNN features for a randomly padded ed->es window.

  Args:
    video_path: path of the video file.
    frames: total number of frames in the video.
    ed: index of the ed frame in the full video.
    es: index of the es frame in the full video.
    diff: number of frames from ed to es.

  Returns:
    (video_features, rel_ed, rel_es): the output of `cnn.predict` on the
    frames of the window, and the positions of the ed and es frames counted
    from the start of the window.
  """
  # start from an empty temporary directory
  init_temp()
  # padding chosen for each side of the ed->es span
  pad_before, pad_after = calc_range(frames, ed, es, diff)
  first, last = ed - pad_before, es + pad_after
  # write the window's frames to disk; only the shared path prefix is needed
  prefix, _ = seq2frame(video_path, first, last)
  # load and preprocess every frame of the window, in order
  batch = np.array(
      [prepare_frame(prefix + str(idx) + ".png") for idx in range(first, last)])
  # one forward pass over the whole window
  video_features = cnn.predict(batch)
  # after trimming, ed sits `pad_before` frames in and es `diff` frames later
  return video_features, pad_before, pad_before + diff

In [10]:
# Rows of [directory, file_name, extension, rel_ed, rel_es], one per processed
# sample; turned into a DataFrame once the loop has finished.
relative_labels = []

# label_subset = labels.values
# Only rows from position 3000 onwards are processed.
# NOTE(review): presumably this resumes an earlier, interrupted run - confirm.
# `relative_labels` will therefore describe this subset only.
label_subset = labels.values[3000:]

for index, sample in enumerate(label_subset):
  # progress details (1-based position within the subset)
  print(f"{index+1}/{len(label_subset)}")
  # get sample details; the first column (the CSV's unnamed index) is ignored
  _, directory, file_name, extension, frames, ed, es, diff = sample
  path = directory + file_name + extension
  # extract features and relative ed and es frame position (after trim)
  features, rel_ed, rel_es = feature_extract_video(path, frames, ed, es, diff)
  # save features to .npy file (np.save appends the ".npy" suffix itself)
  feature_path = f"{output_dir}/{file_name}"
  np.save(feature_path, features)
  # gather and append relative info to list; directory + file_name + extension
  # reassemble into the path of the file saved above
  rel_label = [output_dir + "/", file_name, ".npy", rel_ed, rel_es]
  relative_labels.append(rel_label)

1/1006
2/1006
3/1006
4/1006
5/1006
6/1006
7/1006
8/1006
9/1006
10/1006
11/1006
12/1006
13/1006
14/1006
15/1006
16/1006
17/1006
18/1006
19/1006
20/1006
21/1006
22/1006
23/1006
24/1006
25/1006
26/1006
27/1006
28/1006
29/1006
30/1006
31/1006
32/1006
33/1006
34/1006
35/1006
36/1006
37/1006
38/1006
39/1006
40/1006
41/1006
42/1006
43/1006
44/1006
45/1006
46/1006
47/1006
48/1006
49/1006
50/1006
51/1006
52/1006
53/1006
54/1006
55/1006
56/1006
57/1006
58/1006
59/1006
60/1006
61/1006
62/1006
63/1006
64/1006
65/1006
66/1006
67/1006
68/1006
69/1006
70/1006
71/1006
72/1006
73/1006
74/1006
75/1006
76/1006
77/1006
78/1006
79/1006
80/1006
81/1006
82/1006
83/1006
84/1006
85/1006
86/1006
87/1006
88/1006
89/1006
90/1006
91/1006
92/1006
93/1006
94/1006
95/1006
96/1006
97/1006
98/1006
99/1006
100/1006
101/1006
102/1006
103/1006
104/1006
105/1006
106/1006
107/1006
108/1006
109/1006
110/1006
111/1006
112/1006
113/1006
114/1006
115/1006
116/1006
117/1006
118/1006
119/1006
120/1006
121/1006
122/1006
123/1006
1

In [11]:
# tabulate the collected rows; column order follows each `rel_label` entry
df = pd.DataFrame(
    data=relative_labels,
    columns=['directory', 'file_name', 'extension', 'ed', 'es'],
)
df

Unnamed: 0,directory,file_name,extension,ed,es
0,/content/gdrive/My Drive/Colab Notebooks/proje...,0X1DE88A76BA76EB42,.npy,20,37
1,/content/gdrive/My Drive/Colab Notebooks/proje...,0X4E38E030A745C3A7,.npy,14,30
2,/content/gdrive/My Drive/Colab Notebooks/proje...,0X34F7CF2069479824,.npy,30,45
3,/content/gdrive/My Drive/Colab Notebooks/proje...,0XAEA31035D9CAA52,.npy,10,29
4,/content/gdrive/My Drive/Colab Notebooks/proje...,0X6D06CFF7DED256F,.npy,10,29
...,...,...,...,...,...
1001,/content/gdrive/My Drive/Colab Notebooks/proje...,0X43664BF0CDA9C803,.npy,14,33
1002,/content/gdrive/My Drive/Colab Notebooks/proje...,0X633F7FD1EA0A3F2C,.npy,9,25
1003,/content/gdrive/My Drive/Colab Notebooks/proje...,0X11BDF610427B903F,.npy,22,38
1004,/content/gdrive/My Drive/Colab Notebooks/proje...,0X19E82707BBBA6452,.npy,14,34


In [12]:
# persist the relative labels inside the dataset directory (index column included)
df.to_csv(f"{dataset_dir}/relative_labels.csv")