## Feature Extraction

**Packages**

In [1]:
import os
import pandas as pd
import numpy as np

import torch
import torchvision.models as tmodels
from torch.autograd import Variable
from torchvision import transforms

import cv2
from PIL import Image
import h5py
from tqdm import tqdm
from time import time

**Variables**

In [2]:
# Keep one frame out of every EXTRACT_FREQUENCY decoded frames.
# BATCH_SIZE is the number of kept frames sent through the network per forward pass.
EXTRACT_FREQUENCY, BATCH_SIZE = 15, 16

# Runtime switches. 'use_cpu' is read by the extraction functions to decide
# whether input batches are moved to the GPU.
args = dict(use_cpu=True)

##### )) Paths

In [3]:

# Input locations, relative to the notebook's working directory.
datasets_path = '../data'
public_dataset_path = f'{datasets_path}/Public datasets'
tvsum_data = f'{public_dataset_path}/ydata-tvsum50-v1_1'
summe_data = f'{public_dataset_path}/SUMMe'
custom_data = f'{datasets_path}/Custom data'

# Output locations for the extracted feature files (one sub-folder per method).
features_path = 'extracted_features'
normalFt_path = f'{features_path}/normal'
hashbasedFt_path = f'{features_path}/hashbased'

##### )) GoogLeNet Model

In [4]:
googlenet = tmodels.googlenet(pretrained=True)
# Keep everything except the last two child modules so the network ends at the
# pooled feature vector instead of class scores (the recorded runs show
# 1024 values per frame).
# NOTE(review): rebuilding the model as a Sequential only keeps child modules;
# any input handling done inside GoogLeNet.forward() itself (e.g. torchvision's
# `transform_input` re-normalisation for pretrained weights) is bypassed.
# Confirm this is intended.
googlenet = torch.nn.Sequential(*list(googlenet.children())[:-2])
# Inference only: switch BatchNorm (and any dropout) to evaluation behaviour.
# Without this, features depend on which frames share a batch.
googlenet.eval()



In [5]:
# Use the GPU only when one is present AND the user has not forced CPU mode.
# The switch lives in the `args` dict; a bare `use_cpu` name is never defined
# and raised NameError as soon as CUDA was available.
device = 'cuda' if torch.cuda.is_available() and not args['use_cpu'] else 'cpu'
googlenet = googlenet.to(device)

##### )) Frame preprocessing

In [5]:
  
# Built once instead of once per frame: the pipeline holds no per-frame state.
# Resize(224) with an int scales the shorter image side to 224 and keeps the
# aspect ratio, so non-square frames stay non-square (no centre crop is applied).
_FRAME_TRANSFORM = transforms.Compose([
    transforms.Resize(224),
    # transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


def preprocess(frame):
    """Convert one OpenCV frame into a normalised network input array.

    Parameters
    ----------
    frame : numpy.ndarray
        Image as returned by ``cv2.VideoCapture.read`` (BGR channel order).

    Returns
    -------
    numpy.ndarray
        The frame after BGR->RGB conversion, resizing, conversion to a
        channels-first float tensor and per-channel normalisation with the
        mean/std listed in ``_FRAME_TRANSFORM``.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR
    tensor = _FRAME_TRANSFORM(Image.fromarray(rgb))  # torchvision works on PIL images
    return tensor.numpy()


### Extraction Modules

##### )) Hash-based features

In [7]:
def dhash(img):
    """Return the 64-bit difference hash of a frame as a 16-character hex string.

    The frame is shrunk to 9x8 pixels and converted to grayscale; each of the
    8 rows then contributes 8 bits, one per horizontally adjacent pixel pair
    (bit is 1 when the left pixel is brighter than the right one). Bits are
    emitted row by row, most significant bit first.

    Parameters
    ----------
    img : numpy.ndarray
        Colour frame in OpenCV's BGR channel order, i.e. exactly what
        ``cv2.VideoCapture.read`` returns and what ``hash_gen_features`` passes in.

    Returns
    -------
    str
        Lower-case hexadecimal digest, 16 characters long.
    """
    # cv2.resize takes (width, height): the result has 8 rows and 9 columns.
    small = cv2.resize(img, (9, 8), interpolation=cv2.INTER_CUBIC)
    # The caller hands over raw BGR frames, so use the BGR conversion;
    # RGB2GRAY would swap the red and blue luminance weights.
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)

    # Compare every pixel with its right-hand neighbour in one vectorised step.
    bits = gray[:, :-1] > gray[:, 1:]
    # packbits is MSB-first, which matches reading the bit string in groups of four.
    return np.packbits(bits.flatten()).tobytes().hex()

def hamming(s1, s2):
    """Count the positions at which two equal-length sequences differ.

    Raises AssertionError when the lengths do not match. For the hex strings
    produced by ``dhash`` this counts differing hex characters, not bits.
    """
    assert len(s1) == len(s2)
    mismatches = 0
    for left, right in zip(s1, s2):
        if left != right:
            mismatches += 1
    return mismatches

In [8]:
def hash_gen_features(path, threshold):
    """Extract GoogLeNet features from every EXTRACT_FREQUENCY-th frame of a video,
    tracking a difference-hash reference frame along the way.

    Parameters
    ----------
    path : str
        Video file to read with ``cv2.VideoCapture``.
    threshold : float
        A sampled frame becomes the new hash reference when its ``hamming``
        distance to the current reference hash exceeds this value.

    Returns
    -------
    tuple
        ``(video_features, frame_count, fps)``: ``video_features`` has one row
        per sampled frame; ``frame_count`` and ``fps`` are the values reported
        by OpenCV for the container.

    Notes
    -----
    NOTE(review): as in the original code, a frame whose hash is within
    ``threshold`` of the reference is still kept, so the hash only moves the
    reference and does not change the returned features. Dropping such frames
    would also require non-uniform ``picks`` in ``extract_features``. Confirm
    the intended behaviour.
    """
    cap = cv2.VideoCapture(path)
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Send batches to wherever the model actually lives, rather than trusting
    # a separate flag that can disagree with the model's placement.
    model_device = next(googlenet.parameters()).device

    frames = []
    video_features = []

    def _flush():
        # Run the pending frames through the network and reset the buffer.
        batch = torch.from_numpy(np.array(frames)).float().to(model_device)
        video_features.extend(googlenet(batch).cpu().numpy())
        frames.clear()

    count = 0
    reference_hash = None
    try:
        with torch.no_grad():
            while cap.isOpened():
                # Capture frame-by-frame
                ret, fr = cap.read()
                if not ret:
                    break
                count += 1

                if count % EXTRACT_FREQUENCY != 0:
                    continue  # not a sampled frame: skip hashing and preprocessing

                current_hash = dhash(fr)
                if reference_hash is None or hamming(reference_hash, current_hash) > threshold:
                    reference_hash = current_hash

                frames.append(preprocess(fr))
                if len(frames) == BATCH_SIZE:
                    _flush()

            # Flush the final partial batch on what was actually decoded; the
            # container's reported frame count is not always exact.
            if frames:
                _flush()
    finally:
        # Release the decoder even if inference raises.
        cap.release()

    features = np.array(video_features)
    # Drop singleton axes but never the leading (frame) axis, so a video with a
    # single sampled frame still yields a 2-D array.
    singleton_axes = tuple(ax for ax in range(1, features.ndim) if features.shape[ax] == 1)
    video_features = np.squeeze(features, axis=singleton_axes)
    return video_features, frame_count, fps

##### )) Normal features

In [6]:
def gen_features(path):
    """Extract GoogLeNet features from every EXTRACT_FREQUENCY-th frame of a video.

    Parameters
    ----------
    path : str
        Video file to read with ``cv2.VideoCapture``.

    Returns
    -------
    tuple
        ``(video_features, frame_count, fps)``: ``video_features`` has one row
        per sampled frame; ``frame_count`` and ``fps`` are the values reported
        by OpenCV for the container.
    """
    cap = cv2.VideoCapture(path)
    frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Send batches to wherever the model actually lives, rather than trusting
    # a separate flag that can disagree with the model's placement.
    model_device = next(googlenet.parameters()).device

    frames = []
    video_features = []

    def _flush():
        # Run the pending frames through the network and reset the buffer.
        batch = torch.from_numpy(np.array(frames)).float().to(model_device)
        video_features.extend(googlenet(batch).cpu().numpy())
        frames.clear()

    count = 0
    try:
        with torch.no_grad():
            while cap.isOpened():
                ret, fr = cap.read()
                if not ret:
                    break
                count += 1

                if count % EXTRACT_FREQUENCY != 0:
                    continue  # only sampled frames are worth preprocessing

                frames.append(preprocess(fr))
                if len(frames) == BATCH_SIZE:
                    _flush()

            # Flush the final partial batch on what was actually decoded; the
            # container's reported frame count is not always exact.
            if frames:
                _flush()
    finally:
        # Release the decoder even if inference raises.
        cap.release()

    features = np.array(video_features)
    # Drop singleton axes but never the leading (frame) axis, so a video with a
    # single sampled frame still yields a 2-D array.
    singleton_axes = tuple(ax for ax in range(1, features.ndim) if features.shape[ax] == 1)
    video_features = np.squeeze(features, axis=singleton_axes)
    return video_features, frame_count, fps

##### )) Feature extraction and storage

In [7]:
def extract_features(videos_path, h5output_path, method=None, names=None, hash_threshold=4.0):
    """Extract features for every video in a folder and store them in one HDF5 file.

    For each video a group ``video_<k>`` is written with the datasets
    ``n_frames``, ``fps``, ``features``, ``picks``, ``duration`` and
    ``video_name``.

    Parameters
    ----------
    videos_path : str
        Directory whose entries are all treated as video files.
    h5output_path : str
        HDF5 file to create (an existing file is overwritten).
    method : str, optional
        ``'hashbased'`` selects ``hash_gen_features``; anything else uses
        ``gen_features``.
    names : list of str, optional
        When given, ``k`` is the 1-based position of the file's base name in
        this list; otherwise ``k`` is the 1-based position of the file in the
        sorted directory listing.
    hash_threshold : float, optional
        Threshold forwarded to ``hash_gen_features`` (default 4.0).

    Raises
    ------
    FileNotFoundError
        If ``videos_path`` does not exist. Raised before the output file is touched.
    ValueError
        If ``names`` is given and a file's base name is not in it.

    Notes
    -----
    NOTE(review): ``picks`` is written as 0, F, 2F, ... (F = EXTRACT_FREQUENCY),
    while the extractors in this notebook sample the F-th, 2F-th, ... decoded
    frame (0-based indices F-1, 2F-1, ...). Confirm downstream code expects this.
    """
    # List the inputs first: a wrong videos_path must fail before mode 'w'
    # truncates an existing output file. Sorting makes the numbering reproducible.
    files = sorted(os.listdir(videos_path))

    out_dir = os.path.dirname(h5output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Both resources are context-managed so the file and the bar are closed on error.
    with h5py.File(h5output_path, 'w') as f, \
            tqdm(total=len(files), position=0, leave=True) as pbar:
        for cnt, file in enumerate(files, start=1):
            path = os.path.join(videos_path, file)

            if method == 'hashbased':
                video_features, fcnt, fps = hash_gen_features(path, hash_threshold)
            else:
                video_features, fcnt, fps = gen_features(path)

            video_length = fcnt/fps

            if names:
                # splitext strips only the extension, so dotted names survive.
                video_ = 'video_'+str(names.index(os.path.splitext(file)[0])+1)
            else:
                video_ = 'video_'+str(cnt)

            # tqdm.write keeps the log line from corrupting the progress bar.
            tqdm.write(f"{cnt} . {video_} no. of frames= {fcnt} Output dimension= {video_features.shape}")

            f.create_dataset(video_ + '/n_frames', data=int(fcnt))
            f.create_dataset(video_ + '/fps', data=int(fps))
            f.create_dataset(video_ + '/features', data=video_features)
            picks = np.arange(0, video_features.shape[0]) * EXTRACT_FREQUENCY
            f.create_dataset(video_ + '/picks', data=picks)
            f.create_dataset(video_+'/duration', data=video_length)
            f.create_dataset(video_+'/video_name', data=str(file))
            pbar.update(1)


### TVSum feature extraction

##### )) TVSum info

In [8]:
# Load the TVSum video index (tab-separated). Its `video_id` column is what the
# extraction cell further down turns into `vnames` to number the videos.
# NOTE(review): currently commented out; uncomment before running that cell.
# tvsum_info=pd.read_csv(tvsum_data+'/data/ydata-tvsum50-info.tsv',sep='\t')
# tvsum_info

Unnamed: 0,category,video_id,title,url,length
0,VT,AwmHb44_ouw,#1306 How to change tires for off road vehicle...,https://www.youtube.com/watch?v=AwmHb44_ouw,5:54
1,VT,98MoyGZKHXc,How to use a tyre repair kit - Which? guide,https://www.youtube.com/watch?v=98MoyGZKHXc,3:07
2,VT,J0nA4VgnoCo,#0001: FLAT TIRE,https://www.youtube.com/watch?v=J0nA4VgnoCo,9:44
3,VT,gzDbaEs1Rlg,ŠKODA Tips How to Repair Your Tyre,https://www.youtube.com/watch?v=gzDbaEs1Rlg,4:48
4,VT,XzYM3PfTM4w,When to Replace Your Tires GMC,https://www.youtube.com/watch?v=XzYM3PfTM4w,1:51
5,VU,HT5vyqe0Xaw,"The stuck truck of Mark, The rut that filled a...",https://www.youtube.com/watch?v=HT5vyqe0Xaw,5:22
6,VU,sTEELN-vY30,BBC - Train crash 2013,https://www.youtube.com/watch?v=sTEELN-vY30,2:29
7,VU,vdmoEJ5YbrQ,#453 girl gets van stuck in the back fourty [D...,https://www.youtube.com/watch?v=vdmoEJ5YbrQ,5:29
8,VU,xwqBXPGE9pQ,Smart Electric Vehicle Balances on Two Wheels,https://www.youtube.com/watch?v=xwqBXPGE9pQ,3:53
9,VU,akI8YFjEmUw,Electric cars making earth more green,https://www.youtube.com/watch?v=akI8YFjEmUw,2:13


##### )) Videos

##### )) Extraction

In [14]:
# Plain (non-hash) feature extraction for all TVSum videos into normalFt_path/TVSum.h5.
# Requires `tvsum_info` from the commented-out read_csv cell above.
# The recorded output below is from an earlier run (about 47 minutes for 50 videos).
# vnames = tvsum_info['video_id'].tolist()
# extract_features(videos_path=tvsum_data+'/video', h5output_path=normalFt_path+'/TVSum.h5', names=vnames)

  2%|▏         | 1/50 [00:58<47:32, 58.21s/it]

1 . video_50 no. of frames= 6912.0 Output dimension= (460, 1024)


  4%|▍         | 2/50 [01:28<33:16, 41.59s/it]

2 . video_13 no. of frames= 3532.0 Output dimension= (235, 1024)


  6%|▌         | 3/50 [02:16<35:08, 44.86s/it]

3 . video_19 no. of frames= 5742.0 Output dimension= (382, 1024)


  8%|▊         | 4/50 [02:44<29:09, 38.03s/it]

4 . video_14 no. of frames= 4853.0 Output dimension= (323, 1024)


 10%|█         | 5/50 [03:19<27:50, 37.11s/it]

5 . video_30 no. of frames= 4005.0 Output dimension= (267, 1024)


 12%|█▏        | 6/50 [03:50<25:27, 34.72s/it]

6 . video_26 no. of frames= 3312.0 Output dimension= (220, 1024)


 14%|█▍        | 7/50 [04:32<26:37, 37.14s/it]

7 . video_2 no. of frames= 4688.0 Output dimension= (312, 1024)


 16%|█▌        | 8/50 [05:06<25:28, 36.39s/it]

8 . video_10 no. of frames= 3995.0 Output dimension= (266, 1024)


 18%|█▊        | 9/50 [06:13<31:23, 45.94s/it]

9 . video_1 no. of frames= 10597.0 Output dimension= (706, 1024)


 20%|██        | 10/50 [07:01<31:03, 46.58s/it]

10 . video_22 no. of frames= 5661.0 Output dimension= (377, 1024)


 22%|██▏       | 11/50 [08:56<43:45, 67.31s/it]

11 . video_12 no. of frames= 13511.0 Output dimension= (900, 1024)


 24%|██▍       | 12/50 [09:27<35:43, 56.41s/it]

12 . video_34 no. of frames= 3705.0 Output dimension= (247, 1024)


 26%|██▌       | 13/50 [11:17<44:50, 72.73s/it]

13 . video_21 no. of frames= 19406.0 Output dimension= (1293, 1024)


 28%|██▊       | 14/50 [13:36<55:30, 92.51s/it]

14 . video_46 no. of frames= 15307.0 Output dimension= (1020, 1024)


 30%|███       | 15/50 [14:00<42:02, 72.07s/it]

15 . video_38 no. of frames= 2941.0 Output dimension= (196, 1024)


 32%|███▏      | 16/50 [14:43<35:50, 63.24s/it]

16 . video_43 no. of frames= 4931.0 Output dimension= (328, 1024)


 34%|███▍      | 17/50 [15:20<30:28, 55.40s/it]

17 . video_42 no. of frames= 5939.0 Output dimension= (395, 1024)


 36%|███▌      | 18/50 [16:58<36:16, 68.02s/it]

18 . video_29 no. of frames= 17527.0 Output dimension= (1168, 1024)


 38%|███▊      | 19/50 [17:34<30:14, 58.53s/it]

19 . video_24 no. of frames= 4356.0 Output dimension= (290, 1024)


 40%|████      | 20/50 [18:37<29:55, 59.86s/it]

20 . video_4 no. of frames= 7210.0 Output dimension= (480, 1024)


 42%|████▏     | 21/50 [19:26<27:23, 56.69s/it]

21 . video_17 no. of frames= 5846.0 Output dimension= (389, 1024)


 44%|████▍     | 22/50 [20:48<29:57, 64.20s/it]

22 . video_6 no. of frames= 9671.0 Output dimension= (644, 1024)


 46%|████▌     | 23/50 [21:28<25:35, 56.86s/it]

23 . video_11 no. of frames= 4700.0 Output dimension= (313, 1024)


 48%|████▊     | 24/50 [21:49<19:57, 46.07s/it]

24 . video_45 no. of frames= 2500.0 Output dimension= (166, 1024)


 50%|█████     | 25/50 [23:48<28:22, 68.10s/it]

25 . video_3 no. of frames= 14019.0 Output dimension= (934, 1024)


 52%|█████▏    | 26/50 [24:38<25:05, 62.75s/it]

26 . video_49 no. of frames= 5971.0 Output dimension= (398, 1024)


 54%|█████▍    | 27/50 [25:16<21:05, 55.03s/it]

27 . video_44 no. of frames= 4304.0 Output dimension= (286, 1024)


 56%|█████▌    | 28/50 [25:38<16:35, 45.26s/it]

28 . video_32 no. of frames= 3802.0 Output dimension= (253, 1024)


 58%|█████▊    | 29/50 [26:14<14:50, 42.42s/it]

29 . video_48 no. of frames= 3896.0 Output dimension= (259, 1024)


 60%|██████    | 30/50 [27:10<15:33, 46.68s/it]

30 . video_20 no. of frames= 6241.0 Output dimension= (416, 1024)


 62%|██████▏   | 31/50 [27:51<14:12, 44.84s/it]

31 . video_47 no. of frames= 4740.0 Output dimension= (316, 1024)


 64%|██████▍   | 32/50 [29:27<18:05, 60.32s/it]

32 . video_40 no. of frames= 11414.0 Output dimension= (760, 1024)


 66%|██████▌   | 33/50 [30:23<16:39, 58.79s/it]

33 . video_25 no. of frames= 6580.0 Output dimension= (438, 1024)


 68%|██████▊   | 34/50 [31:34<16:39, 62.44s/it]

34 . video_41 no. of frames= 8073.0 Output dimension= (538, 1024)


 70%|███████   | 35/50 [33:08<17:58, 71.92s/it]

35 . video_27 no. of frames= 10917.0 Output dimension= (727, 1024)


 72%|███████▏  | 36/50 [33:30<13:19, 57.11s/it]

36 . video_39 no. of frames= 4166.0 Output dimension= (277, 1024)


 74%|███████▍  | 37/50 [34:08<11:08, 51.42s/it]

37 . video_7 no. of frames= 4468.0 Output dimension= (297, 1024)


 76%|███████▌  | 38/50 [34:43<09:17, 46.46s/it]

38 . video_37 no. of frames= 4009.0 Output dimension= (267, 1024)


 78%|███████▊  | 39/50 [35:43<09:13, 50.36s/it]

39 . video_8 no. of frames= 9870.0 Output dimension= (658, 1024)


 80%|████████  | 40/50 [36:32<08:21, 50.13s/it]

40 . video_31 no. of frames= 5412.0 Output dimension= (360, 1024)


 82%|████████▏ | 41/50 [37:58<09:08, 60.96s/it]

41 . video_16 no. of frames= 9535.0 Output dimension= (635, 1024)


 84%|████████▍ | 42/50 [39:07<08:26, 63.29s/it]

42 . video_36 no. of frames= 7959.0 Output dimension= (530, 1024)


 86%|████████▌ | 43/50 [39:57<06:54, 59.17s/it]

43 . video_23 no. of frames= 5631.0 Output dimension= (375, 1024)


 88%|████████▊ | 44/50 [41:53<07:38, 76.38s/it]

44 . video_33 no. of frames= 13365.0 Output dimension= (891, 1024)


 90%|█████████ | 45/50 [42:55<05:59, 71.99s/it]

45 . video_9 no. of frames= 7010.0 Output dimension= (467, 1024)


 92%|█████████▏| 46/50 [43:19<03:50, 57.63s/it]

46 . video_15 no. of frames= 4324.0 Output dimension= (288, 1024)


 94%|█████████▍| 47/50 [43:48<02:26, 48.88s/it]

47 . video_5 no. of frames= 3327.0 Output dimension= (221, 1024)


 96%|█████████▌| 48/50 [45:11<01:58, 59.20s/it]

48 . video_18 no. of frames= 9731.0 Output dimension= (648, 1024)


 98%|█████████▊| 49/50 [46:21<01:02, 62.34s/it]

49 . video_28 no. of frames= 8281.0 Output dimension= (552, 1024)


100%|██████████| 50/50 [46:59<00:00, 56.39s/it]

50 . video_35 no. of frames= 4463.0 Output dimension= (297, 1024)





###### TEST 2


In [10]:
# Second test run, on the custom videos.
# NOTE(review): 'data/Custom dataset' does not match `custom_data`
# ('../data/Custom data') from the Paths cell; the FileNotFoundError recorded
# below was raised for this path.
# # vnames = tvsum_info['video_id'].tolist()
# extract_features(videos_path='data/Custom dataset', h5output_path=normalFt_path+'/sportsmeet_test.h5')

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'data/Custom dataset'

In [None]:
# Hash-based variant of the TVSum extraction; writes to hashbasedFt_path/TVSum.h5.
# Depends on `vnames`, which is only assigned in the commented-out TVSum extraction cell.
# extract_features(videos_path=tvsum_data+'/video', h5output_path=hashbasedFt_path+'/TVSum.h5',method='hashbased', names=vnames)

---------------------------

##### *H5File metadata*