# Exploratory Data Analysis for HOG (histogram of gradients)

Import libraries:

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Some Matplotlib configuration:

In [2]:
font = { 'family': 'DejaVu Sans', 'weight': 'bold', 'size': 22 }
plt.rc('font', **font)

Import the config file which contains all the paths:

In [3]:
import sys

In [4]:
sys.path.append('..')

In [5]:
import config

**HOG Features**

In [6]:
config.DEV_FEATURES

'/datasets/dev-set/features'

In [7]:
config.DEV_FEATURES_LIST

['C3D',
 'HMP',
 'InceptionV3',
 'LBP',
 'aesthetic_feat_dev-set_mean',
 'ColorHistogram',
 'HOG',
 'ORB']

In [8]:
config.DEV_HOG_FEATURE

'/datasets/dev-set/features/HOG'

In [9]:
HOG_FILENAMES = os.listdir(config.DEV_HOG_FEATURE)

In [10]:
HOG_FILENAMES[:5]

['video818-0.txt',
 'video3493-112.txt',
 'video6172-0.txt',
 'video1264-0.txt',
 'video3776-56.txt']

In [11]:
len(HOG_FILENAMES)

24000

In [12]:
video_names = list(set([ video.split('-')[0] for video in HOG_FILENAMES ]))

In [13]:
len(video_names)

8000

In [14]:
def read_features(video_name, frame, feature_name):
    filename = '{}-{}.txt'.format(video_name, frame)
    with open(filename) as f:
        return { '{}_{}_{}'.format(feature_name, frame, i + 1) : float(feature) 
                for i, feature in enumerate(f.read().split()) }

In [15]:
NUM_FEATURES = len(read_features(os.path.join(config.DEV_HOG_FEATURE, video_names[0]), 0, 'HOG'))

In [26]:
for video in video_names:
    print(video, len(read_features(os.path.join(config.DEV_HOG_FEATURE, video), 0, 'HOG')))

video6499 9575
video7235 12367
video998 8954
video5445 11849
video1204 11002
video1177 7058
video7956 8867
video3193 13494
video7038 11821
video7664 12501
video5961 12604
video7090 13961
video9959 12123
video7909 12843
video2074 13240
video2256 6330
video3489 7211
video6007 2253
video1150 10802
video1253 4736
video1017 12055
video3696 12814
video8695 10360
video5968 8911
video6169 14334
video2052 12053
video2376 12050
video8498 12709
video3035 12240
video2424 12828
video8487 9171
video9733 10581
video7678 12586
video6694 8081
video3838 12933
video7405 12403
video1241 12138
video8516 11457
video4525 11746
video626 8729
video7967 10296
video1838 13715
video5491 13137
video5398 13080
video6670 11379
video7050 12432
video6182 11393
video7211 8495
video7341 10406
video2836 4941
video7555 11997
video5897 9274
video6894 11827
video8002 12193
video9815 9608
video120 10394
video8674 9966
video9313 12713
video6596 10049
video6885 9151
video571 9910
video899 10375
video3590 8987
video2583 10882
v

video7040 9919
video7156 9970
video9522 8313
video2552 3707
video1274 8220
video1753 12279
video686 10651
video2459 10603
video8311 14626
video9568 10356
video6961 10602
video3470 11263
video2786 11194
video393 11216
video9009 13146
video3642 9746
video5192 5522
video6476 9060
video4107 11978
video9939 14629
video8953 9855
video5866 10663
video2086 11806
video8748 10243
video9965 7604
video517 11840
video4746 11524
video4688 8855
video5685 10891
video3862 9503
video8474 4910
video6196 12954
video491 8473
video7803 7672
video4752 9188
video9304 11835
video6661 2705
video7351 10717
video1397 10299
video7992 10269
video2688 7480
video4341 12544
video1102 8998
video9323 13591
video8776 10174
video8327 11451
video1368 14129
video6671 10927
video2671 13338
video3505 11594
video1940 10147
video4281 6966
video8680 10859
video7096 8823
video3165 12222
video9495 9730
video6975 5486
video1601 7182
video3815 9945
video8981 12384
video6850 12320
video4935 11610
video933 5819
video9661 12196
video49

video4513 8145
video5386 9082
video7497 12601
video6359 10987
video6128 13929
video3280 11817
video2907 11422
video8301 10199
video9543 5365
video8297 10953
video3161 12345
video2742 11172
video5481 10054
video6783 13048
video591 6613
video5926 8817
video6055 12861
video5639 12994
video32 8057
video3048 10612
video7370 12482
video2573 11811
video6041 10140
video6872 13555
video400 8270
video1747 13085
video6579 9296
video8000 11361
video2246 11816
video8309 12915
video4805 12268
video2129 6834
video3318 14201
video7899 12073
video8084 12616
video8579 9862
video339 12334
video5938 11177
video9260 2602
video8658 14267
video3777 13173
video8155 10127
video9381 9737
video9560 9009
video2620 9578
video9935 12222
video2103 3691
video3814 12655
video3323 11473
video3804 4447
video7408 9920
video8195 12272
video2939 12464
video494 10608
video5906 6539
video9187 11376
video6260 12904
video8076 4228
video7770 11402
video2148 11936
video3657 6466
video7811 11079
video5167 12056
video2150 13311
vi

KeyboardInterrupt: 

In [16]:
NUM_FEATURES

9575

In [17]:
def read_features_optimized(video_name, frame):
    filename = '{}-{}.txt'.format(video_name, frame)
    with open(filename) as f:
        return ','.join(f.read().split())

In [18]:
# read_features_optimized(os.path.join(config.DEV_HOG_FEATURE, video_names[0]), 0).split()

In [19]:
frames = [0, 56, 112]

In [20]:
# [ 'HOG_{}_{}'.format(frame, i) for frame in frames for i in range(NUM_FEATURES)  ]

In [21]:
with open('../../data/HOG_train.csv', 'w') as f: 
    # Header
    header = 'video,' + ','.join([ 'HOG_{}_{}'.format(frame, i) for frame in frames for i in range(NUM_FEATURES)  ])
    f.write(header + '\n')
    # Rows
    for i, video in enumerate(video_names):
        # print('Iteration: {}'.format(i))
        # Get features per video
        video_features = video.split('.txt')[0] + '.webm'
        for frame in frames:
            video_features += ',' + read_features_optimized(os.path.join(config.DEV_HOG_FEATURE, video), frame)
        # write it!
        f.write(video_features + '\n')

In [22]:
!cat '../../data/HOG_train.csv' | wc -l

8001


In [23]:
HOG_train_df = pd.read_csv('../../data/HOG_train.csv')

ParserError: Error tokenizing data. C error: Expected 29663 fields in line 3, saw 35582


In [None]:
# videos have different HOGs depending on the image...