# Exploratory Data Analysis for HMP (histogram of motion patterns)

Import libraries:

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

Some Matplotlib configuration:

In [2]:
# Default font settings, applied through Matplotlib's rc mechanism.
# The dict is kept under the name `font` so it can be inspected or reused.
font = {
    'family': 'DejaVu Sans',
    'weight': 'bold',
    'size': 22,
}
plt.rc('font', **font)

Import the config module, which contains all the paths (it lives in the parent directory, so that directory is added to `sys.path` first):

In [3]:
import sys

In [4]:
sys.path.append('..')

In [5]:
import config

**HMP Features**

In [6]:
config.DEV_FEATURES

'/datasets/dev-set/features'

In [7]:
config.DEV_FEATURES_LIST

['C3D',
 'HMP',
 'InceptionV3',
 'LBP',
 'aesthetic_feat_dev-set_mean',
 'ColorHistogram',
 'HOG',
 'ORB']

In [8]:
config.DEV_HMP_FEATURE

'/datasets/dev-set/features/HMP'

In [9]:
HMP_FILENAMES = os.listdir(config.DEV_HMP_FEATURE)

In [10]:
HMP_FILENAMES[:5]

['video3094.txt',
 'video4963.txt',
 'video5381.txt',
 'video3388.txt',
 'video6587.txt']

In [11]:
'{:,}'.format(len(HMP_FILENAMES))

'8,000'

In [12]:
'{:,}'.format(config.HMP_DIM)

'6,075'

In [13]:
def read_features(filename):
    """Load one feature file into a dict of named float values.

    The file content is split on whitespace into tokens; each token is
    split on ':' and the second piece is converted to ``float``.

    Keys are ``'HMP_1'``, ``'HMP_2'``, ... numbered by the token's position
    in the file (starting at 1), in file order.

    NOTE(review): the text before ':' is discarded, so keys reflect token
    position only. If a file can omit entries, the keys will not line up
    with whatever index the file itself carries -- confirm the file format.

    Raises:
        IndexError: if a token contains no ':'.
        ValueError: if the text after the first ':' is not a valid float.
    """
    with open(filename) as f:
        tokens = f.read().split()

    features = {}
    for position, token in enumerate(tokens, start=1):
        features['HMP_{}'.format(position)] = float(token.split(':')[1])
    return features

In [14]:
# read_features(os.path.join(config.DEV_HMP_FEATURE, HMP_FILENAMES[0]))

In [15]:
def read_features_optimized(filename):
    """Return the values of one feature file as a comma-separated string.

    The file content is split on whitespace into tokens; from each token
    the piece after the first ':' is taken verbatim (no float conversion)
    and the pieces are joined with ','. An empty file yields ``''``.

    NOTE(review): the text before ':' is discarded, so values are emitted
    purely in file order -- confirm that every file lists the same entries
    in the same order before treating the result as aligned columns.

    Raises:
        IndexError: if a token contains no ':'.
    """
    with open(filename) as f:
        content = f.read()

    values = (token.split(':')[1] for token in content.split())
    return ','.join(values)

In [16]:
# read_features_optimized(os.path.join(config.DEV_HMP_FEATURE, HMP_FILENAMES[0]))

In [17]:
# Dump the HMP values of every listed feature file into one CSV:
# a header line followed by one row per file.
with open('../../data/HMP_train.csv', 'w') as f:
    # Header: 'video' plus one column per dimension, HMP_0 .. HMP_{HMP_DIM - 1}.
    header = 'video,' + ','.join(map('HMP_{}'.format, range(config.HMP_DIM)))
    f.write(header + '\n')

    # Rows, in the order the file names appear in HMP_FILENAMES.
    for i, video in enumerate(HMP_FILENAMES):
        # print('Iteration: {}'.format(i))  # uncomment to trace progress
        # Each row is keyed by the video file name: the part of the feature
        # file name before '.txt', with '.webm' appended.
        video_features = ','.join([
            video.partition('.txt')[0] + '.webm',
            read_features_optimized(os.path.join(config.DEV_HMP_FEATURE, video)),
        ])
        f.write(video_features + '\n')

In [18]:
!cat '../../data/HMP_train.csv' | wc -l

8001
