In [2]:
# First half, create fisher vectors for classification

import os
from os.path import join
import numpy as np
import sklearn.decomposition as decomp
from sklearn.mixture import GaussianMixture as GMM
from sklearn.svm import LinearSVC

from video_representation import VideoRepresentation
from transforms import *
from settings import *
from visualize import *
from tqdm import tqdm

# Set to False to (re)extract trajectories and descriptors from the raw videos;
# leave True when the descriptor files are already on disk.
already_computed_descriptors = True

# Make sure the descriptor directory exists. exist_ok=True tolerates an already
# existing directory, while genuine failures (e.g. missing permissions, or the
# path being a regular file) are raised instead of being silently swallowed.
os.makedirs(video_descriptors_path, exist_ok=True)

# Per-video containers, filled by the TRAIN and TEST sections respectively.
train_videos = []
test_videos = []

# COMPUTE DESCRIPTORS
# For every sub-directory of data_dir, extract trajectories and descriptors for
# each .avi file that does not have a descriptor file yet.
if not already_computed_descriptors:
    # next(os.walk(...))[1] is the list of immediate sub-directories of data_dir.
    # Every sub-directory is processed: already-extracted videos are skipped
    # below, so no directory has to be excluded by its position in the listing
    # (the listing order is arbitrary and differs between machines).
    for directory in next(os.walk(data_dir))[1]:
        directory_path = join(data_dir, directory)
        print(f'\n________EXTRACTING DESCRIPTORS FROM {directory_path}')
        for filename in tqdm(os.listdir(directory_path)):
            filepath = join(directory_path, filename)
            # check if the descriptor file already exists
            descriptors_txt_path = join(video_descriptors_path, f'{filename.split(".")[0]}-descriptors.txt')
            if os.path.isfile(descriptors_txt_path):
                print('descriptor file already exists: ', descriptors_txt_path)
                continue
            if '.avi' in filename and os.path.isfile(filepath):
                # task 1
                trajectories_list = trajectories_from_video(filepath)
                # task 2
                # saves descriptors to disk
                descriptors_from_trajectories(trajectories_list, filename)

# TRAIN
# Each non-empty line of train.txt is a relative video path
# (e.g. "Kickflip/Kickflip0.avi"); the label is derived from the path itself.
with open(join(data_dir, 'train.txt'), 'r') as train_f:
    # strip() also removes the '\r' of CRLF files and stray spaces; blank lines
    # are dropped so they cannot produce an empty path further down.
    train_lines = [line.strip() for line in train_f if line.strip()]
for filepath in train_lines:
    # Handle skateboard trick file paths: Kickflip -> 1, anything else -> 0
    label = 1 if 'Kickflip' in filepath else 0

    # The second path component is the video file name; its descriptor file
    # swaps the ".avi" suffix for "-descriptors.txt".
    descriptor_path = join(video_descriptors_path,
                           f'{filepath.split("/")[1].replace(".avi", "-descriptors.txt")}')
    # ndmin=2 keeps a single-row descriptor file two-dimensional so it can be
    # concatenated with the other videos along axis 0.
    video_representation = VideoRepresentation(filepath, np.loadtxt(descriptor_path, ndmin=2), label)
    train_videos.append(video_representation)

# Pool the raw descriptors of every train video into one matrix (one row per descriptor).
raw_descriptor_blocks = [video.descriptors for video in train_videos]
all_train_descriptors = np.concatenate(raw_descriptor_blocks, axis=0)
print(f'total number of train descriptors: {all_train_descriptors.shape[0]}')
print(f'length of each train descriptor: {all_train_descriptors.shape[1]}')

# Build the PCA and fit it on the pooled train descriptors in one go
# (fit() returns the fitted estimator).
pca = decomp.PCA(pca_num_components).fit(all_train_descriptors)

# Project the descriptors video by video and keep the result on each video.
for video in train_videos:
    video.pca_descriptors = pca.transform(video.descriptors)

# Re-pool the per-video projections instead of transforming the full matrix a second time.
all_train_descriptors = np.concatenate(
    [video.pca_descriptors for video in train_videos],
    axis=0,
)
print(f'length each train descriptor after pca: {all_train_descriptors.shape[1]}')

total number of train descriptors: 1050128
length of each train descriptor: 426
length each train descriptor after pca: 64
calculated Fisher vectors


In [14]:
# learn GMM model on the pooled train descriptors
# random_state pins the otherwise random initialisation, so the fitted mixture
# and the Fisher vectors derived from it are the same on every run.
gmm = GMM(n_components=gmm_n_components, covariance_type='diag', random_state=0)
gmm.fit(all_train_descriptors)

# compute fisher vectors for each train video
for v in train_videos:
    v.fisher_vector = fisher_from_descriptors(v.pca_descriptors, gmm)
print('calculated Fisher vectors')

In [10]:
# Standard model

# initialize and fit a linear SVM on one Fisher vector per train video
# random_state fixes the pseudo-random data shuffling of the dual solver, so
# repeated runs on the same features yield the same classifier.
model = LinearSVC(random_state=0)
model.fit(X=[v.fisher_vector for v in train_videos], y=[v.label for v in train_videos])
print('fitted linear SVM')

fitted linear SVM




In [11]:
# Check training accuracy
# Collect features and ground-truth labels once; both calls below use them.
train_features = [video.fisher_vector for video in train_videos]
train_labels = [video.label for video in train_videos]
training_accuracy = model.score(X=train_features, y=train_labels)
print(f'training accuracy: {training_accuracy}')

# Check individual labels
# Store each prediction on its video, then list them next to the ground truth.
prediction = model.predict(X=train_features)
for video, predicted in zip(train_videos, prediction):
    video.predicted_label = predicted
print('prediction by video: index, true label, predicted label, path\n')
for i, video in enumerate(train_videos):
    print(f'{i}    gt: {video.label}    pred: {video.predicted_label}   {video.filepath}')

training accuracy: 1.0
prediction by video: index, true label, predicted label, path

0    gt: 1    pred: 1   Kickflip/Kickflip0.avi
1    gt: 1    pred: 1   Kickflip/Kickflip1.avi
2    gt: 1    pred: 1   Kickflip/Kickflip2.avi
3    gt: 1    pred: 1   Kickflip/Kickflip3.avi
4    gt: 1    pred: 1   Kickflip/Kickflip4.avi
5    gt: 1    pred: 1   Kickflip/Kickflip5.avi
6    gt: 1    pred: 1   Kickflip/Kickflip6.avi
7    gt: 1    pred: 1   Kickflip/Kickflip7.avi
8    gt: 1    pred: 1   Kickflip/Kickflip8.avi
9    gt: 1    pred: 1   Kickflip/Kickflip9.avi
10    gt: 1    pred: 1   Kickflip/Kickflip10.avi
11    gt: 1    pred: 1   Kickflip/Kickflip11.avi
12    gt: 1    pred: 1   Kickflip/Kickflip12.avi
13    gt: 1    pred: 1   Kickflip/Kickflip13.avi
14    gt: 1    pred: 1   Kickflip/Kickflip14.avi
15    gt: 1    pred: 1   Kickflip/Kickflip15.avi
16    gt: 1    pred: 1   Kickflip/Kickflip16.avi
17    gt: 1    pred: 1   Kickflip/Kickflip17.avi
18    gt: 1    pred: 1   Kickflip/Kickflip18.avi
19 

In [13]:
# TEST

print('model: ', model)

# Start from an empty list on every run: appending to a list created in an
# earlier cell would duplicate every test video when this cell is re-run.
test_videos = []

# Each non-empty line of test.txt is a relative video path
# (e.g. "Kickflip/Kickflip71.avi"); the label is derived from the path itself.
with open(join(data_dir, 'test.txt'), 'r') as test_f:
    # strip() also removes the '\r' of CRLF files and stray spaces; blank lines
    # are dropped so they cannot produce an empty path further down.
    test_lines = [line.strip() for line in test_f if line.strip()]
for filepath in test_lines:
    # Handle skateboard trick file paths: Kickflip -> 1, anything else -> 0
    label = 1 if 'Kickflip' in filepath else 0
    descriptor_path = join(video_descriptors_path,
                           f'{filepath.split("/")[1].replace(".avi", "-descriptors.txt")}')
    # ndmin=2 keeps a single-row descriptor file two-dimensional.
    video_representation = VideoRepresentation(filepath, np.loadtxt(descriptor_path, ndmin=2), label)
    test_videos.append(video_representation)

# reduce dimension of all test descriptors using pca fitted on train data
for v in test_videos:
    v.pca_descriptors = pca.transform(v.descriptors)
print('reduced dimensions of the test data')

# calculate a fisher vector for each test video based on the gmm model fit on the train data
for v in test_videos:
    v.fisher_vector = fisher_from_descriptors(v.pca_descriptors, gmm)
print('calculated Fisher vectors on the test data')

# predict the labels of the test videos
accuracy = model.score(X=[v.fisher_vector for v in test_videos], y=[v.label for v in test_videos])
print(f'accuracy: {accuracy}')
prediction = model.predict(X=[v.fisher_vector for v in test_videos])
# store each prediction on its video, then list them next to the ground truth
for i, v in enumerate(test_videos):
    v.predicted_label = prediction[i]
print('prediction by video: index, true label, predicted label, path\n')
for i, v in enumerate(test_videos):
    print(f'{i}    gt: {v.label}    pred: {v.predicted_label}   {v.filepath}')





    # to test trajectories on a single video
    # trajectories_from_video('data_avi_hd/Ollie/Ollie20.avi', vis_flow=False, vis_trajectories=True)

model:  LinearSVC()
reduced dimensions of the test data
calculated Fisher vectors on the test data
accuracy: 0.45569620253164556
prediction by video: index, true label, predicted label, path

0    gt: 1    pred: 1   Kickflip/Kickflip71.avi
1    gt: 1    pred: 0   Kickflip/Kickflip72.avi
2    gt: 1    pred: 1   Kickflip/Kickflip73.avi
3    gt: 1    pred: 1   Kickflip/Kickflip74.avi
4    gt: 1    pred: 0   Kickflip/Kickflip75.avi
5    gt: 1    pred: 0   Kickflip/Kickflip76.avi
6    gt: 1    pred: 0   Kickflip/Kickflip77.avi
7    gt: 1    pred: 1   Kickflip/Kickflip78.avi
8    gt: 1    pred: 1   Kickflip/Kickflip79.avi
9    gt: 1    pred: 0   Kickflip/Kickflip80.avi
10    gt: 1    pred: 1   Kickflip/Kickflip81.avi
11    gt: 1    pred: 1   Kickflip/Kickflip82.avi
12    gt: 1    pred: 0   Kickflip/Kickflip83.avi
13    gt: 1    pred: 0   Kickflip/Kickflip84.avi
14    gt: 1    pred: 0   Kickflip/Kickflip85.avi
15    gt: 1    pred: 1   Kickflip/Kickflip86.avi
16    gt: 1    pred: 0   Kickflip/