In [2]:

from cal_optical_flow_seq import extract_flow_seq_train
from cal_optical_flow_seq import extract_flow_val
from evaluate import evaluate
from clustering import make_codebook
from clustering import extract_vec_points
from create_bovw import create_bovw_traindf
from create_bovw import create_bovw_testdf
from utils import get_video_label
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import os
import pickle
import pandas as pd
from sklearn.externals import joblib
from sklearn import decomposition
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.naive_bayes import MultinomialNB
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.metrics import classification_report

  from numpy.core.umath_tests import inner1d


In [None]:
# ---------------------------------------------------------------------------
# Pipeline configuration: dataset location, artifact names and model sizes.
# ---------------------------------------------------------------------------

# Root directory of the dataset handed to the optical-flow extractors.
DATASET = "/opt/datasets/KTH"

# Directory that receives every artifact written by main().
destpath = "data"

# Base file names; main() appends suffixes such as "_train.pkl" or "_mag.pkl".
flow_filename = "flow_data"        # pickled optical-flow features
km_model_filename = "km"           # pickled KMeans codebooks
mnb_modelname = "mnb_modelname"    # saved MultinomialNB classifiers

# Model sizes.
cluster_size = 1000  # number of clusters requested from make_codebook
no_topics = 6        # topic count for topic-model decompositions (e.g. NMF n_components)
    
def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path``; the file is flushed and closed even on error."""
    with open(path, "wb") as fh:
        pickle.dump(obj, fh)


def _load_pickle(path):
    """Return the object pickled at ``path``.

    NOTE(security): unpickling can execute arbitrary code. Only use this on
    files written by this pipeline, never on untrusted input.
    """
    with open(path, "rb") as fh:
        return pickle.load(fh)


def _min_max_scale(values, name):
    """Min-max scale ``values`` and return ``(scaled, vmin, vmax)``.

    Args:
        values: numpy array to scale.
        name: label used in the error message.

    Raises:
        ValueError: if every value is identical, because the scaling would
            divide by zero and fill the array with NaNs.
    """
    vmin, vmax = np.min(values), np.max(values)
    if vmax == vmin:
        raise ValueError(
            "Cannot normalize {}: all values are equal to {}".format(name, vmin)
        )
    return (values - vmin) / (vmax - vmin), vmin, vmax


def main():
    """Train the bag-of-visual-words classifiers and evaluate on validation data.

    Steps:
      1. Load the cached training optical-flow features, or extract and cache
         them when the cache file is missing.
      2. Min-max normalize the flow magnitudes and angles.
      3. Build one KMeans codebook per modality and persist both.
      4. Build the training BoVW dataframes and fit one MultinomialNB model
         per modality; persist both models.
      5. Extract validation features, build their BoVW dataframes and evaluate
         the magnitude model (the angle model is trained and saved but not
         evaluated here).

    Every artifact is written below ``destpath``.

    Raises:
        ValueError: if the magnitudes or the angles are all identical.
    """
    os.makedirs(destpath, exist_ok=True)

    ###########################################################################
    # Step 1: optical flow of the training videos (cached on disk).
    print("Extract optical flow data for training set...")
    flow_filepath = os.path.join(destpath, flow_filename + "_train.pkl")
    if os.path.exists(flow_filepath):
        features = _load_pickle(flow_filepath)
    else:
        # No cache yet: extract once and persist, so that the loader below
        # (which reads the pickle by path) finds the file on a fresh run.
        features = extract_flow_seq_train(DATASET, grid_size=10)
        _dump_pickle(features, flow_filepath)
        print("Written training features to disk...")

    # Key points (flow vectors) gathered over all training videos.
    mag, ang = extract_vec_points(flow_filepath)
    # Replace infinite magnitudes by 0 so they cannot dominate the min/max.
    mag[np.isinf(mag)] = 0

    ###########################################################################
    # Step 2: normalize. The extrema are kept because the BoVW builders
    # receive them as arguments further down.
    mag, mag_min, mag_max = _min_max_scale(mag, "magnitude")
    ang, ang_min, ang_max = _min_max_scale(ang, "angle")

    ###########################################################################
    # Step 3: codebooks. The cluster centres represent the vocabulary words.
    km_file_mag = os.path.join(destpath, km_model_filename + "_mag.pkl")
    km_file_ang = os.path.join(destpath, km_model_filename + "_ang.pkl")
    km_mag = make_codebook(mag, cluster_size)
    km_ang = make_codebook(ang, cluster_size)

    print("Writing the KMeans models to disk...")
    _dump_pickle(km_mag, km_file_mag)
    _dump_pickle(km_ang, km_file_ang)
    # Reload so the rest of the run uses exactly what was persisted.
    km_mag = _load_pickle(km_file_mag)
    km_ang = _load_pickle(km_file_ang)

    ###########################################################################
    # Step 4: training BoVW dataframes, indexed by video name
    # (presumably one column per codebook cluster -- TODO confirm).
    print("Create a dataframe for magnitudes and angles...")
    df_train_mag, df_train_ang = create_bovw_traindf(
        features, km_mag, km_ang, mag_min, mag_max, ang_min, ang_max, destpath
    )
    train_vids = list(df_train_mag.index)
    train_labels = np.array([get_video_label(v) for v in train_vids])
    # Single-column dataframe of labels aligned with the feature index.
    train_labs_df = pd.DataFrame(train_labels, index=train_vids, columns=['label'])
    print("Training dataframe formed.")

    ###########################################################################
    # Train one Multinomial Naive Bayes classifier per modality.
    print("Training the model ")
    model_mag = MultinomialNB().fit(df_train_mag, train_labs_df['label'])
    model_ang = MultinomialNB().fit(df_train_ang, train_labs_df['label'])

    print("training complete saving to disk ")
    mag_model_path = os.path.join(destpath, mnb_modelname + "_mag.pkl")
    ang_model_path = os.path.join(destpath, mnb_modelname + "_ang.pkl")
    joblib.dump(model_mag, mag_model_path)
    joblib.dump(model_ang, ang_model_path)

    # Reload so evaluation runs on the persisted models.
    model_mag = joblib.load(mag_model_path)
    model_ang = joblib.load(ang_model_path)

    ###########################################################################
    # Step 5: evaluation on the validation set.
    bgthresh = 60000
    target_file = os.path.join(
        destpath, flow_filename + "_val_BG" + str(bgthresh) + ".pkl"
    )
    features_val = extract_flow_val(
        DATASET, bgthresh, grid_size=10, partition="validation"
    )
    _dump_pickle(features_val, target_file)
    features_val = _load_pickle(target_file)

    print("Create dataframe BOVW validation set (mag)")
    df_test_mag, df_test_ang = create_bovw_testdf(
        features_val, km_mag, km_ang,
        mag_min, mag_max, ang_min, ang_max,
        destpath,
    )
    val_vids = list(df_test_mag.index)
    val_labels = np.array([get_video_label(v) for v in val_vids])
    val_labs_df = pd.DataFrame(val_labels, index=val_vids, columns=['label'])

    print("Evaluating on the validation set (mag)")
    evaluate(model_mag, df_test_mag, val_labs_df)

    ###########################################################################

# Script entry point: run the pipeline only when this code is executed
# directly, not when it is imported.
if __name__ == "__main__":
    main()

Extract optical flow data for training set...
Written training features to disk...
Video person17_jogging_d3_uncomp.avi : Size_mag : 223
Video person13_running_d1_uncomp.avi : Size_mag : 406
Video person17_walking_d2_uncomp.avi : Size_mag : 1049
Video person16_jogging_d4_uncomp.avi : Size_mag : 1362
Video person13_walking_d4_uncomp.avi : Size_mag : 1800
Video person16_boxing_d1_uncomp.avi : Size_mag : 2289
Video person17_boxing_d3_uncomp.avi : Size_mag : 2756
Video person18_jogging_d2_uncomp.avi : Size_mag : 3092
Video person17_walking_d1_uncomp.avi : Size_mag : 3521
Video person14_handwaving_d4_uncomp.avi : Size_mag : 4147
Video person14_walking_d2_uncomp.avi : Size_mag : 4738
Video person17_handclapping_d1_uncomp.avi : Size_mag : 5138
Video person16_boxing_d4_uncomp.avi : Size_mag : 5593
Video person12_handclapping_d1_uncomp.avi : Size_mag : 5980
Video person18_walking_d3_uncomp.avi : Size_mag : 6340
Video person11_jogging_d1_uncomp.avi : Size_mag : 6579
Video person16_walking_d3_unc

Clustering using KMeans: Input size -> (74016, 192) :: n_clusters -> 1000
