# Train RBF SVM using only Bag of Visual words IDTF features

In [1]:
import pandas as pd
import sys, numpy as np
sys.path.append("/afs/cs.stanford.edu/u/anenberg/scr/caffe/python/")
import caffe, os
import lmdb
import pickle
import matplotlib

# location of the list that was used to create LMDB for training/testing
full_list = '../../data/ucf_recognition_20/lists/sampled_t1_p10_fullpath_all_list.txt'
train_list = '../../data/ucf_recognition_20/lists/sampled_t1_p10_fullpath_train_list.txt'
train_list = '../../data/ucf_recognition_20/lists/sampled_t1_p10_fullpath_test_list.txt'

num_train = 23924
num_test = 11190
num_frames = 35114
num_classes = 20 # the number of classes

from sklearn.svm import LinearSVC
from sklearn import svm
from sklearn.multiclass import OneVsRestClassifier
import sklearn.metrics as metrics
import pickle


##Load training data

In [2]:
train_bov = np.load('./bov_train.npz')
train_df = pd.DataFrame(train_bov['data'])
train_df['video_name'] = train_bov['video_name']
train_df['true_label'] = train_bov['true_label']

In [3]:
X_train = train_bov['data']
y_train = train_bov['true_label']

## Load test data

In [4]:
test_bov = np.load('./bov_test.npz')
test_df = pd.DataFrame(test_bov['data'])
test_df['video_name'] = test_bov['video_name']
test_df['true_label'] = test_bov['true_label']

In [5]:
X_test = test_bov['data']
y_test = test_bov['true_label']

In [6]:
X_train.shape[0]+X_test.shape[0]

2765

##Train 1-vs-all SVM classifier

In [11]:
estimator = OneVsRestClassifier(LinearSVC(random_state=0, C=100, loss='l2', penalty='l2'))
classifier = estimator.fit(X_train, y_train)



In [7]:
def compute_scores_df(classifier, X, y):
    scores = classifier.decision_function(X)
    df = pd.DataFrame(scores)
    df['true_label'] = y
    df['predicted_label'] = np.argmax(scores,axis=1)
    return df


In [8]:
def compute_classic_average_precision(scores_data, class_id):
    # given a class_id compute the average precision for that class
    df = scores_data[[class_id, 'true_label']]
    df = df.sort(class_id, ascending=False)
    df['match'] = (df.true_label == class_id)
    df['precision'] = np.cumsum(df.match)/np.arange(1, len(scores_data) + 1)
    npos = sum(df.match)
    df['precision_match'] = (df.match)*df.precision
    ap = sum(df.precision_match)/npos
    return ap

def compute_rect_average_precision(scores_data, class_id):
    # given a class_id compute the average precision for that class
    df = scores_data[[class_id, 'true_label', 'predicted_label']]
    df = df.sort(class_id, ascending=False)
    df['match'] = (df.true_label == class_id)
    df['precision'] = np.cumsum(df.match)/np.arange(1, len(scores_data) + 1)
    df['recall'] = np.cumsum(df.match)/np.sum(df.true_label == class_id)
    df['delta_recall'] = np.diff(np.append([0], df.recall.values)) #delta_recall(i) = recall(i)-recall(i-1)
    df['rect_area'] = df['precision'] * df['delta_recall']
    return sum(df.rect_area)


In [9]:
def compute_mean_average_precision(scores_data):
    average_precision = [] # list to hold average_precision
    for class_id in range(num_classes):
        ca = compute_classic_average_precision(scores_data, class_id)
        average_precision.append(ca)
    return np.mean(average_precision)

In [35]:
test_scores = compute_scores_df(classifier,X_test,y_test)
average_precision = [] # list to hold average_precision

for class_id in range(num_classes):
    ca = compute_classic_average_precision(test_scores, class_id)
    average_precision.append(ca)
    
#average_precision = [x if not np.isnan(x) else 0 for x in average_precision]
#labels_as_text = [categories[i] for i in range(num_classes)]

In [36]:
print 'MAP: %2f'%np.mean(average_precision)
# What was the accuracy?
print 'Accuracy: %2f' % np.mean(test_scores.true_label == test_scores.predicted_label)

MAP: 0.980090
Accuracy: 0.979882


In [39]:
average_precision = [] # list to hold average_precision

for class_id in range(num_classes):
    ca = compute_rect_average_precision(test_scores, class_id)
    average_precision.append(ca)
    
print 'MAP: %2f'%np.mean(average_precision)
# What was the accuracy?
print 'Accuracy: %2f' % np.mean(test_scores.true_label == test_scores.predicted_label)

MAP: 0.980090
Accuracy: 0.979882


#Add pre-trained CNN features to BoV vectors, and train SVM
## First, get frame labels from list

In [10]:

def get_label_from_list(list_filename):
    df = pd.read_csv(list_filename, delimiter= ' ', header = None, names = ['filename', 'class_id'])
    return df

frame_df =  get_label_from_list(full_list)
labels = frame_df.class_id.values
frame_df['video_name'] = frame_df.filename.apply(lambda x: x.split('/')[-2])
frame_df['frame_number'] = frame_df.filename.apply(lambda x: x.split('/')[-1])
print "number of frames in the list %d, number of videos in list %d" % (len(frame_df),len(frame_df.video_name.unique()))

number of frames in the list 35114, number of videos in list 2763


## Extract the cnn features using "python extract_cnn_features.py"

In [11]:
all_cnn = np.load('./cnn_features.npz')['data']
all_cnn_df = pd.DataFrame(all_cnn)
all_cnn_df['video_name'] = frame_df.video_name.unique()

#train an SVM and measure mAP using only the cnn features.

In [21]:
X_train_cnn = all_cnn_df[all_cnn_df.video_name.isin(train_df.video_name[:-1])].as_matrix(columns=range(4096))
X_test_cnn = all_cnn_df[all_cnn_df.video_name.isin(test_df.video_name[:-1])].as_matrix(columns=range(4096))
y_train_cnn = y_train[:-1]
y_test_cnn = y_test[:-1]

In [22]:
estimator_cnn = OneVsRestClassifier(LinearSVC(random_state=0, C=100, loss='l2', penalty='l2'))
classifier_cnn = estimator_cnn.fit(X_train_cnn, y_train_cnn)

In [24]:
test_scores_cnn = compute_scores_df(classifier_cnn,X_test_cnn,y_test_cnn)
print 'MAP: %2f'% compute_mean_average_precision(test_scores_cnn)
# What was the accuracy?
print 'Accuracy: %2f' % np.mean(test_scores_cnn.true_label == test_scores_cnn.predicted_label)

MAP: 0.070348
Accuracy: 0.055687


#Join the BoV features and CNN features

In [None]:
train_df2 = train_df[:-1]
test_df2 = test_df[:-1]

In [203]:
test_joined_df = pd.merge(test_df2,all_cnn_df,how='left', on='video_name')
train_joined_df = pd.merge(train_df2,all_cnn_df,how='left', on='video_name')

In [204]:
def new_index(x):
    """
    only works if there are fewer columns in the left df.
    """
    new_x = x
    if type(x)==str:
        if '_x' in x:
            new_x = int(x[:-2])
        elif '_y' in x:
            new_x = 16000+int(x[:-2])
    return new_x
        
test_joined_df.rename(columns=new_index, inplace=True)
train_joined_df.rename(columns=new_index, inplace=True)

#train an SVM and measure MAP

In [210]:
X_train2 = train_joined_df.as_matrix(columns=range(20096))
X_test2 = test_joined_df.as_matrix(columns=range(20096))
y_train2 = np.array(train_joined_df.true_label)
y_test2 = np.array(test_joined_df.true_label)

In [212]:
estimator2 = OneVsRestClassifier(LinearSVC(random_state=0, C=100, loss='l2', penalty='l2'))
classifier2 = estimator.fit(X_train2, y_train2)

# compute MAP

In [214]:
test_scores2 = compute_scores_df(classifier2,X_test2,y_test2)
print 'MAP: %2f'% compute_mean_average_precision(test_scores2)
# What was the accuracy?
print 'Accuracy: %2f' % np.mean(test_scores2.true_label == test_scores2.predicted_label)

MAP: 0.924445
Accuracy: 0.938389
