## Object Bank features - Dimensionality Reduction, LR and LSVC classifiers

### Importing the data and encoding it as samples and targets

In [2]:
import os
import glob
import numpy as np
# Per-frame feature vectors (X) and their integer class labels (Y), index-aligned.
X = []
Y = []

Dataset_path ="./Dataset/Objectbank/"

# The first tuple yielded by os.walk is (root, dirnames, filenames), so [1] is
# the list of immediate sub-directories. The built-in next() works on both
# Python 2 and 3, unlike the generator method .next().
# Sorting pins the category -> label mapping; os.walk itself returns the
# directories in arbitrary filesystem order.
cat_dirs = sorted(next(os.walk(Dataset_path))[1])

for label, cat in enumerate(cat_dirs):
    video_dirs_path = os.path.join(Dataset_path, cat)
    video_dirs = sorted(next(os.walk(video_dirs_path))[1])

    for vid_dir in video_dirs:
        vid_dir_path = os.path.join(video_dirs_path, vid_dir)
        # Sorted so the frames of a video are always appended in the same
        # order; later cells group consecutive rows into one video.
        for fname in sorted(glob.glob(os.path.join(vid_dir_path, "*.feat"))):
            X.append(np.loadtxt(fname))
            Y.append(label)

### L1 based Feature selection

In [None]:
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel

# Fit an L1-penalised linear SVM on the per-frame features and let
# SelectFromModel keep the features the fitted model selects.
# Targets are the per-frame labels `Y` (one entry per row of `X`).
# NOTE(review): the selector is fitted on every frame, including frames of
# videos that later land in the test split, so downstream accuracies may be
# optimistic.
lsvc = LinearSVC(C=0.01, penalty="l1", dual=False).fit(X, Y)
model = SelectFromModel(lsvc, prefit=True)
X_L1 = model.transform(X)

## Combining the feature vectors of frames and splitting the dataset into train and test sets

We want to have one feature vector that represents the whole video.

In [4]:
# Dataset layout assumed by this notebook: 8 classes x 40 videos per class,
# 6 consecutive frame rows per video, videos stored class by class.
FRAMES_PER_VIDEO = 6
N_CLASSES = 8
VIDEOS_PER_CLASS = 40
N_VIDEOS = N_CLASSES * VIDEOS_PER_CLASS

frame_features = np.asarray(X_L1)
n_rows_needed = N_VIDEOS * FRAMES_PER_VIDEO
if len(frame_features) < n_rows_needed:
    raise ValueError(
        "expected at least %d frame rows (%d videos x %d frames), got %d"
        % (n_rows_needed, N_VIDEOS, FRAMES_PER_VIDEO, len(frame_features))
    )

# Concatenate the frame vectors of each video into a single row
# (row k = frames 6k .. 6k+5 laid end to end).
X_array = frame_features[:n_rows_needed].reshape(N_VIDEOS, -1)
# One label per video: class i for videos 40*i .. 40*i + 39.
Y_array = np.repeat(np.arange(N_CLASSES), VIDEOS_PER_CLASS)

from sklearn.model_selection import train_test_split
X_train1, X_test1, y_train1, y_test1 = train_test_split(
    X_array, Y_array, test_size=0.2, random_state=42
)


In [24]:
# (number of training videos, concatenated feature length per video)
print(np.shape(X_train1))

(256, 1278)


## Logistic Regression

In [7]:
from sklearn.metrics import accuracy_score

from sklearn.linear_model import LogisticRegressionCV
# Multinomial logistic regression with 5-fold cross-validation, fitted on the
# training split of the L1-selected features.
LR_clf_L1 = LogisticRegressionCV(
    cv=5,
    random_state=0,
    multi_class='multinomial',
).fit(X_train1, y_train1)

# Accuracy on the held-out test split.
y_pred_LR_L1 = LR_clf_L1.predict(X_test1)
Acc_LR_l1 = accuracy_score(y_true=y_test1, y_pred=y_pred_LR_L1)
print("Accuracy for L1 - Logistic regression:" ,Acc_LR_l1 )

Accuracy for L1 - Logistic regression: 0.8125


## Linear SVC

In [8]:

from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
# Linear SVM fitted on the training split of the L1-selected features.
SVC_clf_L1 = LinearSVC(
    tol=1e-5,
    random_state=0,
).fit(X_train1, y_train1)

# Accuracy on the held-out test split.
y_pred_SVC_L1 = SVC_clf_L1.predict(X_test1)
Acc_SVC_L1 = accuracy_score(y_true=y_test1, y_pred=y_pred_SVC_L1)
print("Accuracy for L1 - LSVC:" ,Acc_SVC_L1 )

Accuracy for L1 - LSVC: 0.78125


## Tree based feature selection

In [9]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel


# Extra-trees forest used only to rank features. It is seeded so that the set
# of selected features (and everything downstream) is reproducible.
# NOTE(review): the forest is fitted on every frame, including frames of
# videos that later land in the test split, so downstream accuracies may be
# optimistic.
ETC = ExtraTreesClassifier(random_state=0).fit(X, Y)

# prefit=True reuses the fitted forest; SelectFromModel keeps the features
# whose importance passes its default threshold.
model2 = SelectFromModel(ETC, prefit=True)
X_new_2 = model2.transform(X)


## Combining the feature vectors of frames and splitting the dataset into train and test sets

In [10]:
# Dataset layout assumed by this notebook: 8 classes x 40 videos per class,
# 6 consecutive frame rows per video, videos stored class by class.
FRAMES_PER_VIDEO = 6
N_CLASSES = 8
VIDEOS_PER_CLASS = 40
N_VIDEOS = N_CLASSES * VIDEOS_PER_CLASS

frame_features = np.asarray(X_new_2)
n_rows_needed = N_VIDEOS * FRAMES_PER_VIDEO
if len(frame_features) < n_rows_needed:
    raise ValueError(
        "expected at least %d frame rows (%d videos x %d frames), got %d"
        % (n_rows_needed, N_VIDEOS, FRAMES_PER_VIDEO, len(frame_features))
    )

# Concatenate the frame vectors of each video into a single row
# (row k = frames 6k .. 6k+5 laid end to end).
X_array = frame_features[:n_rows_needed].reshape(N_VIDEOS, -1)
# One label per video: class i for videos 40*i .. 40*i + 39.
Y_array = np.repeat(np.arange(N_CLASSES), VIDEOS_PER_CLASS)

from sklearn.model_selection import train_test_split
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    X_array, Y_array, test_size=0.2, random_state=42
)


In [11]:
# (number of training videos, concatenated feature length per video)
print(np.shape(X_train2))

(256, 25662)


## Logistic Regression

In [12]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegressionCV
# Multinomial logistic regression with 5-fold cross-validation, fitted on the
# training split of the tree-selected features.
LR_clf_T = LogisticRegressionCV(
    cv=5,
    random_state=0,
    multi_class='multinomial',
).fit(X_train2, y_train2)

# Accuracy on the held-out test split.
y_pred_LR_T = LR_clf_T.predict(X_test2)
Acc_LR_T = accuracy_score(y_true=y_test2, y_pred=y_pred_LR_T)
print("Accuracy for Tree based feature extraction - Logistic regression:" ,Acc_LR_T )

Accuracy for Tree based feature extraction - Logistic regression: 0.75


## Linear SVC

In [13]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
# Linear SVM fitted on the training split of the tree-selected features.
SVC_clf_T = LinearSVC(
    tol=1e-5,
    random_state=0,
).fit(X_train2, y_train2)

# Accuracy on the held-out test split.
y_pred_SVC_T = SVC_clf_T.predict(X_test2)
Acc_SVC_T = accuracy_score(y_true=y_test2, y_pred=y_pred_SVC_T)
print("Accuracy for Tree based feature extraction - LSVC:" , Acc_SVC_T )

Accuracy for Tree based feature extraction - LSVC: 0.75


## Random Projection

In [14]:

from sklearn import random_projection
# Gaussian random projection of the per-frame features. The projection matrix
# is drawn at random, so it is seeded to make X_LD (and the accuracies derived
# from it) reproducible across runs.
transformer = random_projection.GaussianRandomProjection(random_state=0)
X_LD = transformer.fit_transform(X)



In [15]:
# Dataset layout assumed by this notebook: 8 classes x 40 videos per class,
# 6 consecutive frame rows per video, videos stored class by class.
FRAMES_PER_VIDEO = 6
N_CLASSES = 8
VIDEOS_PER_CLASS = 40
N_VIDEOS = N_CLASSES * VIDEOS_PER_CLASS

frame_features = np.asarray(X_LD)
n_rows_needed = N_VIDEOS * FRAMES_PER_VIDEO
if len(frame_features) < n_rows_needed:
    raise ValueError(
        "expected at least %d frame rows (%d videos x %d frames), got %d"
        % (n_rows_needed, N_VIDEOS, FRAMES_PER_VIDEO, len(frame_features))
    )

# Concatenate the frame vectors of each video into a single row
# (row k = frames 6k .. 6k+5 laid end to end).
X_array = frame_features[:n_rows_needed].reshape(N_VIDEOS, -1)
# One label per video: class i for videos 40*i .. 40*i + 39.
Y_array = np.repeat(np.arange(N_CLASSES), VIDEOS_PER_CLASS)

from sklearn.model_selection import train_test_split
X_train3, X_test3, y_train3, y_test3 = train_test_split(
    X_array, Y_array, test_size=0.2, random_state=42
)
print(X_train3.shape)

## Logistic Regression

In [18]:
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegressionCV
# Multinomial logistic regression with 5-fold cross-validation, fitted on the
# training split of the randomly projected features.
LR_clf_LD = LogisticRegressionCV(
    cv=5,
    random_state=0,
    multi_class='multinomial',
).fit(X_train3, y_train3)

# Accuracy on the held-out test split.
y_pred_LR_LD = LR_clf_LD.predict(X_test3)
Acc_LR_LD = accuracy_score(y_true=y_test3, y_pred=y_pred_LR_LD)
print("Accuracy for Random Projection - Logistic regression:" ,Acc_LR_LD )

Accuracy for Random Projection - Logistic regression: 0.765625


## Linear SVC

In [19]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
# Linear SVM fitted on the training split of the randomly projected features.
SVC_clf_LD = LinearSVC(
    tol=1e-5,
    random_state=0,
).fit(X_train3, y_train3)

# Accuracy on the held-out test split.
y_pred_SVC_LD = SVC_clf_LD.predict(X_test3)
Acc_SVC_LD = accuracy_score(y_true=y_test3, y_pred=y_pred_SVC_LD)
print("Accuracy for Random Projection - LSVC:" ,Acc_SVC_LD )

Accuracy for Random Projection - LSVC: 0.734375


## Principal Component Analysis 

In [20]:
from sklearn.decomposition import PCA
# PCA fitted on the per-frame features, then applied to the same data.
# NOTE(review): no n_components is passed, so the library default applies; it
# looks like every component is kept, which would make this a rotation rather
# than a reduction - confirm.
pca = PCA()
FE3 = pca.fit(X)  # fit() returns the fitted estimator, so FE3 is `pca` itself
X_PC = FE3.transform(X)

In [21]:

# Dataset layout assumed by this notebook: 8 classes x 40 videos per class,
# 6 consecutive frame rows per video, videos stored class by class.
FRAMES_PER_VIDEO = 6
N_CLASSES = 8
VIDEOS_PER_CLASS = 40
N_VIDEOS = N_CLASSES * VIDEOS_PER_CLASS

frame_features = np.asarray(X_PC)
n_rows_needed = N_VIDEOS * FRAMES_PER_VIDEO
if len(frame_features) < n_rows_needed:
    raise ValueError(
        "expected at least %d frame rows (%d videos x %d frames), got %d"
        % (n_rows_needed, N_VIDEOS, FRAMES_PER_VIDEO, len(frame_features))
    )

# Concatenate the frame vectors of each video into a single row
# (row k = frames 6k .. 6k+5 laid end to end).
X_array = frame_features[:n_rows_needed].reshape(N_VIDEOS, -1)
# One label per video: class i for videos 40*i .. 40*i + 39.
Y_array = np.repeat(np.arange(N_CLASSES), VIDEOS_PER_CLASS)

from sklearn.model_selection import train_test_split
X_train4, X_test4, y_train4, y_test4 = train_test_split(
    X_array, Y_array, test_size=0.2, random_state=42
)

print(X_train4.shape)

(256, 11520)


## Logistic Regression

In [22]:
from sklearn.metrics import accuracy_score
 
from sklearn.linear_model import LogisticRegressionCV
# Multinomial logistic regression with 5-fold cross-validation, fitted on the
# training split of the PCA-transformed features.
LR_clf_PC = LogisticRegressionCV(
    cv=5,
    random_state=0,
    multi_class='multinomial',
).fit(X_train4, y_train4)

# Accuracy on the held-out test split.
y_pred_LR_PC = LR_clf_PC.predict(X_test4)
Acc_LR_PC = accuracy_score(y_true=y_test4, y_pred=y_pred_LR_PC)
print("Accuracy for PCA - Logistic regression:" ,Acc_LR_PC )

Accuracy for PCA - Logistic regression: 0.75


## Linear SVC

In [23]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
# Linear SVM fitted on the training split of the PCA-transformed features.
SVC_clf_PC = LinearSVC(
    tol=1e-5,
    random_state=0,
).fit(X_train4, y_train4)

# Accuracy on the held-out test split.
y_pred_SVC_PC = SVC_clf_PC.predict(X_test4)
Acc_SVC_PC = accuracy_score(y_true=y_test4, y_pred=y_pred_SVC_PC)
print("Accuracy for PCA - LSVC:" ,Acc_SVC_PC )

Accuracy for PCA - LSVC: 0.75
