**IMPORT FROM DRIVE**

In [13]:
from google.colab import drive
import os
drive.mount('/content/drive/', force_remount=True)
os.chdir('/content/drive/My Drive/')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive/


In [4]:
!pip install pyprind

Collecting pyprind
  Downloading https://files.pythonhosted.org/packages/1e/30/e76fb0c45da8aef49ea8d2a90d4e7a6877b45894c25f12fb961f009a891e/PyPrind-2.11.2-py3-none-any.whl
Installing collected packages: pyprind
Successfully installed pyprind-2.11.2


**IMPORT LIBRARIES**

In [0]:
import pandas as pd

from tensorflow.python.keras import Sequential
from tensorflow.python.keras import layers
from tensorflow.python.keras import regularizers
from tensorflow.python.keras.preprocessing.text import Tokenizer

import numpy as np
from string import punctuation
import pyprind
from collections import Counter
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [0]:
from numpy.random import seed
seed(1)
import tensorflow
tensorflow.random.set_seed(1)

**REGRESSION MODEL - RANDOM FOREST REGRESSOR**

In [0]:
from sklearn.ensemble import RandomForestRegressor

**Grid search parameters for RandomForestRegressor**

In [0]:
from sklearn.model_selection import GridSearchCV
#Setting parameters for GridSearch
grid_values = {
    'bootstrap': [True],
    'max_depth': [70, 80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12, 14],
    'n_estimators': [70, 100, 130, 160]
}
#Base RandomForestRegressor model
rfr = RandomForestRegressor()
#Instantiate the grid search model
grid_search = GridSearchCV(estimator = rfr, param_grid = grid_values, cv = 3, n_jobs = -1, verbose = 2)



**SPEARMANS CORRELATION COEFFICIENT FUNCTION**

In [0]:
def Get_score(Y_pred,Y_true):
    '''Calculate the Spearmann"s correlation coefficient'''
    Y_pred = np.squeeze(Y_pred)
    Y_true = np.squeeze(Y_true)
    if Y_pred.shape != Y_true.shape:
        print('Invalid input shapes.')
    else:
        if len(Y_pred.shape) == 1:
            Res = pd.DataFrame({'Y_true':Y_true,'Y_pred':Y_pred})
            score_mat = Res[['Y_true','Y_pred']].corr(method='spearman',min_periods=1)
            print('Spearman\'s correlation coefficient:\t %.3f' % score_mat.iloc[1][0])
        else:
            for ii in range(Y_pred.shape[1]):
                Get_score(Y_pred[:,ii],Y_true[:,ii])

**ACCURACY FUNCTION**

In [0]:
def evaluate(model, X_test, Y_test):
    pred = model.predict(X_test) 
    err = abs(pred - Y_test) #Calculating error
    err_perc = 100 * np.mean(err / Y_test)
    accuracy = 100 - err_perc #Calculating accuracy of the model

    print('Average Error\t: {:0.4f} degrees.'.format(np.mean(err)))
    print('Accuracy\t: {:0.2f}%.'.format(accuracy))
    
    return accuracy

**-------------------------------------CAPTIONS-------------------------------------**

In [0]:
#Function to load captions
def load_caps(fname):
    """Load the captions into a dataframe"""
    vn = []
    cap = []
    df = pd.DataFrame();
    with open(fname) as f:
        for line in f:
            pairs = line.split()
            vn.append(pairs[0])
            cap.append(pairs[1])
        df['video']=vn  #Video name to column 'video'
        df['caption']=cap   #Caption to column 'caption' 
    return df

cap_path = './Dev-set/Captions/dev-set_video-captions.txt'   #Caption path
df_cap=load_caps(cap_path)   #Calling the function

# load the ground truth values
label_path = './Dev-set/Ground-truth/'
labels=pd.read_csv(label_path+'ground-truth.csv')

In [117]:
print(df_cap)

               video                                            caption
0        video3.webm                 blonde-woman-is-massaged-tilt-down
1        video4.webm  roulette-table-spinning-with-ball-in-closeup-shot
2        video6.webm                                      khr-gangsters
3        video8.webm               medical-helicopter-hovers-at-airport
4       video10.webm               couple-relaxing-on-picnic-crane-shot
...              ...                                                ...
5995  video7488.webm        beautiful-young-woman-in-front-of-fountains
5996  video7489.webm  focus-pull-from-molting-penguin-to-penguin-col...
5997  video7491.webm           students-walking-in-university-of-mexico
5998  video7492.webm                       beautiful-black-woman-at-spa
5999  video7493.webm  beautiful-modern-brunette-woman-using-tablet-c...

[6000 rows x 2 columns]


In [21]:
print(labels.iloc[1])

video                        video4.webm
short-term_memorability            0.923
nb_short-term_annotations             33
long-term_memorability             0.667
nb_long-term_annotations              12
Name: 1, dtype: object


**ELIMINATE PUNCTUATIONS AND CLEANING CAPTIONS**

In [22]:
counts = Counter()
# setup prograss tracker
pbar = pyprind.ProgBar(len(df_cap['caption']), title='Counting word occurrences')
for i, cap in enumerate(df_cap['caption']):
    text = ''.join([c if c not in punctuation else ' ' for c in cap]).lower()   #Replacing punctuations with space and convert the caption to lower case
    df_cap.loc[i,'caption'] = text
    pbar.update()
    counts.update(text.split())

Counting word occurrences
0% [##############################] 100% | ETA: 00:00:00
Total time elapsed: 00:00:01


In [23]:
df_cap

Unnamed: 0,video,caption
0,video3.webm,blonde woman is massaged tilt down
1,video4.webm,roulette table spinning with ball in closeup shot
2,video6.webm,khr gangsters
3,video8.webm,medical helicopter hovers at airport
4,video10.webm,couple relaxing on picnic crane shot
...,...,...
5995,video7488.webm,beautiful young woman in front of fountains
5996,video7489.webm,focus pull from molting penguin to penguin col...
5997,video7491.webm,students walking in university of mexico
5998,video7492.webm,beautiful black woman at spa


In [24]:
print(counts)
print(len(counts))

5191


**ONE HOT ENCODING**

In [25]:
#Building the index based on words
len_token = len(counts)
tokenizer = Tokenizer(num_words=len_token)
print(len_token)

5191


In [26]:
tokenizer.fit_on_texts(list(df_cap.caption.values))
print(len(tokenizer.word_index))
one_hot = tokenizer.texts_to_matrix(list(df_cap.caption.values),mode='binary')
seq = tokenizer.texts_to_sequences(list(df_cap.caption.values))

5191


**SETTING TEST VALUES FOR X**

In [27]:
max_len = 50
X_seq = np.zeros((len(seq),max_len))
for i in range(len(seq)):
    n = len(seq[i])
    if n==0:
        print(i)
    else:
        X_seq[i,-n:] = seq[i]
X_seq.shape

(6000, 50)

**TRAIN TEST SPLIT - CAPTIONS**

In [0]:
CAP_X = X_seq
CAP_Y = labels[['short-term_memorability','long-term_memorability']].values

CAP_X_train, CAP_X_test, CAP_Y_train, CAP_Y_test = train_test_split(CAP_X,CAP_Y, test_size=0.2, random_state=42)


**GRID SEARCH - CAPTIONS**

In [29]:
grid_search.fit(CAP_X_train,CAP_Y_train)

Fitting 3 folds for each of 480 candidates, totalling 1440 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   12.1s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:   45.2s
[Parallel(n_jobs=-1)]: Done 361 tasks      | elapsed:  1.8min
[Parallel(n_jobs=-1)]: Done 644 tasks      | elapsed:  3.2min
[Parallel(n_jobs=-1)]: Done 1009 tasks      | elapsed:  4.9min
[Parallel(n_jobs=-1)]: Done 1440 out of 1440 | elapsed:  7.1min finished


GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, random_state=None,
                                             verbose=0, warm_start=False),
             iid='deprecated', n_jo

In [30]:
grid_search.best_estimator_   #Best estimator for Captions

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=110, max_features=3, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=4,
                      min_samples_split=12, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

**PREDICTIONS AND SPEARMANS CORRELATION COEFFICIENT - CAPTIONS**

In [31]:
CAP_estimator = grid_search.best_estimator_
CAP_estimator.fit(CAP_X_train,CAP_Y_train)
pred_test_rfr = CAP_estimator.predict(CAP_X_test)

Get_score(pred_test_rfr, CAP_Y_test)   #Calling function to calculate Spearman's Correlation Coefficient

Spearman's correlation coefficient:	 0.250
Spearman's correlation coefficient:	 0.123


**ACCURACY - CAPTIONS**

In [36]:
print('SUMMARY - CAPTIONS:')
grid_accuracy = evaluate(CAP_estimator, CAP_X_test, CAP_Y_test)  #Calling function to calculate accuracy

SUMMARY - CAPTIONS:
Average Error	: 0.0883 degrees.
Accuracy	: 87.45%.


**-------------------------------------C3D-------------------------------------**


In [40]:
import os.path  
c3d_path = './Dev-set/C3D/'   #Path of C3D

os.listdir(c3d_path)[:5]


['video6632.txt',
 'video6634.txt',
 'video6633.txt',
 'video6645.txt',
 'video6643.txt']

**LOAD C3D**

In [0]:
#Function for loading C3D values
def load_C3D(fname):
    with open(fname) as f: 
      for line in f:
        C3D =[float(item) for item in line.split()]
    return C3D


**JOIN VIDEO ID AND C3D**

In [0]:
dev_path = './Dev-set/' #Path of Dev set directory
vid_names = labels.video.values

C3D_Video = pd.DataFrame({'video': vid_names, 'C3D': [load_C3D(dev_path+'C3D'+'/'+os.path.splitext(item)[0]+'.txt') for item in vid_names],}) #Concatenating video names and C3D

In [44]:
print(C3D_Video)

               video                                                C3D
0        video3.webm  [0.02024942, 0.0015778, 0.00082625, 0.00094509...
1        video4.webm  [0.000118, 0.00089075, 0.00018769, 4.543e-05, ...
2        video6.webm  [0.01176522, 0.00074577, 0.00078353, 1.328e-05...
3        video8.webm  [0.00022343, 0.00016499, 7.35e-06, 1.615e-05, ...
4       video10.webm  [9.006e-05, 0.00061494, 0.00343634, 0.00128092...
...              ...                                                ...
5995  video7488.webm  [0.35724622, 0.07598178, 0.00302532, 2.772e-05...
5996  video7489.webm  [0.19280288, 0.00093812, 0.00506054, 1.35e-06,...
5997  video7491.webm  [2.589e-05, 0.00012192, 2.781e-05, 2.608e-05, ...
5998  video7492.webm  [0.26509121, 0.09653918, 5.971e-05, 0.00022735...
5999  video7493.webm  [0.02058949, 0.00122141, 0.00206607, 0.0023161...

[6000 rows x 2 columns]


**TRAIN TEST SPLIT - C3D**

In [0]:
C3D_X = np.stack(C3D_Video['C3D'].values)   #Setting C3D values to X
C3D_Y = labels[['short-term_memorability','long-term_memorability']].values

C3D_X_train, C3D_X_test, C3D_Y_train, C3D_Y_test = train_test_split(C3D_X,C3D_Y, test_size=0.2, random_state=42)


In [46]:
C3D_X

array([[2.0249420e-02, 1.5778000e-03, 8.2625000e-04, ..., 3.3907000e-04,
        8.4368300e-03, 4.7004000e-04],
       [1.1800000e-04, 8.9075000e-04, 1.8769000e-04, ..., 1.5891000e-04,
        1.0252100e-03, 2.0460000e-05],
       [1.1765220e-02, 7.4577000e-04, 7.8353000e-04, ..., 5.3803300e-03,
        1.0274800e-03, 1.3844800e-03],
       ...,
       [2.5890000e-05, 1.2192000e-04, 2.7810000e-05, ..., 7.1593200e-03,
        4.9575200e-03, 8.9156000e-04],
       [2.6509121e-01, 9.6539180e-02, 5.9710000e-05, ..., 1.0834000e-04,
        1.2682000e-03, 7.1027000e-04],
       [2.0589490e-02, 1.2214100e-03, 2.0660700e-03, ..., 8.2839000e-04,
        4.9012800e-03, 1.1848300e-03]])

In [47]:
C3D_Y

array([[0.924, 0.846],
       [0.923, 0.667],
       [0.863, 0.7  ],
       ...,
       [0.713, 0.818],
       [0.954, 1.   ],
       [0.953, 0.615]])

**GRID SEARCH - C3D**

In [48]:
grid_search.fit(C3D_X_train,C3D_Y_train)

Fitting 3 folds for each of 480 candidates, totalling 1440 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   21.5s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  1.5min
[Parallel(n_jobs=-1)]: Done 361 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done 644 tasks      | elapsed:  6.6min
[Parallel(n_jobs=-1)]: Done 1009 tasks      | elapsed: 10.3min
[Parallel(n_jobs=-1)]: Done 1440 out of 1440 | elapsed: 15.0min finished


GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, random_state=None,
                                             verbose=0, warm_start=False),
             iid='deprecated', n_jo

In [49]:
grid_search.best_estimator_   #Best estimator for C3D

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=100, max_features=2, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=3,
                      min_samples_split=8, min_weight_fraction_leaf=0.0,
                      n_estimators=160, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

**PREDICTIONS AND SPEARMANS CORRELATION COEFFICIENT - C3D**

In [50]:
C3D_estimator = grid_search.best_estimator_
C3D_estimator.fit(C3D_X_train,C3D_Y_train)
pred_test_rfr = C3D_estimator.predict(C3D_X_test)

Get_score(pred_test_rfr, C3D_Y_test)   #Calling function to calculate Spearman's Correlation Coefficient


Spearman's correlation coefficient:	 0.313
Spearman's correlation coefficient:	 0.124


**ACCURACY - C3D**

In [51]:
print('SUMMARY - C3D:')
grid_accuracy = evaluate(C3D_estimator, C3D_X_test, C3D_Y_test)   #Calling function to calculate accuracy

SUMMARY - C3D:
Average Error	: 0.0879 degrees.
Accuracy	: 87.53%.


**-------------------------------------CAPTIONS & C3D-------------------------------------**

**MERGE CAPTIONS AND C3D VALUES**

In [52]:
#combining captions and c3d
cap_c3d = (X_seq.tolist())
i = 0
for item in range(6000):
    cap_c3d[i] = np.append(cap_c3d[i],C3D_Video['C3D'].iloc[i],axis=0)   #Concatenate captions and C3D of respective videos
    i+=1
len(cap_c3d)

6000

In [0]:
C3D_Video['C3D'].iloc[0]

In [0]:
cap_c3d

**TRAIN TEST SPLIT - CAPTIONS & C3D**

In [0]:
CAP_C3D_X = cap_c3d
CAP_C3D_Y = labels[['short-term_memorability','long-term_memorability']].values

CAP_C3D_X_train, CAP_C3D_X_test, CAP_C3D_Y_train, CAP_C3D_Y_test = train_test_split(CAP_C3D_X,CAP_C3D_Y, test_size=0.2, random_state=42)


**GRID SEARCH - CAPTIONS & C3D**

In [56]:
grid_search.fit(CAP_C3D_X_train,CAP_C3D_Y_train)

Fitting 3 folds for each of 480 candidates, totalling 1440 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:   23.5s
[Parallel(n_jobs=-1)]: Done 158 tasks      | elapsed:  1.6min
[Parallel(n_jobs=-1)]: Done 361 tasks      | elapsed:  4.0min
[Parallel(n_jobs=-1)]: Done 644 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done 1009 tasks      | elapsed: 11.2min
[Parallel(n_jobs=-1)]: Done 1440 out of 1440 | elapsed: 16.2min finished


GridSearchCV(cv=3, error_score=nan,
             estimator=RandomForestRegressor(bootstrap=True, ccp_alpha=0.0,
                                             criterion='mse', max_depth=None,
                                             max_features='auto',
                                             max_leaf_nodes=None,
                                             max_samples=None,
                                             min_impurity_decrease=0.0,
                                             min_impurity_split=None,
                                             min_samples_leaf=1,
                                             min_samples_split=2,
                                             min_weight_fraction_leaf=0.0,
                                             n_estimators=100, n_jobs=None,
                                             oob_score=False, random_state=None,
                                             verbose=0, warm_start=False),
             iid='deprecated', n_jo

In [57]:
grid_search.best_estimator_   #Best estimator for Captions and C3D combined

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=110, max_features=2, max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=4,
                      min_samples_split=14, min_weight_fraction_leaf=0.0,
                      n_estimators=130, n_jobs=None, oob_score=False,
                      random_state=None, verbose=0, warm_start=False)

**PREDICTIONS AND SPEARMANS CORRELATION COEFFICIENT - CAPTIONS & C3D**

In [58]:
CAP_C3D_estimator = grid_search.best_estimator_
CAP_C3D_estimator.fit(CAP_C3D_X_train,CAP_C3D_Y_train)
pred_test_rfr = CAP_C3D_estimator.predict(CAP_C3D_X_test)

Get_score(pred_test_rfr, CAP_C3D_Y_test)   #Calling function to calculate Spearman's Correlation Coefficient


Spearman's correlation coefficient:	 0.325
Spearman's correlation coefficient:	 0.141


**ACCURACY - CAPTIONS & C3D**

In [59]:
print('SUMMARY - CAPTIONS & C3D:')
grid_accuracy = evaluate(CAP_C3D_estimator, CAP_C3D_X_test, CAP_C3D_Y_test)   #Calling function to calculate accuracy

SUMMARY - CAPTIONS & C3D:
Average Error	: 0.0878 degrees.
Accuracy	: 87.55%.
