In [47]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split

from sklearn import linear_model

from scipy.stats import spearmanr

import pandas as pd
import numpy as np
import pickle

In [2]:
data_path = r'D:\Workspace\video-mem\training_set/'

In [3]:
score_data = pd.read_csv(data_path+'scores_v2.csv')
human_caption_data = pd.read_csv(data_path+'text_descriptions.csv')

In [4]:
score_and_caption = pd.merge(left = score_data, right = human_caption_data, how = 'inner', on = ['video_id'])[['video_id', 'part_1_scores', 'part_2_scores', 'description']]
score_and_caption.columns = ['video_id', 'short-term_memorability', 'long-term_memorability', 'description']
score_and_caption.head(10)

Unnamed: 0,video_id,short-term_memorability,long-term_memorability,description
0,8,0.75,0.57,2 men on a stage hug and walk away
1,8,0.75,0.57,two young man are standing on a stage embracin...
2,26,0.87,0.43,deadpool drinking from a cup
3,26,0.87,0.43,persons in costumes dance
4,33,0.69,0.75,a soccer player shoots a ball into a tiny goal...
5,33,0.69,0.75,soccer ball rolling into a small goal on a soc...
6,46,0.87,0.57,multiple chinese persons dance onstage
7,46,0.87,0.57,on a podium 5 person perform a dance
8,64,0.84,0.56,an asian man in a clothing store embraces an o...
9,64,0.84,0.56,an asian man in cloth shop opens his jacket an...


In [44]:
def get_bow(X):
    # bag of word
    cv = CountVectorizer(ngram_range=(2,4), stop_words='english')
    X_CV = cv.fit_transform(X)
    x = X_CV.toarray()
    return x

In [82]:
def train_different_linear(df):
    X = df['description']
    x_raw = get_bow(X)
    Y_short = df['short-term_memorability']
    Y_long = df['long-term_memorability']
    
    x_short_train, x_short_val, y_short_train, y_short_val = train_test_split(
    x_raw,
    Y_short,
    test_size=0.125,
    random_state=42)
    
    print(x_short_train.shape, x_short_val.shape)

    # linear regression
    linear_reg = linear_model.LinearRegression().fit(x_short_train, y_short_train)
    y_linear_pred = linear_reg.predict(x_short_val)
    print('linear regression: ', spearmanr(y_short_val, y_linear_pred))
    
    best_alpha = 0
    best_spearmanr = 0
    # ridge
    for alpha_test in np.linspace(1, 30, num = 30):
        ridge_reg = linear_model.Ridge(alpha= alpha_test)
        ridge_reg.fit(x_short_train, y_short_train)
        y_ridge_pred = ridge_reg.predict(x_short_val)
        spearman_rank = spearmanr(y_short_val, y_ridge_pred)
        print('ridge regression: ', spearman_rank, "alpha = ", alpha_test)
        if spearman_rank[0] > best_spearmanr:
            best_alpha = alpha_test
            best_spearmanr = spearman_rank[0]
    print('the best alpha: ', best_alpha, "the best spearman rank: ", best_spearmanr)
    
    # lasso
    # for alpha_test in np.linspace(1, 30, num = 30):
    lasso_reg = linear_model.Lasso(alpha = alpha_test)
    lasso_reg.fit(x_short_train, y_short_train)
    y_lasso_pred = lasso_reg.predict(x_short_val)
    print('lasso: regression:', spearmanr(y_short_val, y_lasso_pred))
    

In [83]:
# no_automatic
train_different_linear(score_and_caption)

(1917, 43982) (274, 43982)
linear regression:  SpearmanrResult(correlation=0.3353936744168005, pvalue=1.2549277630819479e-08)
ridge regression:  SpearmanrResult(correlation=0.34505722930476884, pvalue=4.4436053767948036e-09) alpha =  1.0
ridge regression:  SpearmanrResult(correlation=0.3513635810160045, pvalue=2.213567151170895e-09) alpha =  2.0
ridge regression:  SpearmanrResult(correlation=0.3542772825545336, pvalue=1.595842312825175e-09) alpha =  3.0
ridge regression:  SpearmanrResult(correlation=0.355382660957193, pvalue=1.4083189789855454e-09) alpha =  4.0
ridge regression:  SpearmanrResult(correlation=0.3579059025421275, pvalue=1.056794745867181e-09) alpha =  5.0
ridge regression:  SpearmanrResult(correlation=0.35960208160755563, pvalue=8.70043768952626e-10) alpha =  6.0
ridge regression:  SpearmanrResult(correlation=0.36116596571488485, pvalue=7.265024679705871e-10) alpha =  7.0
ridge regression:  SpearmanrResult(correlation=0.3600395602990177, pvalue=8.273307483754844e-10) alph



In [84]:
# with automatic
with open(data_path + 'combined_captions.pkl', "rb") as file:
        automatic_caption = pickle.load(file)
        
score_and_caption2 = pd.merge(left = score_data, right = automatic_caption, how = 'inner', on = ['video_id'])[['video_id', 'part_1_scores', 'part_2_scores', 'description']]
score_and_caption2.columns = ['video_id', 'short-term_memorability', 'long-term_memorability', 'description']
score_and_caption2.head(10)

Unnamed: 0,video_id,short-term_memorability,long-term_memorability,description
0,8,0.75,0.57,2 men on a stage hug and walk away
1,8,0.75,0.57,a man is standing in a street with a dog
2,8,0.75,0.57,a woman is standing in front of a tie
3,8,0.75,0.57,a man wearing a hat and a tie
4,8,0.75,0.57,two young man are standing on a stage embracin...
5,26,0.87,0.43,a woman is sitting on a street with an umbrella
6,26,0.87,0.43,a man wearing a shirt and a tie
7,26,0.87,0.43,a woman sitting on a bench with a tie
8,26,0.87,0.43,deadpool drinking from a cup
9,26,0.87,0.43,persons in costumes dance


In [85]:
train_different_linear(score_and_caption2)

(3465, 47265) (496, 47265)
linear regression:  SpearmanrResult(correlation=-0.061029378216204716, pvalue=0.1747733904955961)
ridge regression:  SpearmanrResult(correlation=0.19521814351086786, pvalue=1.1925471324685823e-05) alpha =  1.0
ridge regression:  SpearmanrResult(correlation=0.19193997285432013, pvalue=1.6773945387767728e-05) alpha =  2.0
ridge regression:  SpearmanrResult(correlation=0.19156681812939302, pvalue=1.7431824392318428e-05) alpha =  3.0
ridge regression:  SpearmanrResult(correlation=0.19295386898835118, pvalue=1.5103471306981995e-05) alpha =  4.0
ridge regression:  SpearmanrResult(correlation=0.19417663727387136, pvalue=1.3298933331630173e-05) alpha =  5.0
ridge regression:  SpearmanrResult(correlation=0.1959958034697934, pvalue=1.0989130051203396e-05) alpha =  6.0
ridge regression:  SpearmanrResult(correlation=0.19615457386581223, pvalue=1.0806765495202085e-05) alpha =  7.0
ridge regression:  SpearmanrResult(correlation=0.19656769187702802, pvalue=1.034565875916190



lasso: regression: SpearmanrResult(correlation=nan, pvalue=nan)
