In [2]:
import os, pickle, copy
from joblib import Parallel, delayed
from gensim.models import KeyedVectors
import numpy as np
from numpy.linalg import norm
from scipy.spatial.distance import cosine, euclidean
from scipy.stats import pearsonr
import pandas as pd
import matplotlib.pyplot as plt

# import tensorflow as tf
# import tensorflow.contrib.eager as tfe
# tf.enable_eager_execution()

In [3]:
def get_fns_and_meta(data_pth, folders):
    """Collect metadata for every word-vector file under `data_pth`.

    Parameters
    ----------
    data_pth : str
        Root directory holding one sub-directory per model family.
    folders : iterable of str
        Sub-directory names to scan (this notebook uses 'glove' and 'w2v').

    Returns
    -------
    dict
        Maps the filename without its extension to a dict with keys
        'root' (data_pth), 'class' (folder), 'fn' (filename) and, for the
        'glove' and 'w2v' folders, 'd' (vector dimensionality).

    Raises
    ------
    ValueError
        If a file in the 'glove' folder has no '<digits>d' name component.
    """
    models_meta = {}
    for folder in folders:
        for file in os.listdir(os.path.join(data_pth, folder)):
            model_meta = {'root': data_pth, 'class': folder, 'fn': file}

            if folder == 'glove':
                # GloVe files are named like 'glove.840B.300d.txt': the
                # dimensionality is the dot-separated component made of
                # digits followed by 'd'. Scanning components (instead of
                # searching for the first 'B'/'d' character) keeps this
                # correct when other parts of the name contain those letters.
                dims = [part[:-1] for part in file.split('.')
                        if part.endswith('d') and part[:-1].isdigit()]
                if not dims:
                    raise ValueError(
                        'cannot parse vector dimension from %r' % file)
                model_meta['d'] = int(dims[0])
            elif folder == 'w2v':
                model_meta['d'] = 300

            # key on the name without extension, whatever its length
            models_meta[os.path.splitext(file)[0]] = model_meta

    return models_meta

def load_relsim_data(path='', fn='relsim_mean_ratings.csv'):
    """Load the relational-similarity ratings CSV and add numeric type columns.

    Parameters
    ----------
    path : str
        Directory containing the CSV ('' means the working directory).
        A trailing separator is optional.
    fn : str
        CSV filename.

    Returns
    -------
    pandas.DataFrame
        The CSV contents plus 'rel1_type' / 'rel2_type', obtained by dropping
        the last character of 'relation1' / 'relation2' (e.g. '5a' -> 5) and
        converting the rest to int.
    """
    # os.path.join instead of `path + fn` so `path` works with or without a
    # trailing separator
    df = pd.read_csv(os.path.join(path, fn))
    df['rel1_type'] = df['relation1'].apply(lambda x: int(x[:-1]))
    df['rel2_type'] = df['relation2'].apply(lambda x: int(x[:-1]))

    return df

def words_in_vocab(words, model):
    """Return True iff every word in `words` is known to `model`.

    `model` may expose its vocabulary as a `.vocab` mapping, or be a
    container that supports `in` directly (e.g. the plain {word: vector}
    dicts produced by create_condensed_model). An empty `words` yields True.
    """
    try:
        known = model.vocab
    except AttributeError:
        # No usable `.vocab` attribute: rely on the model's own membership
        # test, which works for dicts and any object defining __contains__.
        known = model

    # all() stops at the first missing word
    return all(w in known for w in words)


def compute_similarity(u, v, metric='e'):
    """Similarity between vectors `u` and `v` (larger means more similar).

    Parameters
    ----------
    u, v : array-like
        1-D vectors of equal length.
    metric : str
        'inner product' / 'ip' : np.dot(u, v)
        'cosine' / 'c'         : 1 - scipy cosine distance
        'euclidean' / 'e'      : negated Euclidean distance
        'dawn_euclidean' / 'd' : 1 - Euclidean distance

    Raises
    ------
    ValueError
        For an unrecognised `metric` (previously this silently returned None).
    """
    if metric in ('inner product', 'ip'):
        return np.dot(u, v)

    if metric in ('cosine', 'c'):
        return 1 - cosine(u, v)

    if metric in ('euclidean', 'e'):
        return -euclidean(u, v)

    if metric in ('dawn_euclidean', 'd'):
        return 1 - euclidean(u, v)

    raise ValueError('unknown similarity metric: %r' % (metric,))

    
def get_analogy_words(trial):
    """Return the four words of a trial as [p1w1, p1w2, p2w1, p2w2].

    `trial` is any object exposing the attributes pair1_word1, pair1_word2,
    pair2_word1 and pair2_word2 (e.g. a DataFrame row).
    """
    word_fields = ('pair1_word1', 'pair1_word2',
                   'pair2_word1', 'pair2_word2')
    return [getattr(trial, field) for field in word_fields]


def get_relsim_vocab(df):
    """Return the distinct words used anywhere in the four word columns of `df`.

    The result is a list with no duplicates; its order is whatever the
    underlying set yields.
    """
    distinct = set()
    for column in ('pair1_word1', 'pair1_word2',
                   'pair2_word1', 'pair2_word2'):
        distinct.update(df[column].unique())

    return list(distinct)


def create_condensed_model_relsim(df, model):
    """Condense `model` down to the words appearing in the relational
    similarity data `df`.

    Convenience wrapper: derives the vocabulary with get_relsim_vocab(df)
    and hands it to create_condensed_model.
    """
    return create_condensed_model(get_relsim_vocab(df), model)


def create_condensed_model(vocab, model):
    """Build a {word: vector} dict restricted to `vocab`.

    Parameters
    ----------
    vocab : iterable of str
        Words to keep.
    model : mapping-like
        Source of vectors, indexed as model[word]. Membership is tested
        against `model.vocab` when that attribute exists, otherwise against
        the model itself, so an already-condensed dict is accepted too.

    Returns
    -------
    dict
        One entry per word of `vocab` that the model knows; unknown words
        are silently dropped.
    """
    try:
        known = model.vocab
    except AttributeError:
        # plain dict (or any container with __contains__)
        known = model

    return {word: model[word] for word in vocab if word in known}


def load_model(model_fn='GoogleNews-vectors-negative300.bin',
               data_pth = '../../../../datasets/word-vector-datasets/',
               binary=True, load_condensed_stem=None, 
               condensed_vocab=None, save_condensed=False, 
               condensed_path=None):
    """Load a word-vector model, optionally as a cached condensed dict.

    Parameters
    ----------
    model_fn : str
        Model filename. Names containing 'glove' are read from the 'glove'
        sub-directory as text (binary is forced to False); names containing
        'GoogleNews' are read from the 'w2v' sub-directory.
    data_pth : str
        Root directory of the word-vector datasets.
    binary : bool
        Passed to KeyedVectors.load_word2vec_format (overridden for GloVe).
    load_condensed_stem : str or None
        Suffix of the condensed cache file; the cache filename is
        model_fn[:-3] + load_condensed_stem.
    condensed_vocab : iterable of str or None
        Vocabulary to keep when building the condensed model.
    save_condensed : bool
        If True, pickle a freshly built condensed model to the cache path.
    condensed_path : str or None
        Directory holding condensed cache files.

    Returns
    -------
    The full KeyedVectors model when any of load_condensed_stem,
    condensed_vocab or condensed_path is None; otherwise a {word: vector}
    dict (loaded from cache if present, else built from the full model).
    """
    if 'glove' in model_fn:
        binary = False
        data_pth = os.path.join(data_pth, 'glove')
    elif 'GoogleNews' in model_fn:
        data_pth = os.path.join(data_pth, 'w2v')
    model_path = os.path.join(data_pth, model_fn)

    # identity checks: `None not in [...]` would compare with ==, which is
    # ambiguous if condensed_vocab is an array
    want_condensed = all(arg is not None for arg in
                         (load_condensed_stem, condensed_vocab, condensed_path))
    if not want_condensed:
        return KeyedVectors.load_word2vec_format(model_path, binary=binary)

    c_model_path = os.path.join(condensed_path,
                                model_fn[:-3] + load_condensed_stem)

    if os.path.isfile(c_model_path):
        # NOTE(security): unpickling can execute arbitrary code; only load
        # cache files this notebook wrote itself.
        with open(c_model_path, "rb") as fh:
            return pickle.load(fh)

    model = KeyedVectors.load_word2vec_format(model_path, binary=binary)
    c_model = create_condensed_model(condensed_vocab, model)
    if save_condensed:
        # make sure the cache directory exists before writing, so the
        # expensive load above is not wasted on a missing folder
        if condensed_path:
            os.makedirs(condensed_path, exist_ok=True)
        with open(c_model_path, "wb") as fh:
            pickle.dump(c_model, fh)
    return c_model


def makesave_or_load_condensed(models_meta, condensed_vocab=None):
    """Attach a condensed {word: vector} model to every metadata entry.

    Parameters
    ----------
    models_meta : dict
        Maps model key to a metadata dict with at least 'fn' and 'root'.
        It is deep-copied, not modified.
    condensed_vocab : iterable of str or None
        Vocabulary to keep in each condensed model. When None, the
        notebook-level global `vocab` is used, as before.

    Returns
    -------
    dict
        Copy of `models_meta` where each entry gains a 'model' key holding
        the result of load_model (cached under 'condensed_models/').
    """
    if condensed_vocab is None:
        # backward-compatible fallback to the notebook global
        condensed_vocab = vocab

    models = copy.deepcopy(models_meta)
    # store all condensed models in one dict
    for model_key, meta in models.items():
        meta['model'] = load_model(model_fn=meta['fn'],
                                   data_pth=meta['root'],
                                   binary=True,
                                   load_condensed_stem='relsim.condensed.p',
                                   condensed_vocab=condensed_vocab,
                                   save_condensed=True,
                                   condensed_path='condensed_models/')

    return models


def get_word_vector(word, model, normalize=True):
    """Look up `word` in `model`; by default scale the vector to unit L2 norm."""
    vec = model[word]

    if not normalize:
        return vec

    return vec / norm(vec)


def get_diff_vecs(words, model, dims=None):
    """Difference vectors for the two word pairs of an analogy trial.

    `words` holds [pair1_word1, pair1_word2, pair2_word1, pair2_word2].
    Each word is fetched through get_word_vector (default normalisation),
    then the within-pair differences word1 - word2 are returned as a
    2-tuple. If `dims` is given, both differences are indexed with it.
    """
    vec_a, vec_b, vec_c, vec_d = (get_word_vector(words[i], model)
                                  for i in range(4))

    first_diff = vec_a - vec_b
    second_diff = vec_c - vec_d

    if dims is not None:
        return first_diff[dims], second_diff[dims]

    return first_diff, second_diff
    

def naive_train_val_split(df, val_percent=0.2, 
                           shuffle=True, seed=1):
    """Split row positions of `df` into train and validation index arrays.

    Doesn't avoid shared single words across train and test sets!!

    Parameters
    ----------
    df : object with a `.shape` attribute
        Only df.shape[0] (the number of rows) is used.
    val_percent : float
        Fraction of rows assigned to validation.
    shuffle : bool
        Shuffle positions before splitting.
    seed : int
        Seed for the shuffle. The same seed always yields the same split,
        so callers wanting different splits must vary it.

    Returns
    -------
    (train_idxs, val_idxs) : tuple of numpy integer arrays (positional).
    """
    train_percent = 1 - val_percent

    n = df.shape[0]
    idxs = np.arange(n)
    if shuffle:
        # A private RandomState gives the same permutation as
        # np.random.seed(seed) + np.random.shuffle, but leaves the global
        # numpy RNG untouched for the rest of the notebook.
        np.random.RandomState(seed).shuffle(idxs)

    n_train = int(n*train_percent)

    return idxs[:n_train], idxs[n_train:]


def score_preds(df):
    """Pearson correlation between human ratings and model predictions.

    Only rows flagged in_vocab are scored. Returns whatever
    scipy.stats.pearsonr returns (index 0 is r, index 1 the p-value).
    """
    scored = df[df.in_vocab==True]
    return pearsonr(scored.mean_rating, scored.preds)


def get_rel_sim_preds(df, model, dims=None,
                      metric='e'):
    """Predict relational similarity for every trial in `df`.

    For each row, the similarity (per `metric`) between the two pairs'
    difference vectors is computed, optionally restricted to `dims`.
    Rows with any word missing from `model` get the placeholder 999.

    Adds/overwrites the columns 'preds' and 'in_vocab' on `df` IN PLACE
    and returns the same DataFrame.
    """
    predictions = []
    known_flags = []

    for _, trial in df.iterrows():
        words = get_analogy_words(trial)

        if not words_in_vocab(words, model):
            # placeholder; score_preds filters these rows out via in_vocab
            predictions.append(999)
            known_flags.append(False)
            continue

        first_diff, second_diff = get_diff_vecs(words, model, dims=dims)
        predictions.append(compute_similarity(first_diff, second_diff,
                                              metric=metric))
        known_flags.append(True)

    df['preds'] = predictions
    df['in_vocab'] = known_flags
    return df

def search_for_best_axes(df, model, epsilon=0, verbose=0):
    """ Find the subset of dimensions (axis-aligned subspace)
        giving the best fit to human data.

    Single greedy pass: each dimension in turn is tentatively removed from
    the kept set, and the removal is made permanent when the Pearson r
    between predictions and df.mean_rating rises by more than `epsilon`.

    Parameters
    ----------
    df : pandas.DataFrame
        Trials to fit. NOTE: get_rel_sim_preds writes the 'preds' and
        'in_vocab' columns on it in place.
    model : mapping-like
        Word-vector model (full or condensed).
    epsilon : float
        Minimum improvement in r required to drop a dimension.
    verbose : int
        >0 prints base/final scores; >1 also prints each improvement.

    Returns
    -------
    (feat_idx_keep, base_score, best_score)
        Indices of kept dimensions, r using all dimensions, and r using
        the kept dimensions.

    Raises
    ------
    ValueError
        If no trial in `df` has all four words in the model.
    """

    # Infer dimensionality from the first fully in-vocabulary trial rather
    # than probing a fixed word, which a condensed model may not contain.
    n_feats = None
    for _, row in df.iterrows():
        words = get_analogy_words(row)
        if words_in_vocab(words, model):
            n_feats = model[words[0]].size
            break
    if n_feats is None:
        raise ValueError('no trial in df has all four words in the model')

    feat_idx_keep = np.arange(n_feats)

    df_pred = get_rel_sim_preds(df, model)
    base_score = score_preds(df_pred)[0]
    best_score = base_score
    if verbose > 0:
        print('Base Score : %.4f, Features: %i' % (best_score, n_feats))

    for feat_idx in np.arange(n_feats):

        # candidate set: everything currently kept except this dimension
        curr_feat_set_proposal = feat_idx_keep[feat_idx_keep!=feat_idx]

        df_pred = get_rel_sim_preds(df, model, dims=curr_feat_set_proposal)
        curr_score = score_preds(df_pred)[0]

        if (curr_score > best_score) and (curr_score-best_score > epsilon):
            best_score = curr_score
            feat_idx_keep = curr_feat_set_proposal
            if verbose > 1:
                print('-- New Best: %.4f, Features: %i' % (best_score, feat_idx_keep.size))

    if verbose > 0:                
        print('Final Score: %.4f, Features: %i' % (best_score, feat_idx_keep.size))

    return feat_idx_keep, base_score, best_score

# def apply_func_to_all_models(models, func, subset=None):
    
#     if subset is not None: 
#         model_list = models.keys()
#     else:
#         model_list = subset
    
#     results = {}
#     for model_key in model_list:
#     return results

In [4]:
# load human relational similarity data
# (with no arguments this reads 'relsim_mean_ratings.csv' from the
# notebook's working directory)
df_rel_sim = load_relsim_data()

# get the vocab for the dataset
# NOTE: `vocab` is read as a global by makesave_or_load_condensed(), so this
# cell must run before the models are loaded below.
vocab = get_relsim_vocab(df_rel_sim)

In [6]:
# where to find vector space models
# (relative path: depends on the directory the notebook is launched from)
data_pth = '../../../../datasets/word-vector-datasets/'
folders = ['glove','w2v']

# load meta for all models
# (one entry per file found in each folder, keyed by filename minus extension)
models_meta = get_fns_and_meta(data_pth, folders)

# store all condensed models in one dict
# (each entry gains a 'model' key; condensed models are cached as pickles
# under 'condensed_models/')
models = makesave_or_load_condensed(models_meta)

# quick pointers to a few important models
# (these keys must match files present in the 'w2v' and 'glove' folders)
w2v_gnews = models['GoogleNews-vectors-negative300']['model']
glove = models['glove.840B.300d']['model']

In [14]:
# model_list = models.keys()
model_list = ['GoogleNews-vectors-negative300',
              'glove.840B.300d']

# basic analysis: Pearson r (and p-value) between each model's predictions
# and the human mean ratings over all trials, using the default metric.
# NOTE: get_rel_sim_preds overwrites the 'preds'/'in_vocab' columns of
# df_rel_sim in place on every iteration, and `model` stays bound to the
# last entry of model_list after the loop (later cells read it).
for model_key in model_list:
    model = models[model_key]['model']
    df_rel_sim = get_rel_sim_preds(df_rel_sim, model)
    print(model_key, 
          score_preds(df_rel_sim))

GoogleNews-vectors-negative300 (0.2612749669349393, 3.2947565200076315e-97)
glove.840B.300d (0.24200000103895664, 3.0018737071108936e-83)


In [16]:
# search for a subset of dimensions with best
# overall score across all types/subtypes
# (the returned feature indices and scores are discarded here; only the
# verbose printout is kept)

for m, model_key in enumerate(model_list):
    model = models[model_key]['model']
    print(model_key)
    search_for_best_axes(df_rel_sim, model, epsilon=0.0001, verbose=1)
    # blank line between models, but not after the last one
    if (m+1) < len(model_list): print('')

GoogleNews-vectors-negative300
Base Score : 0.2613, Features: 300
Final Score: 0.3426, Features: 157

glove.840B.300d
Base Score : 0.2420, Features: 300
Final Score: 0.3126, Features: 154



In [None]:
# search for a subset of dimensions with best score within each relation
# type, and estimate how well that subset generalises with repeated random
# train/validation splits

n_splits = 10
epsilon = 0.0001

# Bind the model explicitly instead of relying on whatever an earlier loop
# left in `model`. The loops above end on the GloVe model, and the bar chart
# further down labels these results as GloVe.
model = glove

all_base_scores = []
all_best_scores = []

train_base_scores = []
train_best_scores = []

val_base_scores = []
val_best_scores = []

for rel_type in range(1, 11):

    # within-TYPE trials only (what Dawn did for paper!)
    exp_params = (df_rel_sim.rel1_type==rel_type) & (df_rel_sim.rel2_type==rel_type)

    df_exp = df_rel_sim[exp_params].copy()

    print('Type', rel_type, ' - All Data Score', df_exp.shape[0])

    feats_all_data, all_base_score, all_best_score = \
        search_for_best_axes(df_exp, model, verbose=1, epsilon=epsilon)
    all_base_scores.append(all_base_score)
    all_best_scores.append(all_best_score)
    print('')

    avg_train_base_scores = []
    avg_train_best_scores = []
    avg_val_base_scores = []
    avg_val_best_scores = []

    for split in range(n_splits):
        # seed=split: with the default seed every iteration would produce
        # the exact same split, making the average over splits meaningless
        train_idxs, val_idxs = naive_train_val_split(df_exp, 
                                                      val_percent=0.2, 
                                                      shuffle=True,
                                                      seed=split)

        feats_train, train_base_score, train_best_score = \
            search_for_best_axes(df_exp.iloc[train_idxs].copy(), 
                                 model, verbose=0, epsilon=epsilon)

        # validation score with all dimensions vs. the dimensions selected
        # on the training rows
        df_val_base = get_rel_sim_preds(df_exp.iloc[val_idxs].copy(), model)
        df_val = get_rel_sim_preds(df_exp.iloc[val_idxs].copy(), model, dims=feats_train)
        val_base_score = score_preds(df_val_base)[0]
        val_best_score = score_preds(df_val)[0]
        print(val_best_score)

        avg_train_base_scores.append(train_base_score)
        avg_train_best_scores.append(train_best_score)
        avg_val_base_scores.append(val_base_score)
        avg_val_best_scores.append(val_best_score)

    print('mean val', np.mean(avg_val_best_scores))

    train_base_scores.append(np.mean(avg_train_base_scores))
    train_best_scores.append(np.mean(avg_train_best_scores))
    val_base_scores.append(np.mean(avg_val_base_scores))
    val_best_scores.append(np.mean(avg_val_best_scores))

    print('')
    print('')

In [None]:
### JOBLIB TEST!!! ###

# search for a subset of dimensions with best score within each relation
# type; the train/validation splits are evaluated in parallel with joblib

# Bind the model explicitly instead of relying on whatever an earlier loop
# left in `model` (the loops above end on the GloVe model).
model = glove

# Condense to the words used in the ratings data so each worker receives a
# small dict. The *_relsim wrapper derives the vocabulary from the
# DataFrame; passing the DataFrame itself to create_condensed_model would
# iterate over its column names instead of its words.
condensed_model = create_condensed_model_relsim(df_rel_sim, model)

n_splits = 50
epsilon = 0.0001
n_jobs = -1  # use the available cores instead of one process per split

all_base_scores = []
all_best_scores = []

train_base_scores = []
train_best_scores = []

val_base_scores = []
val_best_scores = []


def run_split(seed, df_exp):
    """Run one seeded train/validation split on `df_exp`.

    Returns (train_base_score, train_best_score,
             val_base_score, val_best_score), all Pearson r values.
    Reads the notebook globals `condensed_model` and `epsilon`.
    """
    train_idxs, val_idxs = naive_train_val_split(df_exp, 
                                                  val_percent=0.2,
                                                  shuffle=True,
                                                  seed=seed)

    feats_train, train_base_score, train_best_score = \
        search_for_best_axes(df_exp.iloc[train_idxs].copy(), 
                             condensed_model, verbose=0, epsilon=epsilon)

    df_val_base = get_rel_sim_preds(df_exp.iloc[val_idxs].copy(), condensed_model)
    val_base_score = score_preds(df_val_base)[0]

    df_val = get_rel_sim_preds(df_exp.iloc[val_idxs].copy(), condensed_model, dims=feats_train)
    val_best_score = score_preds(df_val)[0]

    return train_base_score, train_best_score, val_base_score, val_best_score


for rel_type in range(1, 11):

    # within-TYPE trials only (what Dawn did for paper!)
    exp_params = (df_rel_sim.rel1_type==rel_type) & (df_rel_sim.rel2_type==rel_type)

    df_exp = df_rel_sim[exp_params].copy()

    print('Type', rel_type, ' - All Data Score', df_exp.shape[0])

    feats_all_data, all_base_score, all_best_score = \
        search_for_best_axes(df_exp, model, verbose=0, epsilon=epsilon)
    all_base_scores.append(all_base_score)
    all_best_scores.append(all_best_score)

    # one task per seed; results come back in seed order
    results = Parallel(n_jobs=n_jobs)(delayed(run_split)(i, df_exp) for i in range(n_splits))
    for result in results: print(result)

    avg_train_base_scores = [result[0] for result in results]
    avg_train_best_scores = [result[1] for result in results]
    avg_val_base_scores = [result[2] for result in results]
    avg_val_best_scores = [result[3] for result in results]

    print('mean val', np.mean(avg_val_best_scores))

    train_base_scores.append(np.mean(avg_train_base_scores))
    train_best_scores.append(np.mean(avg_train_best_scores))
    val_base_scores.append(np.mean(avg_val_base_scores))
    val_best_scores.append(np.mean(avg_val_best_scores))

    print('')
    print('')

In [None]:
# Type 1 epsilon test
# 121 0       0.5298 0.4289
# 121 0.00001 0.5298 0.4127
# 127 0.0001  0.5230 0.4552
# 204 0.001   0.4023 0.3364
# 300 0.01    0.1511 NA
# 300 0.1     0.1511 NA

In [None]:
# Baseline Pearson r per relation type, using all dimensions and the
# negated-Euclidean metric.
# NOTE(review): `model` is whatever an earlier cell last assigned to that
# name; it is not set in this cell.
for rel_type in range(1, 11):
    
    # within-TYPE trials only (what Dawn did for paper!)
    exp_params = (df_rel_sim.rel1_type==rel_type) & (df_rel_sim.rel2_type==rel_type)
    
    # copy so the prediction columns are written to the subset, not to a
    # view of df_rel_sim
    df_exp = df_rel_sim[exp_params].copy()
    
    # the number printed after the label is the trial count for this type
    print('Type', rel_type, ' - All Data Score', df_exp.shape[0])
    df_exp = get_rel_sim_preds(df_exp, model, metric='e')

    print(score_preds(df_exp)[0])
    print('')

In [None]:
plt.figure(dpi=150)

# width of a single bar; the three series sit side by side in each group
barWidth = 0.25

# (bar heights, colour, legend label) for each series, left to right
series = [
    (all_base_scores, 'black', 'Original GloVe'),
    (all_best_scores, '#2d7f5e', 'Best Subspace (All Data)'),
    (val_best_scores, 'purple', 'Best Subspace (Mean 10x Validation)'),
]

# left-most bar position of every group
group_pos = np.arange(len(all_base_scores))

# one set of bars per series, shifted right by its index, plus a dashed
# horizontal line at that series' mean
for offset, (heights, colour, label) in enumerate(series):
    plt.bar(group_pos + offset * barWidth, heights, color=colour,
            width=barWidth, edgecolor='white', label=label)
    plt.axhline(y=np.mean(heights), color=colour, linestyle='--')

# axis labels, with one tick under the middle bar of each group
plt.ylabel('Pearson $r$', fontweight='bold')
plt.xlabel('Relation Type', fontweight='bold')
plt.xticks([r + barWidth for r in range(len(all_base_scores))], range(1, 11))

plt.ylim([0,1])

# legend & render
plt.legend()
plt.show()

In [None]:
# Inspect the trials where both relations are of type 2.
# NOTE: this displays every matching row; append .head() for a shorter view.
exp_params = (df_rel_sim.rel1_type==2) & (df_rel_sim.rel2_type==2)

df_rel_sim[exp_params]

In [None]:
# One scatter plot per relation type: human mean rating (x) against the
# Euclidean distance between the two difference vectors, measured in the
# best axis-aligned subspace found for that type (y).

for rel_type in range(1, 11):
    plt.figure()

    # within-TYPE trials only (what Dawn did for paper!)
    exp_params = (df_rel_sim.rel1_type==rel_type) & (df_rel_sim.rel2_type==rel_type)

    df_exp = df_rel_sim[exp_params].copy()

    result = search_for_best_axes(df_exp, model, 
                                  epsilon=0.0001, verbose=0)
    good_feats = result[0]

    ratings, distances = [], []
    for r, row in df_exp.iterrows():

        words = get_analogy_words(row)

        # Skip trials with out-of-vocabulary words. Previously the point was
        # still plotted using the similarity left over from the last
        # in-vocabulary row (or raised NameError if the first row was OOV).
        if not words_in_vocab(words, model):
            continue

        diff_pair1, diff_pair2 = \
            get_diff_vecs(words, model)

        sim = compute_similarity(diff_pair1[good_feats], 
                                 diff_pair2[good_feats],
                                 metric='e')

        ratings.append(row.mean_rating)
        # metric 'e' is the negated distance, so -sim is the distance itself
        distances.append(-sim)

    # a single scatter call per figure instead of one per point
    plt.scatter(ratings, distances, 
                s=10, color='blue', alpha=0.5)
    print(rel_type)
    plt.show()

In [14]:
# Working copy of the ratings data, so df_rel_sim itself is left untouched
# by experiments on `df`.
df = df_rel_sim.copy()

# (disabled) manual train/validation split preview:
# train_idxs, val_idxs = naive_train_val_split(df)

# train_raw = df.iloc[train_idxs].copy()
# val_raw = df.iloc[val_idxs].copy()



# val_raw.head()

Unnamed: 0,relation1,relation2,comparison_type,pair1_word1,pair1_word2,pair2_word1,pair2_word2,mean_rating,num_ratings,rel1_type,rel2_type
3524,5a,5a,within-subtype,cherry,red,clay,malleable,4.636364,11,5,5
2629,8a,8a,within-subtype,stimulus,response,sweat,run,4.0,11,8,8
4123,1a,1a,within-subtype,car,mustang,politician,senator,6.0,11,1,1
5529,8b,8a,between-subtype,coldness,shiver,eating,fullness,5.4,10,8,8
2766,5a,5a,within-subtype,intellectual,professor,murderer,evil,4.090909,11,5,5


In [15]:
# One-hot encode 'comparison_type' and display only the three indicator
# columns. The result is displayed, not assigned; df_rel_sim is unchanged.
pd.get_dummies(df_rel_sim, columns=['comparison_type'])[['comparison_type_between-subtype', 
                                                        'comparison_type_between-type',
                                                        'comparison_type_within-subtype']]

Unnamed: 0,comparison_type_between-subtype,comparison_type_between-type,comparison_type_within-subtype
0,0,0,1
1,0,0,1
2,0,0,1
3,0,0,1
4,0,0,1
5,0,0,1
6,0,0,1
7,0,1,0
8,0,1,0
9,0,0,1


In [123]:
# df_exp = df_rel_sim.copy() # all data
# keep only trials whose comparison_type is not 'between-type'
df_exp = df_rel_sim[df_rel_sim.comparison_type!='between-type'].copy()

# fixed seed so the train/validation split is reproducible
train_idxs, val_idxs = naive_train_val_split(df_exp, seed=3)

def build_data_for_tuning(df, model, train_idxs, val_idxs, dims=None):
    """Build aligned arrays of difference vectors and ratings for tuning.

    Parameters
    ----------
    df : pandas.DataFrame
        Trials with the four word columns and 'mean_rating'.
    model : mapping-like
        Word-vector model.
    train_idxs, val_idxs : array-like of int
        Positional row indices (as returned by naive_train_val_split).
    dims : index array or None
        Optional subset of dimensions passed on to get_diff_vecs.

    Returns
    -------
    list of numpy arrays, in this order:
        [U_train, U_val, V_train, V_val, y_train, y_val]
    where U holds the first pair's difference vector, V the second pair's,
    and y the mean rating, one row per trial.

    Trials containing a word missing from `model` are skipped, so U, V and
    y always stay aligned. (Previously such a trial re-used the vectors of
    the preceding row, or raised NameError if it came first.)
    """

    def _collect(rows):
        # gather (U, V, y) lists for the in-vocabulary trials of `rows`
        U, V, y = [], [], []
        for _, row in rows.iterrows():
            words = get_analogy_words(row)
            if not words_in_vocab(words, model):
                continue
            diff_pair1, diff_pair2 = \
                get_diff_vecs(words, model, dims=dims)
            U.append(diff_pair1)
            V.append(diff_pair2)
            y.append(row.mean_rating)
        return U, V, y

    U_train, V_train, y_train = _collect(df.iloc[train_idxs])
    U_val, V_val, y_val = _collect(df.iloc[val_idxs])

    return [np.array(x) for x in [U_train, U_val, V_train, V_val, y_train, y_val]]

# build the train/validation arrays from the word2vec (Google News) model
U_train, U_val, V_train, V_val, y_train, y_val = build_data_for_tuning(
    df_exp, w2v_gnews, train_idxs, val_idxs)

# sanity check: print each array's shape
for arr in (U_train, U_val, V_train, V_val, y_train, y_val):
    print(arr.shape)

(4220, 300)
(1055, 300)
(4220, 300)
(1055, 300)
(4220,)
(1055,)


In [124]:
# Untuned baseline on the validation set: negated Euclidean distance
# between each pair of difference vectors, correlated with the ratings.
preds = [compute_similarity(u, v, metric='e')
         for u, v in zip(U_val, V_val)]

print(np.corrcoef(preds, y_val)[0,1])

0.3612125067202853


In [125]:
# NOTE(review): these imports sit mid-notebook; the later tuner cells rely on
# this cell having been run first for `keras`, Input, Dense, etc.
import keras
from keras.layers import Input, Dense
from keras.layers import concatenate, multiply, dot
from keras.models import Model

# Tuner, variant 1: non-negative (squared) per-dimension weights.
# prediction = sum_i u_i * w_i(u, v)^2 * v_i, where w is a linear function
# of the concatenated inputs [u, v].

# input dimensionality; hard-coded, must equal U_train.shape[1]
d = 300

u_input = Input(shape=(d,))
v_input = Input(shape=(d,))

diff_vecs = concatenate([u_input, v_input])

# input-dependent weights, one per dimension
w = Dense(d, activation='linear')(diff_vecs)
# squaring makes every weight non-negative
w_sq = multiply([w, w])

u_mult_w_sq = multiply([u_input, w_sq])

# weighted inner product of u and v (one scalar per sample)
final_dot = dot([u_mult_w_sq, v_input], axes=1)

tuner = Model(inputs=[u_input, v_input], 
              outputs=final_dot)

# print(tuner.summary())

# trained with MSE against the ratings, but evaluated by correlation below
tuner.compile(optimizer='rmsprop',
              loss='mean_squared_error')

# NOTE(review): eval_tuner is re-defined identically in each tuner cell; it
# reads the globals `tuner`, U_*, V_* and y_* at call time.
def eval_tuner():
    """Print train/validation Pearson r of the current global `tuner`."""
    preds_train = tuner.predict([U_train, V_train])
    preds_val = tuner.predict([U_val, V_val])
    print('Train:', np.corrcoef(preds_train.flatten(), y_train)[0,1])
    print('Val  :', np.corrcoef(preds_val.flatten(), y_val)[0,1])
    print('')

# score before training, then after every fit() call; each call uses fit's
# default number of epochs (the recorded output shows 'Epoch 1/1')
eval_tuner()
for epoch in range(60):
    tuner.fit([U_train, V_train], y_train, 
              validation_data=([U_val, V_val], y_val),
              verbose=1)
    eval_tuner()

Train: 0.15909395023178471
Val  : 0.12341639451952253

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.2911924191502079
Val  : 0.280953168123332

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.2932714493549004
Val  : 0.2797243793124052

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.29461579493467493
Val  : 0.2795435432434388

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.2962890397991324
Val  : 0.28010212808337487

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.29908567304032907
Val  : 0.2810477437798998

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3030104514865415
Val  : 0.2820101248012115

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.30615378384530817
Val  : 0.28182646107574294

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.30736781164826654
Val  : 0.2799593002550552

Train on 4220 samples, validate on 1055 samples
Epoc

KeyboardInterrupt: 

In [122]:
# ALLOW NEGATIVE WEIGHTS!
# Tuner variant where the learned weights are NOT squared, so they may be
# negative: prediction = sum_i u_i * w_i(u, v) * v_i.
# Requires the keras imports from the first tuner cell.

# input dimensionality; hard-coded, must equal U_train.shape[1]
d = 300

u_input = Input(shape=(d,))
v_input = Input(shape=(d,))

diff_vecs = concatenate([u_input, v_input])

# input-dependent weights, one per dimension
w = Dense(d, activation='linear')(diff_vecs)
#w_sq = multiply([w, w])

# (the name is kept from the squared variant, but here it is u * w)
u_mult_w_sq = multiply([u_input, w])

final_dot = dot([u_mult_w_sq, v_input], axes=1)

tuner = Model(inputs=[u_input, v_input], 
              outputs=final_dot)

# print(tuner.summary())

tuner.compile(optimizer='rmsprop',
              loss='mean_squared_error')

# NOTE(review): duplicate of the eval_tuner defined in the other tuner cells.
def eval_tuner():
    """Print train/validation Pearson r of the current global `tuner`."""
    preds_train = tuner.predict([U_train, V_train])
    preds_val = tuner.predict([U_val, V_val])
    print('Train:', np.corrcoef(preds_train.flatten(), y_train)[0,1])
    print('Val  :', np.corrcoef(preds_val.flatten(), y_val)[0,1])
    print('')

# score before training, then after every fit() call
eval_tuner()
for epoch in range(200):
    tuner.fit([U_train, V_train], y_train, 
              validation_data=([U_val, V_val], y_val),
              verbose=1)
    eval_tuner()

Train: -0.005118875210539643
Val  : 0.04605324904128041

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.35204706319832485
Val  : 0.29338229696939916

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3555073309537335
Val  : 0.2941301028985686

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3560480131314369
Val  : 0.29370307846711013

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3568327417653905
Val  : 0.29382202626293924

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.35728863240589476
Val  : 0.2938259803097887

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.35784704811870083
Val  : 0.2939979082262203

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3583934695283921
Val  : 0.29415302674455257

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3589378683945502
Val  : 0.294291323327725

Train on 4955 samples, validate on 1239 samples
E

Train: 0.40832607939353555
Val  : 0.308209280027557

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.40895640094773683
Val  : 0.3083089339121723

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.40959171976931136
Val  : 0.30840213316582377

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.41022749818494575
Val  : 0.3084908688161777

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.41085240964904385
Val  : 0.30855742539335396

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.4114773871181538
Val  : 0.3086375649944462

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.4121047415635626
Val  : 0.3087140146793076

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.41272038325890054
Val  : 0.3087856364696545

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.4133507392323305
Val  : 0.308858593543044

Train on 4955 samples, validate on 1239 samples
Epoch

Train: 0.4324175204578054
Val  : 0.31052144332936094

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.4330085999109435
Val  : 0.310564750774515

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.43359380687453025
Val  : 0.31060668876563663

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.4341854712852202
Val  : 0.31064200281467547

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.43477190100423424
Val  : 0.31069403468043005

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.43536170952061926
Val  : 0.3107402389293139

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.4359425503414076
Val  : 0.310774707399144

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.4365264623754258
Val  : 0.3108208115728972

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.4371220364283767
Val  : 0.3108618122881534

Train on 4955 samples, validate on 1239 samples
Epoch 

Train: 0.48289300669182433
Val  : 0.31318055958594215



In [117]:
# SHARED WEIGHTS!
# Tuner variant where one Dense layer is applied to u and to v separately
# and the two outputs are summed before squaring:
# w = shared(u) + shared(v); prediction = sum_i u_i * w_i^2 * v_i.
# Requires the keras imports from the first tuner cell.

# input dimensionality; hard-coded, must equal U_train.shape[1]
d = 300

u_input = Input(shape=(d,))
v_input = Input(shape=(d,))

# a single layer instance, so both inputs pass through the same weights
shared = Dense(d, activation='linear')

u_w = shared(u_input)
v_w = shared(v_input)

w = keras.layers.add([u_w, v_w])

# squaring makes every weight non-negative
w_sq = multiply([w, w])

u_mult_w_sq = multiply([u_input, w_sq])

final_dot = dot([u_mult_w_sq, v_input], axes=1)

tuner = Model(inputs=[u_input, v_input], 
              outputs=final_dot)

# print(tuner.summary())

tuner.compile(optimizer='rmsprop',
              loss='mean_squared_error')

# NOTE(review): duplicate of the eval_tuner defined in the other tuner cells.
def eval_tuner():
    """Print train/validation Pearson r of the current global `tuner`."""
    preds_train = tuner.predict([U_train, V_train])
    preds_val = tuner.predict([U_val, V_val])
    print('Train:', np.corrcoef(preds_train.flatten(), y_train)[0,1])
    print('Val  :', np.corrcoef(preds_val.flatten(), y_val)[0,1])
    print('')

# score before training, then after every fit() call
eval_tuner()
for epoch in range(200):
    tuner.fit([U_train, V_train], y_train, 
              validation_data=([U_val, V_val], y_val),
              verbose=1)
    eval_tuner()

Train: 0.21486070403938934
Val  : 0.19116040983278643

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.2995279889056084
Val  : 0.30702629784746893

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.29985185119603186
Val  : 0.3074264478740778

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.30121788882811185
Val  : 0.30870834138315695

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.30458318768682463
Val  : 0.31093665105952567

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3103197791322965
Val  : 0.31495369410376356

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.31463472998421205
Val  : 0.3173902248399199

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3162040139547034
Val  : 0.31716823641205893

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.31516097183986785
Val  : 0.31439512897556926

Train on 4220 samples, validate on 1055 sample

Train: 0.38184672613454307
Val  : 0.2964770783050034

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.38818629206469385
Val  : 0.2987747924325636

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.39435281792919796
Val  : 0.30065200479609827

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3995923692502213
Val  : 0.30160676196435066

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4047215632991593
Val  : 0.3024332656916667

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4114576891968523
Val  : 0.3046558494120956

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4177105870080813
Val  : 0.3063358947650581

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4237025793032785
Val  : 0.3076498957574809

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.43015994074980735
Val  : 0.30962361735416527

Train on 4220 samples, validate on 1055 samples
Epoc

Train: 0.619206916022907
Val  : 0.33957242815632355

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.6233710425597413
Val  : 0.3388942100233664

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.6294708206586486
Val  : 0.34101462320285403

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.6330992424464574
Val  : 0.3393171377364805

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.6388077665697227
Val  : 0.34070696556996266

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.6441036551836701
Val  : 0.3414765408183097

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.648310560491734
Val  : 0.34091766538937796

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.653987056551754
Val  : 0.34239095724468727

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.6590001038235508
Val  : 0.3427178503473097

Train on 4220 samples, validate on 1055 samples
Epoch 1/1

Train: 0.8818136742872954
Val  : 0.34029047107240634

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.8830848893479423
Val  : 0.3392647036273114

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.8847737307633251
Val  : 0.33973391172877987

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.8862643734275268
Val  : 0.3395450792719792

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.8878946599645318
Val  : 0.34045639100518704

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.8890641907905615
Val  : 0.3390423863372134

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.8905586391444394
Val  : 0.33909829314863454

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.8920227448918163
Val  : 0.3392587521023244

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.8932685360848387
Val  : 0.33839851826624395

Train on 4220 samples, validate on 1055 samples
Epoch 

In [127]:
# SHARED WEIGHTS + WEIGHT REGULARIZATION!

d = 300

u_input = Input(shape=(d,))
v_input = Input(shape=(d,))

shared = Dense(d, activation='linear',
               kernel_regularizer=keras.regularizers.l2(0.0001))

u_w = shared(u_input)
v_w = shared(v_input)

w = keras.layers.add([u_w, v_w])

w_sq = multiply([w, w])

u_mult_w_sq = multiply([u_input, w_sq])

final_dot = dot([u_mult_w_sq, v_input], axes=1)

tuner = Model(inputs=[u_input, v_input], 
              outputs=final_dot)

# print(tuner.summary())

tuner.compile(optimizer='rmsprop',
              loss='mean_squared_error')

def eval_tuner(model=None, train=None, val=None):
    """Print and return the correlation between predictions and targets.

    Parameters
    ----------
    model : object with a ``predict`` method, optional
        Model to evaluate. When omitted, the notebook-level ``tuner`` is
        looked up at call time.
    train, val : tuple ``(U, V, y)``, optional
        Paired input arrays and their targets. When omitted, the
        notebook-level ``(U_train, V_train, y_train)`` and
        ``(U_val, V_val, y_val)`` are used.

    Returns
    -------
    tuple of float
        ``(train_corr, val_corr)``: the off-diagonal entry of ``np.corrcoef``
        between the flattened predictions and the targets for each split.
    """
    # Fall back to the notebook globals only when an argument is not supplied,
    # so a bare ``eval_tuner()`` behaves exactly as before.
    if model is None:
        model = tuner
    if train is None:
        train = (U_train, V_train, y_train)
    if val is None:
        val = (U_val, V_val, y_val)

    U_tr, V_tr, y_tr = train
    U_va, V_va, y_va = val

    preds_train = np.asarray(model.predict([U_tr, V_tr]))
    preds_val = np.asarray(model.predict([U_va, V_va]))

    # Ravel the targets as well so a column-vector y does not break corrcoef.
    corr_train = np.corrcoef(preds_train.ravel(), np.ravel(y_tr))[0, 1]
    corr_val = np.corrcoef(preds_val.ravel(), np.ravel(y_va))[0, 1]

    print('Train:', corr_train)
    print('Val  :', corr_val)
    print('')
    return corr_train, corr_val

# Correlations of the untrained model, for reference.
eval_tuner()

# Each fit() call runs a single epoch, so the train/val correlations are
# reported after every pass over the data.
n_epochs = 200
for epoch in range(n_epochs):
    tuner.fit(x=[U_train, V_train], y=y_train,
              verbose=1,
              validation_data=([U_val, V_val], y_val))
    eval_tuner()

Train: 0.21712093552144557
Val  : 0.15146658634265744

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.2925575347590198
Val  : 0.28158716200874123

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.2955284073854888
Val  : 0.2836274878883054

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.29797627626481943
Val  : 0.28463675592545246

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3022257819219704
Val  : 0.28661010784233426

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3092933082318193
Val  : 0.28896784009057436

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3150200430345272
Val  : 0.28917961737427605

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3173983760733353
Val  : 0.2864145141648976

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.3168814936503152
Val  : 0.28252810017386587

Train on 4220 samples, validate on 1055 samples
Ep

Train: 0.4775820184011682
Val  : 0.2937155730127951

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4784607183742048
Val  : 0.29318933757573273

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4800771701952783
Val  : 0.2933822497578016

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4818306626040203
Val  : 0.293927017393329

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4828007099706361
Val  : 0.29369005136414705

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.48400822763378215
Val  : 0.29367499106146355

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4859595595854518
Val  : 0.2944242125280239

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4873840170280307
Val  : 0.29453682254216873

Train on 4220 samples, validate on 1055 samples
Epoch 1/1
Train: 0.4879176144992737
Val  : 0.29385803444324743

Train on 4220 samples, validate on 1055 samples
Epoch 

KeyboardInterrupt: 

In [100]:
# ReLU instead of squaring w.
#
# The per-dimension weights w come from a ReLU Dense layer over the
# concatenated inputs, so they are non-negative without an explicit square:
#     score(u, v) = sum_i  u_i * w_i * v_i

d = 300

u_input, v_input = Input(shape=(d,)), Input(shape=(d,))

# NOTE: despite the name, this is the concatenation [u, v], not a difference.
diff_vecs = concatenate([u_input, v_input])
w = Dense(units=d, activation='relu')(diff_vecs)

# Weighted inner product of u and v.
u_mult_w = multiply([u_input, w])
final_dot = dot([u_mult_w, v_input], axes=1)

# tuner.summary() can be called here to inspect the architecture.
tuner = Model(inputs=[u_input, v_input], outputs=final_dot)
tuner.compile(loss='mean_squared_error', optimizer='rmsprop')

def eval_tuner(model=None, train=None, val=None):
    """Print and return the correlation between predictions and targets.

    Parameters
    ----------
    model : object with a ``predict`` method, optional
        Model to evaluate. When omitted, the notebook-level ``tuner`` is
        looked up at call time.
    train, val : tuple ``(U, V, y)``, optional
        Paired input arrays and their targets. When omitted, the
        notebook-level ``(U_train, V_train, y_train)`` and
        ``(U_val, V_val, y_val)`` are used.

    Returns
    -------
    tuple of float
        ``(train_corr, val_corr)``: the off-diagonal entry of ``np.corrcoef``
        between the flattened predictions and the targets for each split.
    """
    # Fall back to the notebook globals only when an argument is not supplied,
    # so a bare ``eval_tuner()`` behaves exactly as before.
    if model is None:
        model = tuner
    if train is None:
        train = (U_train, V_train, y_train)
    if val is None:
        val = (U_val, V_val, y_val)

    U_tr, V_tr, y_tr = train
    U_va, V_va, y_va = val

    preds_train = np.asarray(model.predict([U_tr, V_tr]))
    preds_val = np.asarray(model.predict([U_va, V_va]))

    # Ravel the targets as well so a column-vector y does not break corrcoef.
    corr_train = np.corrcoef(preds_train.ravel(), np.ravel(y_tr))[0, 1]
    corr_val = np.corrcoef(preds_val.ravel(), np.ravel(y_va))[0, 1]

    print('Train:', corr_train)
    print('Val  :', corr_val)
    print('')
    return corr_train, corr_val

# Correlations of the untrained model, for reference.
eval_tuner()

# Each fit() call runs a single epoch, so the train/val correlations are
# reported after every pass over the data.
n_epochs = 60
for epoch in range(n_epochs):
    tuner.fit(x=[U_train, V_train], y=y_train,
              verbose=1,
              validation_data=([U_val, V_val], y_val))
    eval_tuner()

Train: 0.15193617681553176
Val  : 0.11672301506413547

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.33543496615001767
Val  : 0.28270333888354926

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.34175537606164114
Val  : 0.28645207037499826

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.34386137309560016
Val  : 0.287532890057162

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3447440385585991
Val  : 0.28764349234387915

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.345623023719111
Val  : 0.2882507872525929

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.34623080681504187
Val  : 0.2884373632503174

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3468922092482544
Val  : 0.2887774357858243

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3473580316929571
Val  : 0.2888869369088582

Train on 4955 samples, validate on 1239 samples
Epoc

Train: 0.38323736581430934
Val  : 0.2971452545673046

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3836181120841838
Val  : 0.2971243946568426

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.38400283102979016
Val  : 0.2971331554246178

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.38438730320801445
Val  : 0.2971318170066904

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3847661585267367
Val  : 0.29709056959384555

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3851435387282343
Val  : 0.2970902831372456

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3855207646021687
Val  : 0.2970759854644459

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3858939897561367
Val  : 0.2970252411876794

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.38627414190976267
Val  : 0.2970225619683971

Train on 4955 samples, validate on 1239 samples
Epoch 

KeyboardInterrupt: 

In [83]:
# TRY BOTTLENECK!!

import keras
from keras.layers import Input, Dense
from keras.layers import concatenate, multiply, dot
from keras.models import Model

# The per-dimension weights w are produced by two stacked linear Dense layers
# over the concatenated inputs, squeezed through a 150-unit bottleneck
# (2d -> 150 -> d). w is squared element-wise before weighting the product:
#     score(u, v) = sum_i  u_i * w_i**2 * v_i
d = 300

u_input, v_input = Input(shape=(d,)), Input(shape=(d,))

# NOTE: despite the name, this is the concatenation [u, v], not a difference.
diff_vecs = concatenate([u_input, v_input])

bottleneck = Dense(units=150, activation='linear')(diff_vecs)
w = Dense(units=d, activation='linear')(bottleneck)
w_sq = multiply([w, w])

# Weighted inner product of u and v.
u_mult_w_sq = multiply([u_input, w_sq])
final_dot = dot([u_mult_w_sq, v_input], axes=1)

# tuner.summary() can be called here to inspect the architecture.
tuner = Model(inputs=[u_input, v_input], outputs=final_dot)
tuner.compile(loss='mean_squared_error', optimizer='rmsprop')

def eval_tuner(model=None, train=None, val=None):
    """Print and return the correlation between predictions and targets.

    Parameters
    ----------
    model : object with a ``predict`` method, optional
        Model to evaluate. When omitted, the notebook-level ``tuner`` is
        looked up at call time.
    train, val : tuple ``(U, V, y)``, optional
        Paired input arrays and their targets. When omitted, the
        notebook-level ``(U_train, V_train, y_train)`` and
        ``(U_val, V_val, y_val)`` are used.

    Returns
    -------
    tuple of float
        ``(train_corr, val_corr)``: the off-diagonal entry of ``np.corrcoef``
        between the flattened predictions and the targets for each split.
    """
    # Fall back to the notebook globals only when an argument is not supplied,
    # so a bare ``eval_tuner()`` behaves exactly as before.
    if model is None:
        model = tuner
    if train is None:
        train = (U_train, V_train, y_train)
    if val is None:
        val = (U_val, V_val, y_val)

    U_tr, V_tr, y_tr = train
    U_va, V_va, y_va = val

    preds_train = np.asarray(model.predict([U_tr, V_tr]))
    preds_val = np.asarray(model.predict([U_va, V_va]))

    # Ravel the targets as well so a column-vector y does not break corrcoef.
    corr_train = np.corrcoef(preds_train.ravel(), np.ravel(y_tr))[0, 1]
    corr_val = np.corrcoef(preds_val.ravel(), np.ravel(y_va))[0, 1]

    print('Train:', corr_train)
    print('Val  :', corr_val)
    print('')
    return corr_train, corr_val

# Correlations of the untrained model, for reference.
eval_tuner()

# Each fit() call runs a single epoch, so the train/val correlations are
# reported after every pass over the data.
n_epochs = 50
for epoch in range(n_epochs):
    tuner.fit(x=[U_train, V_train], y=y_train,
              verbose=1,
              validation_data=([U_val, V_val], y_val))
    eval_tuner()

Train: 0.15936289944840984
Val  : 0.11936493267517119

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.2775347820766862
Val  : 0.2454452280400833

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3313178703140733
Val  : 0.2925014193842448

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.3370596660298611
Val  : 0.2972243421553206

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.34588803495475795
Val  : 0.3098683208863316

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.35205099710316917
Val  : 0.316401117724531

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.36132094106733814
Val  : 0.32719833908755613

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.37595288483994177
Val  : 0.33384361914911126

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.38059603569414124
Val  : 0.3320735576006322

Train on 4955 samples, validate on 1239 samples
Epo

Train: 0.9008007829993092
Val  : 0.26189858462501026

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.9075652721380131
Val  : 0.2633414757433929

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.9128917240901747
Val  : 0.25625029426216367

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.9171063779581328
Val  : 0.2562239232791004

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.9210725786633951
Val  : 0.2521754158344972

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.9253161992505133
Val  : 0.2525208024043792

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.9290605791876332
Val  : 0.2532804642244092

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.9310639534362563
Val  : 0.25250323347169845

Train on 4955 samples, validate on 1239 samples
Epoch 1/1
Train: 0.933064452027728
Val  : 0.24769085494229093

Train on 4955 samples, validate on 1239 samples
Epoch 1/