# Different linguistic features

In this notebook, I see what happens if we use different linguistic features with QVEC.

In [2]:
%matplotlib inline
import os
import csv
import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default styling to the matplotlib figures drawn in this notebook.
sns.set()

# Project directories, given relative to the notebook's working directory.
data_path = '../../data'  # root for input data; the embeddings file is resolved under it
tmp_path = '../../tmp'  # NOTE(review): not used in the cells visible here; presumably scratch output, confirm

### Learnt embeddings

In [3]:
# Dimensionality of the pre-trained GloVe vectors to load; it selects the file name.
size = 300
fname = 'embeddings/glove.6B.{}d.txt'.format(size)
embedding_path = os.path.join(data_path, fname)
# The file is read as space-separated rows with no header, first field = word.
# - quoting=csv.QUOTE_NONE: quote characters are ordinary tokens, not field delimiters.
# - keep_default_na=False: keep words such as "nan", "null" or "n/a" as literal strings;
#   by default pandas parses them as missing values, which would turn those index
#   labels into NaN and make the words impossible to look up.
# The transpose puts words on the columns and embedding dimensions on the rows,
# which is the layout `qvec` indexes into.
embeddings = pd.read_csv(
    embedding_path,
    sep=' ',
    header=None,
    index_col=0,
    quoting=csv.QUOTE_NONE,
    keep_default_na=False,
).T

### QVEC model

In [4]:
def qvec(features, embeddings):
    """
    Correlate every embedding dimension with every linguistic feature.

    Both inputs are DataFrames whose columns are words. Only the words present
    in both are used. Each row of `embeddings` (one dimension) is correlated
    with each row of `features` (one feature) across those shared words.

    Returns a DataFrame indexed like the rows of `embeddings`, with one column
    per row of `features` (labelled by the feature index).

    For a given dimension, the aligned feature is the one with the highest
    correlation; the qvec score is the sum of those aligned correlations.
    This function only returns the correlation table.
    """
    shared_words = embeddings.columns.intersection(features.columns)
    feature_table = features[shared_words]
    embedding_table = embeddings[shared_words]

    # Key by position first so that the feature labels are attached in one
    # step afterwards, exactly in row order.
    by_position = {}
    for position in range(len(feature_table)):
        feature_row = feature_table.iloc[position]
        by_position[position] = embedding_table.corrwith(feature_row, axis=1)

    result = pd.DataFrame(by_position)
    result.columns = feature_table.index
    return result

## FrameNet - does word $j$ evoke frame $i$?

In [5]:
# NOTE(review): `subset` is not defined in any cell visible here; this cell relies on it
# already existing in the kernel (presumably a features-by-words DataFrame, confirm).
# Unless it is defined elsewhere, a fresh Restart & Run All will fail with NameError.
correlations = qvec(subset, embeddings)
# Number of words shared by the embeddings and the feature table (the same set qvec restricts to).
V = len(embeddings.columns.intersection(subset.columns))

In [6]:
# Preview the first rows of the correlation table (rows: embedding dimensions, columns: features).
correlations.head()

Unnamed: 0,noun.Tops,noun.act,noun.animal,noun.artifact,noun.attribute,noun.body,noun.cognition,noun.communication,noun.event,noun.feeling,...,verb.consumption,verb.contact,verb.creation,verb.emotion,verb.motion,verb.perception,verb.possession,verb.social,verb.stative,verb.weather
1,-0.025195,0.103305,0.022804,0.01996,-0.005073,-0.06304,0.001746,-0.044627,0.060426,-0.000629,...,-0.000923,0.032615,-0.059228,0.002191,0.043395,0.016654,-0.000253,0.015861,-0.058195,0.027505
2,0.025052,0.003521,0.071996,0.033976,0.046986,0.000275,-0.02589,-0.093323,0.013002,-0.026363,...,0.041451,0.004752,0.008058,-0.024316,-0.043428,0.021968,-0.003188,0.02407,0.064498,-0.034473
3,0.000335,0.132868,-0.020388,-0.043322,0.026484,-0.031252,0.051305,0.025105,0.069223,0.112211,...,-0.009357,-0.078781,-0.096304,-0.000102,-0.010545,-0.01511,-0.082116,-0.047947,-0.09822,0.008797
4,0.00388,0.020326,0.023546,-0.164084,-0.013492,-0.079157,0.008527,0.045884,-0.002118,-0.003778,...,0.031046,-0.015342,-0.029799,0.009851,0.045801,-0.023565,0.036102,0.00305,0.059049,-0.009769
5,0.011347,0.032593,0.008034,-0.031921,-0.01544,0.030811,0.026431,-0.02038,-0.054214,0.032733,...,-0.006414,-0.024326,-0.012067,0.045959,-0.026189,-0.051913,0.01384,-0.012455,0.000732,-0.006095
