# Measuring Semantic Change

In [1]:
import pickle
from tqdm import tqdm
import os

import scipy.sparse as sp
import numpy as np
import pandas as pd

In [28]:
# Run configuration: which part of speech to analyse and which
# nearest-neighbour variant (sub-folder of ./nearest_neighbors/) to read.
pos = 'V'
suffix = 'doubNorm_cosine/'
NN_FOLDER = f'./nearest_neighbors/{suffix}'

# Decade start years, 1890 through 1990 inclusive (11 columns of the result table).
DECADES = [1890 + 10 * step for step in range(11)]

In [29]:
# Normalise the user-supplied POS tag to its one-letter form and load the
# pickled word list that goes with it.
# Each entry: (accepted spellings, canonical tag, pickle holding the word list).
_POS_SOURCES = (
    (('a', 'adj', 'adjective'), 'A', './words/adjs_list.pickle'),
    (('n', 'noun'), 'N', './words/nouns_list.pickle'),
    (('v', 'verb'), 'V', './words/verbs_list.pickle'),
)

for _aliases, _canonical, _pickle_path in _POS_SOURCES:
    if pos.lower() in _aliases:
        pos = _canonical
        # NOTE: pickle.load can execute arbitrary code; only load trusted files.
        with open(_pickle_path, 'rb') as handle:
            word_list = pickle.load(handle)
        break
else:
    # No alias matched: refuse to continue with an unknown tag.
    raise ValueError('Invalid POS tag value : ' + pos)

# Position of each word in `word_list`, used as its integer id.
word2ind = dict(zip(word_list, range(len(word_list))))

## Generating the dataframe

In [30]:
# One row per word, one column per decade; cells stay NaN until the
# corresponding decade has been processed.
semChange = pd.DataFrame(
    index=word_list,
    columns=DECADES,
    dtype='float16',
).rename_axis('words')

# 1890 is the origin decade, so its change relative to itself is zero.
semChange[1890] = np.zeros(len(word_list))
semChange

Unnamed: 0_level_0,1890,1900,1910,1920,1930,1940,1950,1960,1970,1980,1990
words,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
abandon,0.0,,,,,,,,,,
abandoned,0.0,,,,,,,,,,
abandoning,0.0,,,,,,,,,,
abandons,0.0,,,,,,,,,,
abate,0.0,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
zeitung,0.0,,,,,,,,,,
zigzagged,0.0,,,,,,,,,,
zigzagging,0.0,,,,,,,,,,
zipped,0.0,,,,,,,,,,


In [31]:
# Find the decades for which a nearest-neighbour file `<decade>_<POS>.npy`
# exists in NN_FOLDER for the selected part of speech.
files = os.listdir(NN_FOLDER)
available_decades = []
for fname in files:
    stem, ext = os.path.splitext(fname)
    parts = stem.split('_')
    # Ignore anything that does not follow the `<decade>_<POS>.npy` naming
    # scheme (hidden files, checkpoints, other artefacts) instead of crashing
    # on the tuple unpacking or the int() conversion.
    if ext != '.npy' or len(parts) != 2 or not parts[0].isdecimal():
        continue
    f_dec, f_pos = parts
    if f_pos == pos:
        available_decades.append(int(f_dec))

# os.listdir returns names in arbitrary order; sort so that the decades are
# processed chronologically and the last one processed is always the latest.
available_decades.sort()

if 1890 not in available_decades:
    raise FileNotFoundError('No file for the origin decade 1890.')
print(available_decades)
# The origin decade is the reference point, not a comparison target.
available_decades.pop( available_decades.index(1890) )

[1890, 1990]


1890

In [32]:
def _topk_turnover(origin_row, target_row, size):
    """Return 1 - (#ids shared by the first `size` entries of both rows) / (#origin entries kept)."""
    origin_top = origin_row[:size]
    shared = np.intersect1d(origin_top, target_row[:size])
    return 1 - len(shared) / len(origin_top)


# The origin decade (1890) is loaded once; every other available decade is
# compared against it, word by word.
nn_origin_array = np.load(NN_FOLDER+f'{1890}_{pos}.npy')
for decade in available_decades:
    nn_array = np.load(NN_FOLDER+f'{decade}_{pos}.npy')
    k = 100  # number of leading neighbours compared per word
    target_change = np.array(
        [
            _topk_turnover(nn_origin_array[row], nn_array[row], k)
            for row in range(len(word_list))
        ],
        dtype=float,
    )
    print(decade,'Average change : ', target_change.mean().round(4))
    print(decade,'Std Dev. of change: ', target_change.std().round(4))
    # Store a copy so the table does not share memory with `target_change`.
    semChange[ decade ] = target_change.copy()


1990 Average change :  0.779
1990 Std Dev. of change:  0.1203


In [33]:
# Display the table again now that the columns of the processed decades are filled in.
semChange

Unnamed: 0_level_0,1890,1900,1910,1920,1930,1940,1950,1960,1970,1980,1990
words,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
abandon,0.0,,,,,,,,,,0.73
abandoned,0.0,,,,,,,,,,0.69
abandoning,0.0,,,,,,,,,,0.77
abandons,0.0,,,,,,,,,,0.81
abate,0.0,,,,,,,,,,0.92
...,...,...,...,...,...,...,...,...,...,...,...
zeitung,0.0,,,,,,,,,,0.68
zigzagged,0.0,,,,,,,,,,0.78
zigzagging,0.0,,,,,,,,,,0.72
zipped,0.0,,,,,,,,,,0.83


In [34]:
# Export the table as a tab-separated file under ./semantic_change_measures/<suffix>/.
# `suffix` may carry a trailing '/', so the path is assembled with os.path.join
# (plain string formatting produced a doubled separator), and the folder is
# created on first use so the export does not fail when it is missing.
out_dir = os.path.join('./semantic_change_measures', suffix.strip('/'))
os.makedirs(out_dir, exist_ok=True)
semChange.to_csv(os.path.join(out_dir, f'{pos}.csv'), sep='\t', index=True)

## A bit of exploration

In [96]:
# Words ordered from smallest to largest change score (scores rounded to 3 decimals).
# `target_change` holds the scores of the decade the comparison loop processed last.
ascending = np.argsort(target_change)
list(zip((word_list[idx] for idx in ascending), target_change[ascending].round(3)))

[('reddish', 0.19),
 ('dark', 0.21),
 ('even', 0.21),
 ('dutch', 0.21),
 ('grey', 0.21),
 ('old', 0.21),
 ('silvery', 0.21),
 ('ly', 0.22),
 ('persian', 0.22),
 ('burnt', 0.22),
 ('afraid', 0.22),
 ('still', 0.23),
 ('green', 0.23),
 ('burning', 0.23),
 ('blue', 0.23),
 ('away', 0.23),
 ('prepared', 0.24),
 ('frightened', 0.24),
 ('occasional', 0.24),
 ('lustrous', 0.24),
 ('shining', 0.24),
 ('grayish', 0.24),
 ('shiny', 0.24),
 ("ev'ry", 0.25),
 ('saxon', 0.25),
 ('resolute', 0.25),
 ('near', 0.25),
 ('stout', 0.25),
 ('swift', 0.25),
 ('greenish', 0.25),
 ('greyish', 0.25),
 ('shamed', 0.26),
 ('blackened', 0.26),
 ('blind', 0.26),
 ('well', 0.26),
 ('turkish', 0.26),
 ('cool', 0.26),
 ('dauntless', 0.26),
 ('alone', 0.26),
 ('sloping', 0.26),
 ('obliging', 0.26),
 ('whitish', 0.26),
 ('arrant', 0.26),
 ('greasy', 0.26),
 ('chinese', 0.27),
 ('small', 0.27),
 ('snowy', 0.27),
 ('soft', 0.27),
 ('stiff', 0.27),
 ('right', 0.27),
 ('cold', 0.27),
 ('innocent', 0.27),
 ('red', 0.27),
 

In [56]:
# Words ordered from largest to smallest change score (scores left unrounded).
descending = np.argsort(target_change)[::-1]
[(word_list[idx], target_change[idx]) for idx in descending]

[('repellent', 1.0),
 ('unmanned', 1.0),
 ('catty', 1.0),
 ('ignoble', 1.0),
 ('gainful', 1.0),
 ('minty', 1.0),
 ('centrist', 0.99),
 ('thrifty', 0.99),
 ('renewable', 0.99),
 ('tiered', 0.99),
 ('scrumptious', 0.99),
 ('sonic', 0.99),
 ('chunky', 0.99),
 ('discursive', 0.99),
 ('stainless', 0.99),
 ('chesty', 0.99),
 ('moot', 0.99),
 ('fuzzy', 0.99),
 ('hazardous', 0.98),
 ('regal', 0.98),
 ('individualized', 0.98),
 ('aging', 0.98),
 ('amphibious', 0.98),
 ('saccharine', 0.98),
 ('eclectic', 0.98),
 ('ambient', 0.97),
 ('tannic', 0.97),
 ('forthright', 0.97),
 ('fabulous', 0.97),
 ('diversified', 0.97),
 ('cultured', 0.97),
 ('turbulent', 0.97),
 ('dusty', 0.97),
 ('educable', 0.96),
 ('wishful', 0.96),
 ('interplanetary', 0.96),
 ('formalized', 0.96),
 ('phony', 0.96),
 ('newfound', 0.96),
 ('deft', 0.96),
 ('adolescent', 0.96),
 ('riveting', 0.96),
 ('uninsured', 0.95),
 ('literate', 0.95),
 ('intramural', 0.95),
 ('browner', 0.95),
 ('sleazy', 0.95),
 ('unmeasured', 0.95),
 ('mer

In [122]:
# Inspect a single word: compare its first k neighbours in the origin decade
# (1890) with those in an explicitly chosen target decade.
word = 'tasteful'
target_decade = 1990
k = 100

# The word must belong to the list loaded for the configured `pos`.
# NOTE(review): the recorded output looks like it came from an adjective run;
# confirm that `word` and `pos` agree before re-running.
if word not in word2ind:
    raise KeyError(f"'{word}' is not in the word list loaded for pos={pos!r}")
ind = word2ind[word]

# Load the target decade explicitly rather than reusing `nn_array`, which only
# holds whichever decade the comparison loop happened to process last and may
# therefore not match the label printed below.
nn_target_array = np.load(NN_FOLDER+f'{target_decade}_{pos}.npy')

nn_origin = nn_origin_array[ind][:k]
nn_target = nn_target_array[ind][:k]
intersect = np.intersect1d(nn_origin,nn_target)

print('1890 : ',[word_list[i] for i in nn_origin])
print(f'{target_decade} : ',[word_list[i] for i in nn_target])
print('Intersect : ',[word_list[i] for i in intersect])
# Same measure as in the table: share of origin neighbours that were not kept.
print('Change : ',1-len(intersect)/len(nn_origin))

1890 :  ['ingenious', 'shrewd', 'lifelike', 'masterly', 'plucky', 'praiseworthy', 'elegant', 'felicitous', 'talented', 'judicious', 'serviceable', 'pugnacious', 'instructive', 'astute', 'mediocre', 'farreaching', 'delusive', 'salutary', 'bigoted', 'estimable', 'advantageous', 'quarrelsome', 'energetic', 'sumptuous', 'imprudent', 'meritorious', 'regrettable', 'shortsighted', 'skilful', 'sagacious', 'intractable', 'mischievous', 'lovable', 'ostentatious', 'voracious', 'equidistant', 'forceful', 'quaint', 'munificent', 'elaborate', 'tortuous', 'exhaustive', 'admirable', 'laudable', 'delicate', 'improvident', 'pleasing', 'fastidious', 'irrepressible', 'deplorable', 'meager', 'licentious', 'skillful', 'hospitable', 'meagre', 'sturdy', 'zealous', 'generous', 'costly', 'fallible', 'anomalous', 'lenient', 'antiquated', 'parsimonious', 'multifarious', 'strenuous', 'discouraging', 'irate', 'ennobling', 'corpulent', 'indiscreet', 'illogical', 'cumbersome', 'impecunious', 'intricate', 'gaudy', 'fe

0.7