# Микродиахроническое исследование русских приставок методами дистрибутивной семантики
## Автор: Елизавета Клыкова, БКЛ181
### Анализ результатов ELMo (глаголы внутри приставок)

#### Импорт модулей

In [1]:
# %load_ext pycodestyle_magic
# %pycodestyle_on

In [2]:
import pickle
import numpy as np
from tqdm.auto import tqdm
from statistics import mean
from scipy.spatial.distance import cityblock

#### Считываем файлы с эмбеддингами

In [3]:
with open('verbs_to_search.pickle', 'rb') as f:
    verbs_to_search = pickle.load(f)

In [4]:
with open('prefs_verbs_10.pickle', 'rb') as f:
    prefs_verbs_10 = pickle.load(f)

In [5]:
with open('presov_samples.pickle', 'rb') as f:
    presov_samples = pickle.load(f)

In [6]:
with open('sov_samples.pickle', 'rb') as f:
    sov_samples = pickle.load(f)

In [7]:
with open('postsov_samples.pickle', 'rb') as f:
    postsov_samples = pickle.load(f)

In [8]:
verb_embeddings = {}
for verb in verbs_to_search:
    mean_presov = np.average([emb[-1] for emb in presov_samples[verb]], axis=0)
    mean_sov = np.average([emb[-1] for emb in sov_samples[verb]], axis=0)
    mean_postsov = np.average([emb[-1] for emb in postsov_samples[verb]], axis=0)
    verb_embeddings[verb] = {'presov': mean_presov,
                             'sov': mean_sov,
                             'postsov': mean_postsov,
                             'pre_to_sov': cityblock(mean_presov, mean_sov),
                             'sov_to_post': cityblock(mean_sov, mean_postsov),
                             'pre_to_post': cityblock(mean_presov, mean_postsov)}

In [9]:
pref_embeddings = {}
for pref in prefs_verbs_10:
    pref_embeddings[pref] = {}
    for verb in prefs_verbs_10[pref]:
        pref_embeddings[pref][verb] = verb_embeddings[verb]

In [10]:
pref_change = {}
for pref in pref_embeddings:
    presov_change = mean(pref_embeddings[pref][verb]['pre_to_sov']
                         for verb in pref_embeddings[pref])
    sov_change = mean(pref_embeddings[pref][verb]['sov_to_post']
                      for verb in pref_embeddings[pref])
    postsov_change = mean(pref_embeddings[pref][verb]['pre_to_post']
                          for verb in pref_embeddings[pref])
    pref_change[pref] = {'pre_to_sov': presov_change,
                         'sov_to_post': sov_change,
                         'pre_to_post': postsov_change}

In [11]:
pre_to_sov_ranged = [pref for pref in sorted(pref_change,
                                             key=lambda x: pref_change[pref]['pre_to_sov'],
                                             reverse=True)]

In [12]:
sov_to_post_ranged = [pref for pref in sorted(pref_change,
                                              key=lambda x: pref_change[pref]['sov_to_post'],
                                              reverse=True)]

In [13]:
pre_to_post_ranged = [pref for pref in sorted(pref_change,
                                              key=lambda x: pref_change[pref]['pre_to_post'],
                                              reverse=True)]

In [14]:
for pref in pre_to_sov_ranged:
    print(pref)

недо
на
при
над
до
ни[зс]
про
вы
у
по
под
пре
полу
и[зс]
от
ра[зс]
за
о
в[зс]
обе[зс]
пере
с
во[зс]
бе[зс]
во?
пред


In [15]:
for pref in sov_to_post_ranged:
    print(pref)

недо
на
при
над
до
ни[зс]
про
вы
у
по
под
пре
полу
и[зс]
от
ра[зс]
за
о
в[зс]
обе[зс]
пере
с
во[зс]
бе[зс]
во?
пред


In [16]:
for pref in pre_to_post_ranged:
    print(pref)

недо
на
при
над
до
ни[зс]
про
вы
у
по
под
пре
полу
и[зс]
от
ра[зс]
за
о
в[зс]
обе[зс]
пере
с
во[зс]
бе[зс]
во?
пред
