In [None]:
import os
import copy
import math
import pickle
import spacy
from tqdm import tqdm
import numpy as np
import pandas as pd
import plotly.express as px
from graphdatascience import GraphDataScience
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
import ray

# Load the spaCy pipeline named 'en_core_web_sm'.
# NOTE(review): `nlp` is not referenced in the cells visible here — confirm it is still needed.
nlp = spacy.load('en_core_web_sm')

# Relative directory prefix for table files.
# NOTE(review): not referenced in the cells visible here — presumably used elsewhere; confirm.
tables_path = 'tables/tables_52_88/'


In [None]:
# Connect to Neo4j through the Graph Data Science client.
# Connection settings can be overridden with environment variables so that
# credentials do not have to live in the notebook; the defaults reproduce the
# values that were previously hard-coded, so existing setups keep working.
gds = GraphDataScience(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    auth=(
        os.environ.get("NEO4J_USER", 'neo4j'),
        os.environ.get("NEO4J_PASSWORD", 'bos'),
    ),
    database=os.environ.get("NEO4J_DATABASE", 'frus5288'),
)

# One row per DynamicEntity4YearBinned node: its name (`entity`) and the value
# of its 'fastrp-embedding' property (`fastrp_embedding`).
embedding_df = gds.run_cypher(
    """
        match (e:DynamicEntity4YearBinned)
        return e.name as entity, e['fastrp-embedding'] as fastrp_embedding
    """
)

In [None]:
# Project the entity embeddings to 2-D with t-SNE.
# t-SNE is stochastic: without a fixed `random_state` every run yields a
# different layout, so the exported plot could not be reproduced.
reduced_emb_mat = TSNE(n_components=2, perplexity=50, random_state=42).fit_transform(
    np.stack(embedding_df['fastrp_embedding'])
)

x, y = reduced_emb_mat[:, 0], reduced_emb_mat[:, 1]

# Scatter plot of the 2-D projection, each point labelled with its entity name.
fig = px.scatter(x=x, y=y, text=embedding_df['entity'].values, width=900, height=900)

# NOTE(review): the file name mentions "69_76" while the connection cell
# targets database 'frus5288' — confirm the name is still accurate.
tsne_html_path = "ne2vec/69_76_dynamic_mincnt20_fastrp128.html"
# write_html does not create missing parent directories; make sure it exists.
os.makedirs(os.path.dirname(tsne_html_path), exist_ok=True)
fig.write_html(tsne_html_path)

In [None]:
# Pairwise cosine similarity between all entity embeddings. The embeddings are
# stacked in `embedding_df` row order, so row/column i of the matrix corresponds
# to the i-th row of `embedding_df`.
cossim_mat = cosine_similarity(np.stack(embedding_df['fastrp_embedding']))

def most_similar(word, top_n, entities=None, sim_matrix=None):
    """Return the ``top_n`` entities most cosine-similar to ``word``.

    Parameters
    ----------
    word : str
        Entity name to look up (exact match).
    top_n : int
        Maximum number of neighbours to return.
    entities : sequence of str, optional
        Entity names, in the same order as the rows of ``sim_matrix``.
        Defaults to ``embedding_df['entity']`` from the notebook.
    sim_matrix : array-like, optional
        Square similarity matrix. Defaults to the notebook-level ``cossim_mat``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(k, 2)`` with ``k <= top_n``; each row is
        ``[entity_name, similarity]``, ordered by decreasing similarity.

    Raises
    ------
    IndexError
        If ``word`` is not among the entity names.
    """
    names = np.asarray(
        embedding_df['entity'].values if entities is None else entities,
        dtype=object,
    )
    similarities = np.asarray(cossim_mat if sim_matrix is None else sim_matrix)

    # Look the entity up by *position*: the similarity matrix is indexed
    # positionally, so a DataFrame index label would only be correct when the
    # frame happens to carry a default RangeIndex.
    matches = np.flatnonzero(names == word)
    if matches.size == 0:
        raise IndexError(f"entity {word!r} not found among the embedded entities")
    word_idx = matches[0]

    row = similarities[word_idx]
    ranked = np.argsort(row)[::-1]
    # Exclude the query entity explicitly instead of assuming it is ranked
    # first: an entity with an identical embedding ties at the top similarity
    # and could otherwise push the query itself into the results.
    neighbour_idx = ranked[ranked != word_idx][:max(top_n, 0)]

    return np.array([names[neighbour_idx], row[neighbour_idx]]).T

In [None]:
# Build a table of the 10 nearest neighbours of `entity` in every 4-year bin.
entity = 'NATO'

# Bin edges 1950, 1954, ..., 1986 and their two-digit labels '50-54', ..., '82-86'.
bins = list(range(1950, 1990, 4))
labels = [f"{str(start)[-2:]}-{str(end)[-2:]}" for start, end in zip(bins, bins[1:])]

# Collect one column per bin and build the frame once, instead of growing it
# with pd.concat inside the loop.
neighbours_by_bin = {}
for label in labels:
    dynamic_entity = entity + ' ' + label
    try:
        result = most_similar(dynamic_entity, 10)
    except IndexError:
        # The entity has no node in this bin; skip the bin. Any other error
        # is a real problem and is allowed to surface rather than being hidden.
        continue
    # Drop the trailing ' YY-YY' bin suffix (6 characters) from each neighbour name.
    # Wrapping in a Series lets columns of different lengths align (NaN-padded).
    neighbours_by_bin[label] = pd.Series([row[0][:-6] for row in result], dtype=object)

temp_df = (
    pd.DataFrame(neighbours_by_bin)
    .reset_index(drop=False)
    .rename(columns={'index': 'Rank/Bin'})
)
# Ranks are 1-based for display.
temp_df['Rank/Bin'] = temp_df['Rank/Bin'] + 1

temp_df

In [None]:
# Cosine similarity between the embeddings of two specific binned entities.
entity1 = 'Angola 82-86'
entity2 = 'Turkey 82-86'

# Fetch each entity's embedding and reshape it into a single-row matrix, the
# 2-D layout that cosine_similarity expects.
embedding1, embedding2 = (
    np.array(
        embedding_df[embedding_df['entity'] == name]['fastrp_embedding'].values[0]
    ).reshape(1, -1)
    for name in (entity1, entity2)
)

cosine_similarity(embedding1, embedding2)