In [1]:
import numpy as np
import pandas as pd

from gensim.models import KeyedVectors
from scipy.spatial.distance import cosine
from tqdm import tqdm_notebook

from sources import parse_glove_vocab

from IPython.display import Markdown, display

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

# IPython magic (not plain Python): render matplotlib figures inline in the cell output.
%matplotlib inline
# Switch matplotlib to its 'bmh' style sheet for any figures drawn afterwards.
# NOTE(review): no plotting appears in the visible cells — confirm this setup is still needed.
mpl.style.use('bmh')

In [3]:
# Vectors from the new-england file, read with gensim's word2vec-format loader.
# north() and south() read this model through the notebook-level name `m1`.
m1 = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/new-england.w2v.txt')

In [4]:
# Vectors from the deep-south file, read with gensim's word2vec-format loader.
# north() and south() read this model through the notebook-level name `m2`.
m2 = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/deep-south.w2v.txt')

In [5]:
# Vectors from the combined file. concept_diff() reads this notebook-level name both to
# look up neighbour vectors for averaging and to run the final similarity query.
combined = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/combined.w2v.txt')

In [6]:
# Parse the vocab file with the project-local helper from `sources`.
# NOTE(review): `vocab` is not referenced by any of the visible cells — confirm it is still needed.
vocab = parse_glove_vocab('../../data/corpora/north-south/vocab.txt')

In [70]:
def concept_diff(m1, m2, seed, depth=50, topn=20, space=None):
    """Rank tokens along the direction separating two models' views of `seed`.

    For each model, the `depth` nearest neighbours of `seed` are looked up in
    that model, their vectors are fetched from a shared embedding space, and
    the vectors are averaged. The shared space is then queried with the
    difference ``centroid(m1) - centroid(m2)``.

    Parameters
    ----------
    m1, m2 : objects exposing ``most_similar(seed, topn=...)`` that returns
        ``(token, score)`` pairs (e.g. gensim ``KeyedVectors``).
    seed : token whose neighbourhoods are compared.
    depth : number of nearest neighbours taken from each model.
    topn : number of results requested from the shared space.
    space : shared embedding space supporting ``space[token]``,
        ``token in space`` and ``similar_by_vector(vector, topn=...)``.
        Defaults to the notebook-level ``combined`` model, so existing calls
        behave as before.

    Returns
    -------
    Whatever ``space.similar_by_vector`` returns for the difference vector.

    Raises
    ------
    ValueError
        If none of a model's neighbours of `seed` exist in `space`, so no
        centroid can be computed.
    """
    # Resolve the global lazily so callers can inject a space explicitly
    # instead of relying on hidden notebook state.
    if space is None:
        space = combined

    def _centroid(model):
        """Mean vector, in `space`, of `seed`'s nearest neighbours in `model`."""
        neighbours = [token for token, _ in model.most_similar(seed, topn=depth)]
        # A neighbour may be absent from the shared space; skip it rather
        # than aborting the whole comparison on a bare KeyError.
        vectors = [space[token] for token in neighbours if token in space]
        if not vectors:
            raise ValueError(
                f'no neighbours of {seed!r} (depth={depth}) are present in the shared embedding space'
            )
        return np.array(vectors).mean(axis=0)

    return space.similar_by_vector(_centroid(m1) - _centroid(m2), topn=topn)

In [71]:
def north(seed, *args, **kwargs):
    """Run concept_diff for `seed` with `m1` as the first model and `m2` as the second.

    Any extra positional or keyword arguments are forwarded to concept_diff
    unchanged.
    """
    first_model, second_model = m1, m2
    return concept_diff(first_model, second_model, seed, *args, **kwargs)

In [72]:
def south(seed, *args, **kwargs):
    """Run concept_diff for `seed` with `m2` as the first model and `m1` as the second.

    Any extra positional or keyword arguments are forwarded to concept_diff
    unchanged.
    """
    first_model, second_model = m2, m1
    return concept_diff(first_model, second_model, seed, *args, **kwargs)

In [73]:
def compare(token, *args, **kwargs):
    """Display the north() and south() token lists for `token` under Markdown headings.

    Renders a top-level heading with the token, then a 'New England' section
    (tokens from north) and a 'South' section (tokens from south), each
    printed as one space-separated line. Extra arguments are forwarded to
    both north() and south().
    """
    def _section(label, ranker):
        # Heading first, then the ranked tokens with their scores dropped.
        display(Markdown(f'#### {label}'))
        ranked = ranker(token, *args, **kwargs)
        print(' '.join(word for word, _ in ranked))

    display(Markdown(f'# {token}'))
    _section('New England', north)
    # Spacer between the two sections.
    print('\n')
    _section('South', south)

In [74]:
# Show the north() and south() token lists for the seed 'woman' (default depth and topn).
compare('woman')

# woman

#### New England

feedback provide pitch texting folks noise lowest bottom pleasant maintenance person prices lines helpful road suggestions need particular streets expect




#### South

father daughter grandmother son dies sister wife nephew rolls niece admitted raped admits died revenge daughters killed kidnapped bella carried


In [63]:
# Show the north() and south() token lists for the seed 'choice' (default depth and topn).
# NOTE(review): this cell's execution count (63) is out of sequence with the surrounding
# cells (74, 75) — re-run on a fresh kernel to confirm the output below is current.
compare('choice')

# choice

#### New England

republican democrat democratic dem gop racist voting anti racism candidate conservative political against corrupt establishment socialist race republicans corruption candidates




#### South

wk giveaway depressing entered 1k prize enter food sweepstakes destination 600 يوم spinner ✈ chance travel disabled slice tip haul


In [75]:
# Show the north() and south() token lists for the seed 'abortion' (default depth and topn).
compare('abortion')

# abortion

#### New England

oral addiction healthy diet pregnancy finale pregnant هذا treatment eating satisfied أو doctor credits lemonade satisfying kylie soap maki hungry




#### South

radical islamic terrorist terrorism terrorists terror islam muslims supremacist nazi isis muslim jews antifa neo supremacists nazis communist fascist christians


In [76]:
# Show the north() and south() token lists for the seed 'freedom' (default depth and topn).
compare('freedom')

# freedom

#### New England

supremacists racism rhetoric terrorism nazis supremacy hatred racists radical hypocrisy conservatives liberals violent leftist violence ignorance propaganda defend wh gop




#### South

👑 🙏 queen 😇 goddess blessings supreme daily life queens ✨ 😍 live 😘 ❤ 💞 king 🙌🏾 peace slay


In [77]:
# Show the north() and south() token lists for the seed 'life' (default depth and topn).
compare('life')

# life

#### New England

franchise has owns ruined story hasn apparently destroyed changed watched owned 2013 movie mystery hadn became stole disney concept forgotten




#### South

🙏 speaking blessings peace 😇 👑 goddess vibes freedom positivity supreme positive 🙏🏽 keep 🙏🏾 😌 ✌ negativity pray daily


In [78]:
# Show the north() and south() token lists for the seed 'birth' (default depth and topn).
compare('birth')

# birth

#### New England

insurance rates symptoms risk harm patients reduce improve rate interest illness common pain higher mortgage lowest aca prices depression increase




#### South

welcomes lion king frog city owl sanctuary وبحمده teddy lego bunny themed rat disney cutest rescue khan animal pitbull mayor
