In [1]:
import numpy as np
import pandas as pd

from gensim.models import KeyedVectors
from scipy.spatial.distance import cosine
from tqdm import tqdm_notebook

from sources import parse_glove_vocab

from IPython.display import Markdown, display

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline
mpl.style.use('bmh')

In [3]:
# Let pandas print up to 1000 rows before it truncates a displayed frame/series.
pd.set_option('display.max_rows', 1000)

In [4]:
# Vectors stored in word2vec *text* format (hence binary=False).
# Presumably trained on the New England corpus, going by the file name.
m1 = KeyedVectors.load_word2vec_format(
    '../../data/corpora/north-south/new-england.w2v.txt',
    binary=False,
)

In [5]:
# Vectors stored in word2vec *text* format (hence binary=False).
# Presumably trained on the Deep South corpus, going by the file name.
m2 = KeyedVectors.load_word2vec_format(
    '../../data/corpora/north-south/deep-south.w2v.txt',
    binary=False,
)

In [8]:
def compare(token, n=50, models=None):
    """Print the nearest neighbours of `token` in each model, one list per model.

    A level-1 Markdown heading with the token is rendered first. Then, for
    every model, a level-4 heading with the model's label is rendered and the
    `n` most similar tokens are printed one per line. Only the token of each
    `(token, score)` pair returned by `most_similar` is shown.

    Parameters
    ----------
    token : str
        The word to look up.
    n : int, default 50
        Number of neighbours to list per model (passed as `topn`).
    models : iterable of (label, model) pairs, optional
        Models to query, in display order. Each model must expose
        `most_similar(token, topn=...)`. When omitted, the notebook-level
        models are used: `m1` labelled 'New England' and `m2` labelled 'South'.

    Notes
    -----
    If `token` is missing from a model's vocabulary (`most_similar` raises
    `KeyError`), a one-line notice is printed for that model and the
    remaining models are still shown, instead of aborting mid-output.
    """
    if models is None:
        # Looked up at call time, so defining this function does not require
        # the models to be loaded yet.
        models = (('New England', m1), ('South', m2))

    display(Markdown(f'# {token}'))

    for position, (label, model) in enumerate(models):
        if position:
            # Blank-line separator between consecutive neighbour lists.
            print('\n')
        display(Markdown(f'#### {label}'))
        try:
            neighbours = model.most_similar(token, topn=n)
        except KeyError:
            # Vocabularies can differ between corpora; keep going so the
            # other models' neighbours are still displayed.
            print(f'({token!r} is not in the {label} vocabulary)')
            continue
        for neighbour, _ in neighbours:
            print(neighbour)

In [9]:
# Top-50 neighbours of 'woman' in each regional model.
compare('woman', n=50)

# woman

#### New England

man
wonder
women
lady
accidentally
she
kfc
lung
girl
forced
odd
wife
female
stomach
young
asks
officer
cop
teen
doctor
sues
herself
alive
texting
guy
husband
police
allegedly
pregnant
strange
old
falls
homeless
mother
muslim
elderly
elevator
metal
person
black
killed
sexually
arrested
her
gal
finds
charged
sexy
who
child




#### South

man
pregnant
women
she
herself
accidentally
wife
kfc
lung
girl
daughter
officer
forced
husband
stomach
old
her
skull
mom
men
sexy
elderly
accused
allegedly
black
alive
asks
child
teenager
police
dies
raped
female
gets
killed
teen
finds
doctors
mother
sues
young
arrested
he
charged
sexually
guy
gal
cage
odd
strange


In [10]:
# Top-50 neighbours of 'choice' in each regional model.
compare('choice', n=50)

# choice

#### New England

choose
choices
decision
choosing
pick
chose
awards
ranked
option
readers
argument
voting
kindness
overall
excellent
options
award
tough
winner
freedom
chosen
nominated
hotels
best
good
difference
wise
reasons
decisions
multiple
obvious
abortion
which
right
regardless
word
combination
debate
giving
sense
vote
education
opinion
wrong
taste
teen
picking
imo
reason
respect




#### South

choose
choosing
wk
choices
decision
depressing
disabled
winner
decisions
asking
pick
picking
excellent
readers
hardest
chose
awards
overall
wise
option
kindness
advice
tough
options
win
multiple
given
opinion
logical
either
correct
best
sweepstakes
hotels
award
kind
direction
sense
informed
lottery
chance
wins
food
sides
taste
whether
freedom
happiness
answer
tip


In [11]:
# Top-50 neighbours of 'abortion' in each regional model.
compare('abortion', n=50)

# abortion

#### New England

parenthood
anti
rights
immigrant
birth
marriage
sharia
immigration
lgbt
daca
planned
equal
abuse
legal
sanctuary
activist
pregnancy
amendment
treatment
rape
pro
violence
oral
opposed
funding
refugee
advocate
immigrants
conservative
equality
trans
discrimination
clinic
slavery
religious
muslim
harassment
هذه
oppose
illegal
constitutional
obamacare
establishment
feminist
argument
addiction
laws
controversial
protest
medicaid




#### South

parenthood
anti
sharia
immigrant
birth
oppose
planned
discrimination
advocate
lgbt
equality
immigration
assault
clinic
policies
violence
radical
religious
muslim
amendment
activist
feminist
refugee
islamic
pro
rights
sanctuary
funding
slavery
breitbart
constitutional
legal
supports
protest
terrorism
ban
legislation
islam
targeting
milo
obamacare
murder
abuse
laws
conservatives
muslims
trans
equal
hiv
appeal


In [12]:
# Top-50 neighbours of 'freedom' in each regional model.
compare('freedom', n=50)

# freedom

#### New England

religious
speech
equality
expression
liberty
constitution
presentation
rights
democracy
choose
religion
amendment
peace
defend
values
destination
fear
sacrifice
fought
hatred
respect
slavery
independence
happiness
peaceful
ignorance
press
constitutional
terrorism
islam
silence
choice
beliefs
privilege
defending
compassion
principles
racism
faith
unity
shall
american
nazis
upcoming
courage
violence
lies
fighters
protecting
protected




#### South

peace
speaking
liberty
goddess
speech
🙏
blessings
religious
😇
democracy
equality
fear
👑
constitution
religion
supreme
rights
life
daily
live
happiness
queen
independence
slavery
amendment
unity
sacrifice
privilege
expression
christ
lives
financial
fighters
equal
choose
destination
defend
movement
justice
protecting
exercise
existence
honor
patriot
joy
hatred
fought
protest
william
consequences


In [13]:
# Top-50 neighbours of 'life' in each regional model.
compare('life', n=50)

# life

#### New England

lives
everything
living
forever
whole
happiness
truly
change
sometimes
relationship
meaning
ruin
dreams
lived
time
wish
miserable
things
everyday
thankful
loving
honestly
god
my
goals
saving
matter
true
family
saved
entire
shit
else
story
ruined
literally
purpose
choices
always
every
tbh
love
changed
never
friendship
changing
journey
own
something
lesson




#### South

living
lives
sometimes
blessings
change
forever
relationship
whole
happiness
everything
god
peace
freedom
honestly
speaking
things
rest
everyday
😇
someone
existence
myself
truly
saving
loving
way
my
love
nothing
matter
changing
positive
always
goals
else
situations
swear
🙏
thankful
enjoy
your
mood
dreams
learned
friendship
person
career
make
together
shit


In [14]:
# Top-50 neighbours of 'birth' in each regional model.
compare('birth', n=50)

# birth

#### New England

pregnancy
abortion
pregnant
control
child
giving
gave
treatment
gives
marriage
mother
rights
beyoncé
mothers
twins
give
age
bday
given
newborn
amendment
injuries
babies
license
father
months
abuse
rates
rate
oral
michelle
month
older
pills
daughters
younger
since
insurance
married
born
according
fathers
disease
her
symptoms
parents
switched
lowest
gun
baby




#### South

gives
abortion
giving
pregnancy
pregnant
twins
mother
child
baby
gave
beyoncé
beyonce
control
mothers
given
babies
announcement
pills
month
passes
celebrates
twin
born
anniversary
mom
africa
animal
newborn
gender
switched
give
expecting
father
rat
wife
adopted
city
highest
delivery
sanctuary
frog
daughter
rights
son
transgender
admitted
children
sized
cannabis
since
