# Direct nearest-neighbor comparisons

In [6]:
import numpy as np
import pandas as pd

from gensim.models import KeyedVectors
from scipy.spatial.distance import cosine
from tqdm import tqdm_notebook

from sources import parse_glove_vocab

from IPython.display import Markdown, display

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline
mpl.style.use('bmh')

In [3]:
# Allow up to 1000 rows before pandas truncates a displayed frame.
pd.set_option('display.max_rows', 1000)

In [4]:
# New England vectors, read from a word2vec text-format file.
m1 = KeyedVectors.load_word2vec_format(
    '../../data/corpora/north-south/new-england.w2v.txt',
    binary=False,
)

In [5]:
# Deep South vectors, read from a word2vec text-format file.
m2 = KeyedVectors.load_word2vec_format(
    '../../data/corpora/north-south/deep-south.w2v.txt',
    binary=False,
)

In [13]:
def compare(token, n=20):
    """Display the nearest neighbors of ``token`` in both regional models.

    Renders a Markdown heading with the token, then, for the New England
    model (``m1``) followed by the South model (``m2``), a sub-heading and
    the neighbor tokens printed one per line.

    Args:
        token: Word to look up in each model's vocabulary.
        n: Number of neighbors to list per model (forwarded as ``topn``).

    Returns:
        None. All output goes to the notebook cell.
    """
    display(Markdown(f'# {token}'))

    sections = (('New England', m1), ('South', m2))
    for index, (region, model) in enumerate(sections):
        if index:
            # Blank lines separating the two regional lists.
            print('\n')
        display(Markdown(f'#### {region}'))

        try:
            neighbors = model.most_similar(token, topn=n)
        except KeyError:
            # gensim raises KeyError for an out-of-vocabulary token. Report it
            # under this region's heading and still show the other region
            # instead of aborting the cell with a traceback.
            print(f'({token!r} is not in this vocabulary)')
            continue

        # Each result is a pair; only the neighbor token (first item) is shown.
        for neighbor, _ in neighbors:
            print(neighbor)

In [16]:
# Recorded output: New England neighbors are temporal terms (present, generations, predict);
# the South list is mostly names (drake, migos, rihanna) -- presumably the rapper Future.
compare('future')

# future

#### New England

present
generations
predict
vision
husband
bright
belongs
distant
environment
role
grades
develop
upcoming
focused
generation
gadgets
allah
past
change
potential




#### South

drake
present
bright
husband
type
young
migos
rihanna
jay
uzi
gadgets
chris
rick
kendrick
wayne
distant
dream
album
ross
metro


In [17]:
# Recorded output: New England leans to employment terms (wage, worker, minimum);
# the South list reads like holiday-weekend events (kickoff, weekend, day) -- presumably Labor Day.
compare('labor')

# labor

#### New England

wage
secretary
worker
slave
workers
civil
spending
general
independence
force
nominee
minimum
dept
department
costs
society
welfare
slavery
manufacturing
military




#### South

kickoff
weekend
mansion
finale
day
memorial
mlk
atl
sunday
private
warehouse
pool
returns
🙌🏾
friday
closing
entry
nights
sept
night


In [19]:
# Recorded output: 'roll' tops both lists; New England adds water/weather terms (flooding, rising, winds),
# the South adds college-sports terms (alabama, football, auburn).
compare('tide')

# tide

#### New England

roll
desert
heels
low
flooding
rising
climb
tonite
high
heel
winds
unc
rockin
dip
bend
curve
ease
levels
sunset
م




#### South

roll
🏈
alabama
tigers
football
ole
auburn
touchdown
softball
kentucky
coach
eagles
eagle
falcons
panthers
arkansas
carolina
georgia
trails
baseball


In [20]:
# Recorded output: 'witch' tops both lists; New England includes intensifier-like words (hella, soooooo, pretty),
# the South includes archaic/religious ones (shall, ye, thy, holy).
compare('wicked')

# wicked

#### New England

witch
legends
reader
llc
solutions
cool
hella
soooooo
feelin
tasty
killer
funny
soo
jealous
fun
nerd
temple
pretty
local
darn




#### South

witch
shall
matthew
tales
holy
ye
shadows
ep
souls
beyond
spirit
evil
unfortunate
khaled
fred
weapon
bastard
thy
creepy
spoilers


In [14]:
# Recorded output: New England includes road/traffic terms (exit, miles, delays, accident);
# the South includes gratitude terms (blessed, thankful, grateful). 'belief' appears in both.
compare('beyond')

# beyond

#### New England

exit
belief
miles
cleared
north
south
motor
delays
above
updated
accident
far
disgusted
am
incredibly
vehicle
west
ridiculous
absolutely
traveled




#### South

belief
measure
incredibly
above
far
worlds
blessed
truly
disgusted
thankful
ridiculous
grateful
recognition
stressed
pissed
wicked
annoyed
exhausted
extremely
amazing


In [15]:
# Recorded output: New England starts with fill, empty, filling, blank nearby;
# the South list includes political terms (scandal, hypocrisy, corruption, repeal).
compare('void')

# void

#### New England

fill
empty
filling
remake
<unk>
opponent
blank
alice
chains
snakes
speechless
demon
casey
tj
gut
popped
universe
pitbull
janet
cowboy




#### South

novel
lies
scandal
failed
fill
hypocrisy
weakness
filling
philosophy
corruption
issue
repeal
damned
denial
confusion
rural
politics
surrounding
distraction
reject


In [18]:
# Recorded output: New England mixes drinking, shooting and sports terms (tequila, fired, scored);
# the South list is nightlife-flavored (ladies, wings, drinks, henny) with several emoji tokens.
compare('shots')

# shots

#### New England

shot
tequila
fired
screen
shoot
shooting
henny
exclusive
pirate
scored
hitting
foul
hunter
possession
period
pics
firing
trick
couple
captured




#### South

ladies
🌚
wings
👅
til
drinks
henny
30pm
fights
shot
lap
4th
lit
free
gun
🔥
🔥🔥
😜
11
whip


In [21]:
# Recorded output: New England leans to craft/gift terms (jar, stickers, mason, tags);
# the South leans to music-promotion terms (charts, soundcloud, record, promotion).
compare('labels')

# labels

#### New England

jar
stickers
mason
tags
label
sticker
covers
gifts
christmas
valentines
santa
lemonade
vinyl
pandora
confetti
bride
requirements
peach
yummy
fabric




#### South

charts
soundcloud
record
users
plays
label
promotion
sticker
click
trend
submit
100k
exposure
promoted
seen
rappers
tags
bronze
goodies
increase


In [22]:
# Recorded output: New England leans to research terms (researchers, determine, scientists);
# the South leans to game/contest terms (earn, plays, tix, points).
compare('predict')

# predict

#### New England

researchers
determine
scientists
develop
prevent
identify
experts
risks
suggests
affect
future
artificial
causes
reduce
risk
prepare
improve
factor
recover
manage




#### South

earn
plays
tix
play
points
undefeated
remain
reverse
sweeps
trials
compete
rewards
puzzle
instant
chess
opponent
outcome
neutral
spark
increase


In [23]:
# Recorded output: New England leans to introspective terms (demons, peace, voice);
# the South adds loop, lane, delays and eb/wb/nb/sb -- presumably traffic reports; verify against the corpus.
compare('inner')

# inner

#### New England

demons
peace
creates
voice
circle
embrace
confidence
conflict
cities
angels
strength
bridges
flesh
surrounding
divine
imagination
healing
constant
positivity
spirit




#### South

loop
creates
lane
circle
peace
blu
strength
eb
conflict
delays
cities
embrace
voice
wb
nerd
tr
powers
nb
causing
sb


In [24]:
# Recorded output: 'loyal' tops both lists; the South adds religious terms (sins, god, christ, lord),
# while the New England list contains several non-Latin-script tokens.
compare('faithful')

# faithful

#### New England

loyal
remain
humble
strive
minded
प
consistent
pastor
لك
به
undefeated
त
foolish
therefore
ज
motel
satisfied
classy
active
glory




#### South

loyal
humble
consistent
sins
god
honest
therefore
christ
remain
males
forgive
witness
lord
blessed
men
grateful
forever
satisfied
thankful
patient


In [25]:
# Recorded output: New England includes district, congressional, bat, golf;
# the South leads with porch, seat and also lists patio, grandpa.
compare('swing')

# swing

#### New England

eager
action
closest
district
porch
punk
full
direction
bat
bounce
favor
impress
roll
congressional
moves
come
jam
golf
into
ready




#### South

porch
seat
foot
angle
roll
set
patio
louisiana
grandpa
eager
traditional
jump
groove
frame
action
dancing
diamond
eternal
front
feet


In [26]:
# Recorded output: New England leans to alcohol terms (wine, liquor, brewing, whiskey);
# the South leans to supernatural/spiritual terms (evil, bones, flesh, tribe).
compare('spirits')

# spirits

#### New England

wine
liquor
lift
harvest
craft
brewing
divine
ridge
download
125
whiskey
seeds
turtle
samples
inn
bottles
spirit
grill
garlic
🍻




#### South

evil
bones
log
tempting
flesh
tribe
minds
maya
trials
lift
mountains
skies
guardian
<unk>
brewing
spirit
smiles
pouring
tastes
بين


In [27]:
# Recorded output: New England leans to music/arts terms (artists, festival, punk, bands);
# the South leans to words for groups of people (folks, ppl, peoples).
compare('folk')

# folk

#### New England

artists
festival
fest
punk
bands
tunes
music
art
abstract
irish
comedy
indie
songs
musicians
contemporary
band
jazz
karaoke
comedian
exhibit




#### South

folks
mfs
art
doll
blacks
ish
whites
ppl
streets
african
asian
educated
females
roots
peoples
native
white
voices
tryin
aye


In [29]:
# Recorded output: 'dude' tops both lists; New England adds spider, masked, bicycle, cops, arrest,
# the South adds conversational interjections (bruh, smh, lmao, lol).
compare('man')

# man

#### New England

dude
woman
guy
spider
himself
masked
wtf
bro
stealing
bicycle
caught
aw
insane
cops
he
touching
nigga
boy
handsome
arrest




#### South

dude
bruh
woman
boy
wtf
bro
nigga
damn
smh
😂😂😂
lmao
he
swear
mans
lmfao
guy
nah
hell
😂😂
lol


# Comparisons with concept diffs

In [30]:
# Recorded output: the two lists largely overlap (planet, heaven, humans, mother, nasa).
compare('earth')

# earth

#### New England

planet
flat
heaven
mother
humans
moon
world
mars
nasa
ocean
universe
nature
alien
science
worlds
exist
god
happiest
destroy
human




#### South

planet
heaven
humans
mother
god
nasa
nature
happiest
dust
world
aliens
dance
space
human
greatest
found
🌎
mars
above
the


In [31]:
# Recorded output: both lists open with the same five tokens (nazis, neo, fascist, supremacist, kkk) in slightly different order.
compare('nazi')

# nazi

#### New England

nazis
neo
fascist
supremacist
kkk
supremacists
racist
hitler
antifa
racists
traitor
communist
terrorist
leftist
supremacy
scum
blm
alt
socialist
anti




#### South

neo
nazis
fascist
supremacist
kkk
racist
antifa
supremacists
hitler
blm
communist
racists
terrorist
supremacy
terrorists
germany
soros
propaganda
leftist
socialist


In [32]:
# Recorded output: both lists open with putin, russian, collusion and share most of the remaining tokens.
compare('russia')

# russia

#### New England

putin
russian
collusion
russians
comey
investigation
probe
iran
trump
flynn
hillary
election
hacking
fbi
syria
clinton
djt
sanctions
moscow
ties




#### South

putin
russian
collusion
trump
comey
probe
russians
hacking
iran
sanctions
flynn
syria
investigation
fbi
election
mueller
kushner
hillary
dems
clinton


In [33]:
# Recorded output: both lists share bless, lord, jesus, pray, christ; New England also has
# exclamation-like tokens (swear, dammit, oh), the South has faith, grace, glory.
compare('god')

# god

#### New England

bless
lord
jesus
pray
christ
swear
gods
dammit
oh
dear
damn
blessed
amen
praying
goodness
praise
knows
holy
blessing
🙏




#### South

lord
jesus
christ
bless
pray
faith
blessed
🙏🏾
gods
praise
amen
spirit
thankful
grace
🙏🏽
blessing
glory
praying
father
holy


In [37]:
# Widen the list to 50 neighbors. In the recorded output the two lists overlap heavily
# (she, him, his, himself); the New England list also contains djt, potus, trump.
compare('he', n=50)

# he

#### New England

she
said
him
himself
knows
probably
his
clearly
thinks
think
knew
that
but
obviously
wasn
guy
who
djt
didn
hasn
wouldn
doesn
says
because
dude
did
if
once
apparently
maybe
was
actually
wants
potus
then
yeah
they
told
would
also
saying
when
cuz
dt
someone
nobody
trump
hes
believes
never




#### South

him
she
his
himself
said
knows
told
think
wouldn
obviously
but
that
probably
because
wasn
didn
knew
dude
did
actually
when
was
who
if
would
wants
hasn
mean
they
yeah
doesn
thinks
cuz
someone
thought
not
guy
clearly
know
yea
gave
maybe
like
does
then
even
whoever
believes
saying
has


In [39]:
# Widen the list to 50 neighbors. In the recorded output 'anxiety' tops both lists, and both
# include 'tropical' (the South also flooding, hurricanes) -- presumably the storm sense of the word.
compare('depression', n=50)

# depression

#### New England

anxiety
stress
illness
pain
dealing
suffering
syndrome
causes
mental
symptoms
allergies
anger
headache
disease
meds
suffer
prevent
tropical
grief
addiction
stroke
naps
ease
struggles
pregnancy
depressed
treating
managing
knee
panic
nap
recovery
relief
diabetes
mood
cancer
injury
manage
awareness
reduce
brain
patients
during
harvey
hunger
hiv
exercise
emotional
risk
caused




#### South

anxiety
illness
tropical
suffering
stress
causes
dealing
addiction
meds
insomnia
suffer
pain
depressed
mental
grief
experiencing
anger
disease
symptoms
syndrome
crisis
laughter
headache
panic
caused
awareness
suicide
flooding
tooth
risk
fever
prevent
issues
overcome
struggles
tragedy
struggle
tolerance
guilt
risks
constant
allergies
bullying
stroke
emo
breakdown
hurricanes
related
literal
situations
