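# Concept subtraction

For a seed word, take its nearest neighbours in each regional embedding (New England and Deep South), average those neighbours' vectors in the combined embedding, subtract one regional average from the other, and list the tokens of the combined embedding that lie closest to the difference.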

In [3]:
import numpy as np
import pandas as pd

from gensim.models import KeyedVectors
from scipy.spatial.distance import cosine
from tqdm import tqdm_notebook

from sources import parse_glove_vocab

from IPython.display import Markdown, display

In [4]:
import matplotlib as mpl
import matplotlib.pyplot as plt

# Draw matplotlib figures inline, directly beneath the cell that creates them.
%matplotlib inline
# Apply the 'bmh' style sheet to every figure created after this point.
mpl.style.use('bmh')

In [7]:
# Word vectors read from the new-england file (word2vec format).
# north() passes this as the first model and south() as the second.
m1 = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/new-england.w2v.txt')

In [8]:
# Word vectors read from the deep-south file (word2vec format).
# south() passes this as the first model and north() as the second.
m2 = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/deep-south.w2v.txt')

In [9]:
# Word vectors read from the combined file (word2vec format).
# concept_diff() reads this global: neighbour tokens from m1/m2 are looked up
# here, and the final nearest-token query also runs against it.
combined = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/combined.w2v.txt')

In [11]:
# Parse the vocabulary file stored alongside the embeddings.
# NOTE(review): parse_glove_vocab comes from the local `sources` module, so its
# return type is not visible here; `vocab` is not referenced by any later cell
# in this view -- confirm it is still needed.
vocab = parse_glove_vocab('../../data/corpora/north-south/vocab.txt')

In [12]:
def concept_diff(m1, m2, seed, depth=50, topn=20, space=None):
    """Rank tokens by closeness to the offset between two models' views of `seed`.

    The `depth` nearest neighbours of `seed` are taken from each model. Each
    neighbour list is looked up in one shared vector space and averaged there,
    and the tokens of that space closest to ``mean(m1) - mean(m2)`` are
    returned.

    Parameters
    ----------
    m1, m2 : objects exposing ``most_similar(seed, topn=...)``
        Models whose neighbourhoods of `seed` are contrasted. The result
        points away from `m2` and towards `m1`.
    seed : hashable
        Token queried in both models.
    depth : int, default 50
        Number of nearest neighbours taken from each model.
    topn : int, default 20
        Number of entries requested from the shared space.
    space : optional
        Vector space used for the per-token lookups (``space[token]``) and for
        the final ``similar_by_vector`` query. When omitted, the notebook-level
        `combined` model is used, which is what the function always did before
        this parameter existed.

    Returns
    -------
    Whatever ``space.similar_by_vector`` returns; the callers in this notebook
    unpack it as ``(token, score)`` pairs.

    Raises
    ------
    Any error raised by the underlying models propagates unchanged (for
    example a lookup failure for a token missing from `space`).
    """
    if space is None:
        # Resolved at call time so existing calls keep using the global model.
        space = combined

    def neighbourhood_mean(model):
        # Only the neighbour tokens are used; the model's own scores are dropped.
        tokens = [token for token, _ in model.most_similar(seed, topn=depth)]
        return np.array([space[token] for token in tokens]).mean(0)

    offset = neighbourhood_mean(m1) - neighbourhood_mean(m2)
    return space.similar_by_vector(offset, topn=topn)

In [13]:
def north(seed, *args, **kwargs):
    """Return concept_diff for `seed` with m1 as the first model and m2 as the second.

    Extra positional and keyword arguments are forwarded to concept_diff
    unchanged.
    """
    ranked = concept_diff(m1, m2, seed, *args, **kwargs)
    return ranked

In [14]:
def south(seed, *args, **kwargs):
    """Return concept_diff for `seed` with m2 as the first model and m1 as the second.

    Extra positional and keyword arguments are forwarded to concept_diff
    unchanged.
    """
    ranked = concept_diff(m2, m1, seed, *args, **kwargs)
    return ranked

In [15]:
def compare(token, *args, **kwargs):
    """Show the north() and south() results for `token`, one after the other.

    A Markdown title with the token is rendered first. Under a "New England"
    heading the tokens returned by north() are printed one per line, then,
    after a blank gap, the tokens returned by south() are printed under a
    "South" heading. Only the token of each returned pair is shown; the
    second element is discarded. Extra arguments are forwarded to both
    north() and south().
    """
    display(Markdown(f'# {token}'))

    display(Markdown('#### New England'))
    northern = north(token, *args, **kwargs)
    for word, _score in northern:
        print(word)

    # print() appends its own newline, so this leaves two blank lines.
    print('\n')

    display(Markdown('#### South'))
    southern = south(token, *args, **kwargs)
    for word, _score in southern:
        print(word)

In [16]:
# Regional contrast for the seed 'man' (default depth=50, topn=20).
compare('man')

# man

#### New England

masked
arrest
allegedly
police
arrested
accused
alleged
officers
records
authorities
reportedly
seeks
inmates
deaths
أعوذ
investigating
إني
cops
captured
targeted




#### South

lmao
lmaoo
lmfao
lol
😂
😂😂
lmaooo
😂😂😂
😂😂😂😂
😭😭
😭😭😭
😭
bruh
lmaoooo
tho
nah
😂😂😂😂😂
lmfaooo
lmfaoo
😂😭


In [17]:
# Regional contrast for the seed 'earth' (default depth=50, topn=20).
compare('earth')

# earth

#### New England

economic
environment
policies
destroying
climate
environmental
scientists
infrastructure
careers
epa
affects
threaten
political
managing
destroy
economy
solve
networks
threatens
communities




#### South

lord
jesus
grace
sweet
christ
boy
mama
shall
praise
god
love
brother
heaven
tune
😍
glory
blessed
baby
shawn
i


In [18]:
# Regional contrast for the seed 'nazi' (default depth=50, topn=20).
compare('nazi')

# nazi

#### New England

idiots
idiot
stupid
morons
moron
dumbass
ignorant
dumb
assholes
asshole
smh
clueless
supporters
sir
asses
kidding
fool
delusional
bunch
fuckin




#### South

islamic
radical
egypt
jewish
divine
terrorism
israel
romance
cultural
elements
islam
meets
catholic
initiative
developed
collaboration
pakistan
lover
isis
weapon


In [19]:
# Regional contrast for the seed 'liberal' (default depth=50, topn=20).
compare('liberal')

# liberal

#### New England

establishment
politician
dem
politicians
racists
republican
roy
democratic
anti
democrat
moore
uncle
corrupt
swamp
blacks
dc
mt
reps
senator
candidate




#### South

msnbc
cnn
narrative
nbc
channel
news
update
briefing
espn
wednesday
unknown
watch
train
story
iphone
wrap
edge
buzzfeed
vlog
twist


In [20]:
# Regional contrast for the seed 'russia' (default depth=50, topn=20).
compare('russia')

# russia

#### New England

cheated
hacked
murdered
raped
exposed
robbed
cheating
gettin
alleged
got
married
stabbed
seth
wives
ethan
amanda
admitted
deleted
allegedly
lied




#### South

congress
china
tillerson
council
budget
agenda
policy
sessions
environment
press
president
oppose
toward
senate
conference
leadership
america
education
moving
session


In [34]:
# Regional contrast for the seed 'trump' (default depth=50, topn=20).
# NOTE(review): this cell's execution count (34) is out of sequence with the
# cells around it (20 before, 38 after); re-run the notebook top to bottom to
# confirm the recorded output still matches.
compare('trump')

# trump

#### New England

he
knows
told
shawn
asked
met
him
knew
his
jr
was
himself
impressed
justin
said
got
josh
somebody
wasn
talked




#### South

conservatives
oppose
liberals
repeal
republicans
obamacare
democrats
racists
radical
trumpcare
socialist
boycott
policies
legislation
lawmakers
leftist
americans
conservative
communist
reform


In [38]:
# Regional contrast for the seed 'obama' (default depth=50, topn=20).
compare('obama')

# obama

#### New England

him
he
asked
told
i
chair
tweeted
said
dude
when
somebody
seat
got
election
idk
bf
knows
comey
sleep
his




#### South

immigrant
immigrants
immigration
illegal
daca
refugee
discrimination
sanctuary
illegals
welfare
aliens
sharia
berkeley
criminals
workplace
refugees
funded
thugs
reform
existing


In [21]:
# Regional contrast for the seed 'god' (default depth=50, topn=20).
compare('god')

# god

#### New England

😭😭
ugh
😭
dang
😭😭😭
damn
omg
😂😂😂😂
😂😂😂
wtf
lol
lmao
😂😂
bruh
lmfao
😑
oh
fucking
😭😭😭😭
gosh




#### South

given
shall
whom
sacrifice
ability
serve
therefore
seek
willing
desire
overcome
others
faithful
among
receive
honor
nor
circumstances
fear
understanding


In [22]:
# Regional contrast for the seed 'clinton' (default depth=50, topn=20).
compare('clinton')

# clinton

#### New England

searching
primary
select
voters
probe
explosion
listings
interior
northern
mplus
stores
buildings
steam
investigating
sale
residents
elementary
uk
supporters
homes




#### South

pos
traitor
hypocrite
liar
lying
moron
loser
disgusting
disgrace
shame
asshole
aaron
a
bill
coward
treason
idiot
bullshit
goat
jake


In [23]:
# Regional contrast for the seed 'hands' (default depth=50, topn=20).
compare('hands')

# hands

#### New England

devices
phones
are
accountable
voters
customers
people
consumers
many
supported
foreign
aware
millennials
considering
aren
ppl
clients
apps
responsible
companies




#### South

chest
throat
punch
finger
catch
deck
throws
balls
cage
td
breath
touchdown
rodgers
bird
fish
dragon
toe
upper
superman
sweat


In [39]:
# Regional contrast for the seed 'heart' (default depth=50, topn=20).
compare('heart')

# heart

#### New England

risk
reduce
risks
linked
increased
related
rate
prevent
associated
research
المنزل
cell
carbon
disease
diabetes
increase
health
management
improve
drug




#### South

sooo
soooo
omg
love
soo
😍
awww
😍😍😍
so
awe
aww
😭❤
😊
ugh
thank
sooooo
😍😍
bless
awwww
aw


In [24]:
# Regional contrast for the seed 'mother' (default depth=50, topn=20).
compare('mother')

# mother

#### New England

beautiful
blessed
gorgeous
woman
classy
stunning
powerful
lovely
faithful
inspirational
truly
attractive
divine
strong
chic
necklace
pretty
sexy
inspiration
thankful




#### South

kids
babies
parents
illegals
medicare
children
meds
adults
drugs
weren
cartoons
medicaid
recall
pets
olds
gen
vets
allergies
programs
jokes


In [35]:
# Regional contrast for the seed 'media' (default depth=50, topn=20).
compare('media')

# media

#### New England

corrupt
lying
dems
traitor
democrats
potus
democrat
msm
gop
he
politicians
elected
djt
idiot
republican
bs
obama
disgrace
hrc
asshole




#### South

wordpress
website
tools
photography
seo
instagram
tips
apps
online
design
pinterest
app
analytics
sites
blog
web
pages
page
site
linkedin


In [63]:
# Regional contrast for the seed 'depression' (default depth=50, topn=20).
compare('depression')

# depression

#### New England

cancer
breast
patients
surgery
exercise
improve
reduce
recovery
research
treatment
study
brain
hospital
diabetes
workout
program
efficient
helps
training
programs




#### South

identity
confusion
irony
tragedy
fears
mentality
weirdest
experiencing
lies
racists
outrage
nightmares
literal
snowflakes
irl
thief
ironic
religious
tapes
supremacy


In [36]:
# Regional contrast for the seed 'american' (default depth=50, topn=20).
compare('american')

# american

#### New England

comics
horror
film
western
wood
wars
movies
marvel
french
series
flash
box
superhero
toy
movie
discussion
art
sci
photography
films




#### South

approval
administration
taxpayers
states
united
irs
سبحانك
law
illegal
therefore
taxes
feds
president
government
mexico
federal
trusted
graduated
tax
seeking


In [37]:
# Regional contrast for the seed 'home' (default depth=50, topn=20).
compare('home')

# home

#### New England

estate
sales
market
listings
automation
businesses
rental
cars
marketing
homes
property
virtual
luxury
mls
enterprise
platform
analytics
properties
premium
بسم




#### South

sleep
nap
😴
woke
asleep
wake
bed
goodnight
sleepy
lay
knowing
up
waking
hungry
suppose
awake
😌
rest
then
feeling


In [48]:
# Regional contrast for the seed 'blood' (default depth=50, topn=20).
compare('blood')

# blood

#### New England

reduce
impact
managing
improve
increase
boost
levels
costs
affect
uncharacteristical
prevent
rate
financial
stress
research
overall
energy
risk
cancer
cost




#### South

jesus
christ
hood
washed
god
choir
boy
playing
mud
stood
came
lamb
walked
lord
window
faithful
ugly
died
turned
uncle


In [41]:
# Regional contrast for the pronoun seed 'i' (default depth=50, topn=20).
compare('i')

# i

#### New England

saw
recently
visited
icymi
wrote
forgot
did
brought
segment
shared
originally
showed
created
attended
had
similar
came
stole
reminds
spoke




#### South

anymore
m
😴
😤
♀
😪
🙃
🏻
am
still
🏼
😕
😌
🙄
im
😭
♂
😔
idc
🏾


In [46]:
# Regional contrast for the pronoun seed 'he' (default depth=50, topn=20).
compare('he')

# he

#### New England

potus
djt
trump
dt
prez
administration
presidency
trumps
breitbart
president
pence
wh
donald
admin
russia
bannon
putin
resign
traitor
cnn




#### South

idk
bruh
lol
tho
lmao
damn
nah
🏾
tbh
like
😭
some
lmaoo
yea
i
tf
😭😭
yeah
😂😂
imma


In [47]:
# Regional contrast for the pronoun seed 'she' (default depth=50, topn=20).
compare('she')

# she

#### New England

believes
voters
politician
hrc
potus
democrat
gop
djt
dnc
claims
supported
republican
doj
whether
unable
directly
وأعوذ
candidate
who
enters




#### South

mama
momma
lil
bruh
baby
finna
sis
😭😭
😭
ain
asf
fr
dawg
bout
😂😂
tho
😂😂😂
bro
😭😭😭
ima


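# Stability / significance concerns

The first two cells below pass `depth` explicitly (the second positional argument of `compare`); `compare('earth', 40)` can be read against the default-depth `compare('earth')` output above.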

In [80]:
# Same seed as the earlier 'earth' cell, but the positional 40 is forwarded
# through north()/south() to concept_diff as depth (the default is 50).
compare('earth', 40)

# earth

#### New England

scientists
climate
economic
environment
careers
managing
effects
affects
solve
environmental
species
policies
impact
diff
science
humans
communities
epa
resources
global




#### South

lord
jesus
praise
shall
christ
glory
god
grace
amen
heaven
mercy
matthew
worship
goodness
whom
choir
faithful
shawn
gods
holy


In [81]:
# The positional 50 is forwarded to concept_diff as depth, which equals the
# default, so this is the same query as compare('hillary').
compare('hillary', 50)

# hillary

#### New England

mama
momma
mommy
shawty
smile
wassup
cause
me
cuz
beyoncé
ik
mom
fan
mia
deadass
😭
bro
ima
she
bruh




#### South

treason
russian
democratic
elected
republican
congress
elections
officials
government
senate
democracy
traitor
gop
corruption
govt
collusion
citizens
russia
politicians
lawmakers


In [79]:
# Regional contrast for the seed 'tide' (default depth=50, topn=20).
compare('tide')

# tide

#### New England

fog
wax
skies
temperature
hum
barometer
ப
س
diamonds
concrete
ا
mist
र
ن
ம
த
amounts
ي
floating
invisible




#### South

football
auburn
coach
qb
baseball
players
alabama
offense
basketball
coaches
nfl
patriots
wr
falcons
ncaa
teams
offensive
ole
player
defense
