In [1]:
import numpy as np
import pandas as pd

from gensim.models import KeyedVectors
from scipy.spatial.distance import cosine
from tqdm import tqdm_notebook

from sources import parse_glove_vocab

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply matplotlib's bundled 'bmh' style sheet to every figure drawn after this cell.
mpl.style.use('bmh')

In [3]:
# m1: word vectors read from the New England word2vec-format file.
# north() passes m1 as the first model and south() passes it as the second.
# (Presumably trained on the New England portion of the corpus only -- confirm.)
m1 = KeyedVectors.load_word2vec_format('../data/corpora/north-south/new-england.w2v.txt')

In [4]:
# m2: word vectors read from the Deep South word2vec-format file.
# south() passes m2 as the first model and north() passes it as the second.
# (Presumably trained on the Deep South portion of the corpus only -- confirm.)
m2 = KeyedVectors.load_word2vec_format('../data/corpora/north-south/deep-south.w2v.txt')

In [5]:
# combined: word vectors read from the combined word2vec-format file.
# concept_diff() reads this module-level name directly: it averages neighbour
# vectors in this space and runs the final similar_by_vector query against it.
# (Presumably trained on both regions together -- confirm.)
combined = KeyedVectors.load_word2vec_format('../data/corpora/north-south/combined.w2v.txt')

In [6]:
# Vocabulary parsed from vocab.txt by the project-local helper parse_glove_vocab
# (its return type is not visible from this notebook).
# NOTE(review): `vocab` is not referenced by any cell shown here -- confirm it is still needed.
vocab = parse_glove_vocab('../data/corpora/north-south/vocab.txt')

In [43]:
def concept_diff(m1, m2, seed, depth=50, topn=20, space=None):
    """Return the terms that best separate ``m1``'s neighbourhood of ``seed`` from ``m2``'s.

    For each model the ``depth`` nearest neighbours of ``seed`` are looked up,
    the neighbours' vectors are fetched from a shared reference space and
    averaged, and the reference space is then queried with
    ``avg(m1 neighbours) - avg(m2 neighbours)``.

    Parameters
    ----------
    m1, m2 : objects exposing ``most_similar(seed, topn=...)``
        The two models being contrasted. Each call must yield
        ``(term, score)`` pairs; only the terms are used.
    seed : passed through unchanged to ``most_similar``.
    depth : int, default 50
        Number of neighbours requested from each model.
    topn : int, default 20
        Number of results requested from the reference space.
    space : optional
        Reference embedding supporting ``term in space``, ``space[term]`` and
        ``similar_by_vector(vector, topn=...)``. When omitted, the
        notebook-level ``combined`` model is used, as before.

    Returns
    -------
    Whatever ``space.similar_by_vector`` returns (for a gensim
    ``KeyedVectors`` this is a list of ``(term, similarity)`` pairs).

    Raises
    ------
    KeyError
        If none of a model's neighbours exist in the reference space.
        Errors raised by ``most_similar`` itself (e.g. an unknown ``seed``)
        propagate unchanged.
    """
    if space is None:
        # Resolved at call time so the notebook global is only required when
        # the caller does not supply an explicit reference space.
        space = combined

    def _neighbour_centroid(model):
        # Mean reference-space vector of `model`'s nearest neighbours of `seed`.
        neighbours = [term for term, _ in model.most_similar(seed, topn=depth)]
        # The three models are loaded from separate files, so a neighbour may
        # be missing from the reference vocabulary; skip it instead of
        # aborting the whole query on the first unknown term.
        shared = [term for term in neighbours if term in space]
        if not shared:
            raise KeyError(
                'none of the neighbours of %r are present in the reference space' % (seed,)
            )
        return np.array([space[term] for term in shared]).mean(0)

    direction = _neighbour_centroid(m1) - _neighbour_centroid(m2)
    return space.similar_by_vector(direction, topn=topn)

In [44]:
def north(seed, *args, **kwargs):
    """Contrast ``seed`` with m1 (New England file) as the first model and m2 (Deep South file) as the second.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``concept_diff``, whose return value is handed back as-is.
    """
    northern_minus_southern = concept_diff(m1, m2, seed, *args, **kwargs)
    return northern_minus_southern

In [45]:
def south(seed, *args, **kwargs):
    """Contrast ``seed`` with m2 (Deep South file) as the first model and m1 (New England file) as the second.

    Any extra positional or keyword arguments are forwarded unchanged to
    ``concept_diff``, whose return value is handed back as-is.
    """
    southern_minus_northern = concept_diff(m2, m1, seed, *args, **kwargs)
    return southern_minus_northern

In [46]:
def compare(token, *args, **kwargs):
    """Print the ``north`` terms for ``token``, a blank separator, then the ``south`` terms.

    Each term is printed on its own line and the accompanying scores are
    discarded. Extra arguments are forwarded unchanged to both ``north`` and
    ``south``. Nothing is returned.
    """
    for position, direction in enumerate((north, south)):
        if position:
            # Separator between the two lists: print('\n') emits two newlines.
            print('\n')
        for term, _score in direction(token, *args, **kwargs):
            print(term)

In [71]:
compare('man')

masked
arrest
allegedly
police
arrested
accused
alleged
officers
records
authorities
reportedly
seeks
inmates
deaths
أعوذ
investigating
إني
cops
captured
targeted


lmao
lmaoo
lmfao
lol
😂
😂😂
lmaooo
😂😂😂
😂😂😂😂
😭😭
😭😭😭
😭
bruh
lmaoooo
tho
nah
😂😂😂😂😂
lmfaooo
lmfaoo
😂😭


In [26]:
compare('earth')

economic
environment
policies
destroying
climate
environmental
scientists
infrastructure
careers
epa
affects
threaten
political
managing
destroy
economy
solve
networks
threatens
communities


lord
jesus
grace
sweet
christ
boy
mama
shall
praise
god
love
brother
heaven
tune
😍
glory
blessed
baby
shawn
i


In [27]:
compare('nazi')

idiots
idiot
stupid
morons
moron
dumbass
ignorant
dumb
assholes
asshole
smh
clueless
supporters
sir
asses
kidding
fool
delusional
bunch
fuckin


islamic
radical
egypt
jewish
divine
terrorism
israel
romance
cultural
elements
islam
meets
catholic
initiative
developed
collaboration
pakistan
lover
isis
weapon


In [28]:
compare('liberal')

establishment
politician
dem
politicians
racists
republican
roy
democratic
anti
democrat
moore
uncle
corrupt
swamp
blacks
dc
mt
reps
senator
candidate


msnbc
cnn
narrative
nbc
channel
news
update
briefing
espn
wednesday
unknown
watch
train
story
iphone
wrap
edge
buzzfeed
vlog
twist


In [30]:
compare('russia')

cheated
hacked
murdered
raped
exposed
robbed
cheating
gettin
alleged
got
married
stabbed
seth
wives
ethan
amanda
admitted
deleted
allegedly
lied


congress
china
tillerson
council
budget
agenda
policy
sessions
environment
press
president
oppose
toward
senate
conference
leadership
america
education
moving
session


In [31]:
compare('god')

😭😭
ugh
😭
dang
😭😭😭
damn
omg
😂😂😂😂
😂😂😂
wtf
lol
lmao
😂😂
bruh
lmfao
😑
oh
fucking
😭😭😭😭
gosh


given
shall
whom
sacrifice
ability
serve
therefore
seek
willing
desire
overcome
others
faithful
among
receive
honor
nor
circumstances
fear
understanding


In [32]:
compare('clinton')

searching
primary
select
voters
probe
explosion
listings
interior
northern
mplus
stores
buildings
steam
investigating
sale
residents
elementary
uk
supporters
homes


pos
traitor
hypocrite
liar
lying
moron
loser
disgusting
disgrace
shame
asshole
aaron
a
bill
coward
treason
idiot
bullshit
goat
jake


In [37]:
compare('hands')

devices
phones
are
accountable
voters
customers
people
consumers
many
supported
foreign
aware
millennials
considering
aren
ppl
clients
apps
responsible
companies


chest
throat
punch
finger
catch
deck
throws
balls
cage
td
breath
touchdown
rodgers
bird
fish
dragon
toe
upper
superman
sweat


In [38]:
compare('mother')

beautiful
blessed
gorgeous
woman
classy
stunning
powerful
lovely
faithful
inspirational
truly
attractive
divine
strong
chic
necklace
pretty
sexy
inspiration
thankful


kids
babies
parents
illegals
medicare
children
meds
adults
drugs
weren
cartoons
medicaid
recall
pets
olds
gen
vets
allergies
programs
jokes


In [55]:
compare('media', depth=50)

corrupt
lying
dems
traitor
democrats
potus
democrat
msm
gop
he
politicians
elected
djt
idiot
republican
bs
obama
disgrace
hrc
asshole


wordpress
website
tools
photography
seo
instagram
tips
apps
online
design
pinterest
app
analytics
sites
blog
web
pages
page
site
linkedin


In [68]:
compare('american', depth=50)

comics
horror
film
western
wood
wars
movies
marvel
french
series
flash
box
superhero
toy
movie
discussion
art
sci
photography
films


approval
administration
taxpayers
states
united
irs
سبحانك
law
illegal
therefore
taxes
feds
president
government
mexico
federal
trusted
graduated
tax
seeking


In [69]:
compare('home', depth=50)

estate
sales
market
listings
automation
businesses
rental
cars
marketing
homes
property
virtual
luxury
mls
enterprise
platform
analytics
properties
premium
بسم


sleep
nap
😴
woke
asleep
wake
bed
goodnight
sleepy
lay
knowing
up
waking
hungry
suppose
awake
😌
rest
then
feeling
