# Concept subtraction

In [1]:
import numpy as np
import pandas as pd

from gensim.models import KeyedVectors
from scipy.spatial.distance import cosine
from tqdm import tqdm_notebook

from sources import parse_glove_vocab

from IPython.display import Markdown, display

In [2]:
import matplotlib as mpl
import matplotlib.pyplot as plt

%matplotlib inline
mpl.style.use('bmh')

In [3]:
# New England vectors (per the file name), read from a word2vec-format text file.
# `north` passes this model first to concept_diff; `south` passes it second.
m1 = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/new-england.w2v.txt')

In [4]:
# Deep South vectors (per the file name), read from a word2vec-format text file.
# `south` passes this model first to concept_diff; `north` passes it second.
m2 = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/deep-south.w2v.txt')

In [5]:
# Shared embedding space used by concept_diff: neighbours found in m1/m2 are
# looked up here, and the final similarity query runs against these vectors.
# NOTE(review): presumably trained on both corpora together (file name) — confirm.
combined = KeyedVectors.load_word2vec_format('../../data/corpora/north-south/combined.w2v.txt')

In [6]:
# parse_glove_vocab comes from the project-local `sources` module; its return
# type is not visible from this notebook.
# NOTE(review): `vocab` is not referenced by any of the cells below — confirm it is still needed.
vocab = parse_glove_vocab('../../data/corpora/north-south/vocab.txt')

In [107]:
def concept_diff(m1, m2, seed, depth=50, topn=20, space=None):
    """Find tokens that characterise `seed`'s neighbourhood in `m1` but not in `m2`.

    The `depth` nearest neighbours of `seed` are taken from each model. Each
    neighbour is then looked up in a shared embedding space and the two groups
    are averaged there. The shared space is finally queried with
    ``mean(m1 neighbours) - mean(m2 neighbours)``.

    Parameters
    ----------
    m1, m2
        Models exposing ``most_similar(seed, topn=...)`` that yields
        ``(token, score)`` pairs. `m1` is the positive side, `m2` the negative.
    seed
        Token whose neighbourhoods are compared.
    depth : int
        Number of nearest neighbours taken from each model.
    topn : int
        Number of results requested from the shared space.
    space
        Shared embedding space supporting ``space[token]`` and
        ``similar_by_vector(vector, topn=...)``. When omitted, the
        notebook-level ``combined`` model is used, as before.

    Returns
    -------
    The result of ``space.similar_by_vector``; the callers in this notebook
    unpack it as ``(token, score)`` pairs.

    Notes
    -----
    Every neighbour is indexed in `space` directly, so a neighbour that is
    missing from the shared space makes that lookup fail.
    """
    if space is None:
        # Resolved at call time so existing callers keep using the notebook's
        # `combined` vectors without passing anything extra.
        space = combined

    def neighbourhood_mean(model):
        # Only the tokens are needed; the per-model similarity scores are dropped.
        tokens = [token for token, _ in model.most_similar(seed, topn=depth)]
        return np.mean([space[token] for token in tokens], axis=0)

    direction = neighbourhood_mean(m1) - neighbourhood_mean(m2)
    return space.similar_by_vector(direction, topn=topn)

In [55]:
def north(seed, *args, **kwargs):
    """Concept difference for `seed` with `m1` (New England) as the positive side.

    Delegates to ``concept_diff`` with the notebook-level models in the order
    ``(m1, m2)``. Any extra positional or keyword arguments are forwarded
    unchanged.
    """
    north_minus_south = concept_diff(m1, m2, seed, *args, **kwargs)
    return north_minus_south

In [56]:
def south(seed, *args, **kwargs):
    """Concept difference for `seed` with `m2` (Deep South) as the positive side.

    Delegates to ``concept_diff`` with the notebook-level models in the order
    ``(m2, m1)``. Any extra positional or keyword arguments are forwarded
    unchanged.
    """
    south_minus_north = concept_diff(m2, m1, seed, *args, **kwargs)
    return south_minus_north

In [57]:
def compare(token, *args, **kwargs):
    """Show the `north` and `south` concept differences for `token` side by side.

    Renders a markdown title for the token, then a "New England" section and a
    "South" section. Each section prints the tokens returned by `north` /
    `south` as one space-separated line. Extra arguments are forwarded to both
    helpers. Nothing is returned.
    """
    def as_line(ranked):
        # Keep only the token of each (token, score) pair.
        return ' '.join(word for word, _score in ranked)

    display(Markdown(f'# {token}'))

    display(Markdown('#### New England'))
    northern = north(token, *args, **kwargs)
    print(as_line(northern))
    # Emits two newlines in total (the string plus print's own terminator).
    print('\n')

    display(Markdown('#### South'))
    southern = south(token, *args, **kwargs)
    print(as_line(southern))

In [58]:
compare('tide')

# tide

#### New England

fog wax skies temperature hum barometer ப س diamonds concrete ا mist र ن ம த amounts ي floating invisible




#### South

football auburn coach qb baseball players alabama offense basketball coaches nfl patriots wr falcons ncaa teams offensive ole player defense


In [59]:
compare('wicked')

# wicked

#### New England

sooo soooo sooooo so pretty soo nice too cool cute really af soooooo kinda dang good 😊 though very fun




#### South

shall therefore fear matthew whom thy christ flesh sins satan mercy destruction sin glory eternal darkness evil weapon thou enemy


In [60]:
compare('subway')

# subway

#### New England

car vehicle bike train crash bus driving road plane accident highway wreck vehicles route driver truck cars traffic passenger lane




#### South

pancakes garlic bread soup chocolate cookies chicken bacon spicy fries butter toast nuggets fried rice shrimp vanilla peanut pasta cheesecake


In [61]:
compare('happiness')

# happiness

#### New England

eternal pure grace ♥ healing filled glorious divine revival يارب appreciation strength crystal ✨ 📹 glory continued ✔ إن kingdom




#### South

decide can deciding do depend shouldn sell determine afford anybody if manage should yourself don pick think choose either trying


In [62]:
compare('earth')

# earth

#### New England

economic environment policies destroying climate environmental scientists infrastructure careers epa affects threaten political managing destroy economy solve networks threatens communities




#### South

lord jesus grace sweet christ boy mama shall praise god love brother heaven tune 😍 glory blessed baby shawn i


In [63]:
compare('hands')

# hands

#### New England

devices phones are accountable voters customers people consumers many supported foreign aware millennials considering aren ppl clients apps responsible companies




#### South

chest throat punch finger catch deck throws balls cage td breath touchdown rodgers bird fish dragon toe upper superman sweat


In [64]:
compare('god')

# god

#### New England

😭😭 ugh 😭 dang 😭😭😭 damn omg 😂😂😂😂 😂😂😂 wtf lol lmao 😂😂 bruh lmfao 😑 oh fucking 😭😭😭😭 gosh




#### South

given shall whom sacrifice ability serve therefore seek willing desire overcome others faithful among receive honor nor circumstances fear understanding


In [18]:
compare('meaning')

# meaning

#### New England

sometimes gotta lol ima outta maybe haha 🙃 imma 😂 😅 😂😂 probably leg idk between 😩 nights finish 🙄




#### South

hatred protecting identity democracy equality anti created racism corruption creation freedom promoting hateful launched defending integrity summit fascist continued supported


In [19]:
compare('memory')

# memory

#### New England

forgive suffer knowing apologize pray suffering moment never hurt feelings realize prayers recognize god admit actions difficult consequences remember sacrifice




#### South

vintage fabric wood handmade tablet candle silver floral copper jewelry earrings sterling decor ebay print giveaway leaf kit glass collection


In [20]:
compare('education')

# education

#### New England

leaders systems ppl thinks how relationships environment want where argue conservatives cities friends change things thinking can going know babies




#### South

donation scholarship receives grant pardon exchange ref harvest triple extension charity eagle additional donating apology crown shawn jar fundraiser trophy


In [21]:
compare('culture')

# culture

#### New England

content social learning writing using use journalism marketing reporting media often relevant engaging posting data technology science stories tools computer




#### South

soul duke coin chi saint cha np raiders blaze boogie premier inn knights atlantic hoodie volleyball francis clash pit pats


In [22]:
compare('church')

# church

#### New England

ancient roman egypt francis israel plain coin saint edward pope accent land crown australia threatens king william prince banks statue




#### South

tomorrow evening starting attending afternoon monday session register early attend hosting week volunteers morning sunday staff joining pre seniors students


In [23]:
compare('god')

# god

#### New England

lmao lol lmfao bruh dude lmaoo 😂😂 damn 😂😂😂 😂😂😂😂 fucking lmaooo 😂 😭😭 omg lmaoooo wtf yeah fuck 😭




#### South

compassion worship faith seek chosen fellowship desire therefore greater among spirit leaders seeking thy sacrifice provides nations restore prayer wisdom


In [24]:
compare('reflection')

# reflection

#### New England

values moral good th great belief principles and at wisdom beliefs fighting politics enjoy knowledge your man beyond political given




#### South

xl large jersey flop خير painted த اللهم misses xxl وأنت إلا small purple wave ப floral mens إني العظيم


In [65]:
compare('russia')

# russia

#### New England

cheated hacked murdered raped exposed robbed cheating gettin alleged got married stabbed seth wives ethan amanda admitted deleted allegedly lied




#### South

congress china tillerson council budget agenda policy sessions environment press president oppose toward senate conference leadership america education moving session


In [31]:
compare('man')

# man

#### New England

allegedly masked arrested accused arrest أعوذ alleged police charged inmates captured reportedly dui authorities بك seeks ferrari escaped officers crashes




#### South

lol lmao lmfao tho lmaoo 😂 bruh 😂😂 lmaooo 😂😂😂 😂😂😂😂 cuz yall nah 😭 😭😭 sis 😭😭😭 lmaoooo damn


In [66]:
compare('red')

# red

#### New England

mets yankees cubs sox mlb indians tigers rangers knicks hawks espn baseball wire game nhl fans pirates playoffs pitching nfl




#### South

colored purple beans glitter lace lipstick pink bean vanilla cinnamon cocktail silk color earrings light butter lemon floral coconut liquid


In [67]:
compare('green')

# green

#### New England

celtics jersey size xl adidas boston cap shirt jacket mens medium hoodie sleeve nba nike tee large men hat hockey




#### South

pork baked greens shrimp fried cinnamon potatoes sausage salmon pancakes yum bake cheesecake pasta garlic strawberry peppers potato rice toast


In [68]:
compare('blue')

# blue

#### New England

moon bounce track playing sky under night limit alcohol atl pull ralph keeping starts club hills pace fed might keeps




#### South

denim fabric floral closet chic leggings skirt diy vintage poshmark jewelry earrings handmade furniture colorful womens traditional dresses lace candle


In [69]:
compare('yellow')

# yellow

#### New England

shirt sleeve tee xl gear size medium jacket logo jersey polo shirts mens fit sweater large nwt gray hat l




#### South

beyonce beyoncé blind ariana heartless cardi amber ghetto mainstream locals trey vocals selena thunder moderate cyrus miley psycho rihanna whom


In [70]:
compare('purple')

# purple

#### New England

glass jewelry art painted nail necklace wood concrete pendant fence unique baking mirror candle paint soap tree copper eye earrings




#### South

jacket xl sleeve size shorts medium zip skirt womens hoodie polo shirt jean denim mens pockets pants tee nwt lauren


In [96]:
compare('france')

# france

#### New England

05 03 04 02 01 09 06 08 07 september january june august 27 31 february 21 april 28 00am




#### South

spine etc بسم فيمن unlike politician pakistan debate ta base trump un agrees putin المنزل cia sanctions dt consequences military


In [115]:
# depth=50 is also concept_diff's default, so this is equivalent to the earlier
# compare('earth') cell and reproduces the same output.
compare('earth', depth=50)

# earth

#### New England

economic environment policies destroying climate environmental scientists infrastructure careers epa affects threaten political managing destroy economy solve networks threatens communities




#### South

lord jesus grace sweet christ boy mama shall praise god love brother heaven tune 😍 glory blessed baby shawn i


In [120]:
# Raw 50 nearest neighbours of 'earth' in m2 (the deep-south vectors), joined for
# display: this is the neighbourhood concept_diff averages at its default depth.
', '.join([t for t, _ in m2.most_similar('earth', topn=50)])

'planet, heaven, humans, mother, god, nasa, nature, happiest, dust, world, aliens, dance, space, human, greatest, found, 🌎, mars, above, the, living, mysterious, jesus, upon, proof, land, flat, lord, shadow, sized, alien, science, shall, moon, happy, christ, thing, gods, grace, scum, celebrate, remember, place, alive, angel, beautiful, animals, born, literally, ship'