In [1]:
from collections import OrderedDict

from gensim.models import Word2Vec

In [2]:
# Load the saved Word2Vec model from boston.bin; referred to as `bos` below.
# NOTE(review): presumably trained on Boston-area text — confirm against the training step.
bos = Word2Vec.load('../data/models/boston.bin')

In [3]:
# Load the saved Word2Vec model from sf.bin; referred to as `sf` below.
# NOTE(review): presumably trained on San Francisco-area text — confirm against the training step.
sf = Word2Vec.load('../data/models/sf.bin')

In [4]:
def get_token_counts(model):
    """Return the model's vocabulary as an ``OrderedDict`` of token -> count.

    Entries are ordered from most to least frequent. Tokens with equal
    counts keep the order in which the vocabulary yields them, because the
    sort is stable even with ``reverse=True``.

    Both gensim vocabulary layouts are supported:

    * gensim < 4: ``model.wv.vocab`` maps each token to an object carrying
      a ``count`` attribute.
    * gensim >= 4: ``vocab`` was removed, so counts are read through
      ``model.wv.get_vecattr(token, 'count')`` for every key in
      ``model.wv.key_to_index``.

    Args:
        model: an object exposing a ``wv`` attribute with one of the two
            layouts described above (e.g. a loaded ``Word2Vec`` model).

    Returns:
        OrderedDict mapping token -> occurrence count, descending by count.
    """
    wv = model.wv
    # On gensim 4 touching `.vocab` raises AttributeError; getattr() turns
    # that into the None default, which selects the new-API branch.
    legacy_vocab = getattr(wv, 'vocab', None)
    if legacy_vocab is not None:
        counts = [(token, entry.count) for token, entry in legacy_vocab.items()]
    else:
        counts = [(token, wv.get_vecattr(token, 'count')) for token in wv.key_to_index]
    return OrderedDict(sorted(counts, key=lambda pair: pair[1], reverse=True))

In [5]:
# Token -> count mapping for the `bos` model, ordered most frequent first.
bos_counts = get_token_counts(bos)

In [6]:
# Token -> count mapping for the `sf` model, ordered most frequent first.
sf_counts = get_token_counts(sf)

In [7]:
# The 500 highest-count tokens of the `bos` model (the mapping iterates in descending-count order).
bos_top = set(list(bos_counts)[:500])

In [8]:
# The 500 highest-count tokens of the `sf` model (the mapping iterates in descending-count order).
sf_top = set(list(sf_counts)[:500])

In [9]:
# Shared vocabulary: tokens that are in the top-500 set of both models.
vocab = bos_top & sf_top

In [10]:
# Size of the shared vocabulary (how many top tokens the two models have in common).
len(vocab)

417

In [11]:
def similarity(m1, m2, token, tokens=None):
    """Score how differently two models relate ``token`` to a set of reference words.

    For each reference word, the similarity between that word and ``token``
    is read from both models via ``wv.similarity``; the absolute differences
    are summed. A larger result means the two models disagree more about
    where ``token`` sits relative to the reference words.

    Args:
        m1: first model; must expose ``wv.similarity(word_a, word_b)``.
        m2: second model, same interface as ``m1``.
        token: the word whose placement is being compared.
        tokens: optional iterable of reference words. When omitted, the
            notebook-level ``vocab`` set is used, which is what this
            function always did before the parameter existed.

    Returns:
        The sum of ``abs(sim_m1 - sim_m2)`` over the reference words, or
        ``0`` if there are no reference words.

    Raises:
        Whatever ``wv.similarity`` raises for a word unknown to a model.
    """
    # Fall back to the notebook global only when the caller gave nothing,
    # so an explicitly empty collection is respected.
    reference = vocab if tokens is None else tokens
    return sum(
        abs(m1.wv.similarity(word, token) - m2.wv.similarity(word, token))
        for word in reference
    )

In [12]:
# Pair every shared-vocabulary token with its cross-model similarity delta.
deltas = [(token, similarity(bos, sf, token)) for token in vocab]

In [17]:
def compare(word):
    """Print the 20 most-similar words to ``word`` from the ``bos`` and ``sf`` models.

    Each model's words (as returned by ``wv.most_similar``) are printed as
    one comma-separated line under a 'BOS' or 'SF' heading.
    """
    # The SF heading carries a leading newline so a blank line separates
    # the two listings.
    for heading, model in (('BOS', bos), ('\nSF', sf)):
        print(heading)
        neighbours = model.wv.most_similar([word], topn=20)
        print(', '.join([neighbour for neighbour, _ in neighbours]))

In [18]:
# Print the words each model ranks as most similar to 'shot'.
compare('shot')

BOS
shots, layup, possession, shooting, penalty, buzzer, rebound, foul, rim, timeout, shoots, fastball, bat, game, ball, rifle, breakaway, goal, bullets, pistol

SF
shots, shootout, shooting, shoots, picture, wounded, possession, injured, night, pic, knife, photo, robbed, thief, stabbed, scene, shoot, struck, man, footage


In [19]:
# Print the words each model ranks as most similar to 'team'.
compare('team')

BOS
teams, player, franchise, division, organization, players, roster, game, league, teammates, qb, base, talent, offense, staff, goal, tribe, company, position, country

SF
organization, teams, partner, community, client, company, staff, opponent, crew, position, program, platform, audience, career, network, talent, product, site, sponsorship, campaign


In [20]:
# Print the words each model ranks as most similar to 'space'.
compare('space')

BOS
spaces, buildings, room, storage, amenities, energy, rooms, equipment, garage, exterior, capacity, apartment, place, electricity, electronics, fabric, sensors, vehicles, sunlight, transport

SF
spaces, satellites, mission, outer, storage, light, astronauts, room, rooms, telescope, area, transit, spacecraft, power, vcloud, drones, sunlight, garage, modular, atmosphere
