In [6]:
import pickle
import csv
import os
from typing import Set, Tuple, NamedTuple, List, Dict, Counter, Optional

import torch
import numpy as np
from scipy.spatial import distance
from scipy.stats import spearmanr

from evaluations.intrinsic_eval import Embedding, PhrasePair
# from recomposer import Decomposer, Recomposer, RecomposerConfig
from decomposer import Decomposer, DecomposerConfig


# One seed value shared by both RNGs seeded in this notebook, kept in a single
# named constant instead of a literal repeated per library.
SEED = 42
torch.manual_seed(SEED)
# Kept as the final statement: it returns None, so the cell displays no Out[].
np.random.seed(SEED)

### Load Pretrained Embedding

In [4]:
# Load the pretrained embedding from a plain-text file. The recorded log below
# reports vocab_size = 111,387 and num_dimensions = 300 for this file.
# NOTE(review): a later cell rebinds `pretrained` to bill_mentions_HS.txt, so on
# a top-to-bottom run this load is superseded -- confirm which file the
# analysis cells are meant to use.
pretrained = Embedding('../../data/pretrained_word2vec/for_real.txt', 'plain_text')

vocab_size = 111,387, num_dimensions = 300
Loading embeddings from ../../data/pretrained_word2vec/for_real.txt
Done


In [18]:
# Rebind `pretrained` to a second plain-text embedding; this replaces the
# for_real.txt embedding assigned to the same name in the previous cell.
# The recorded log below reports vocab_size = 24,005 and num_dimensions = 300.
pretrained = Embedding('../../data/pretrained_word2vec/bill_mentions_HS.txt', 'plain_text')

vocab_size = 24,005, num_dimensions = 300
Loading embeddings from ../../data/pretrained_word2vec/bill_mentions_HS.txt
Done


In [23]:
# List the 20 nearest neighbors of 'health_care' in the pretrained space.
# The recorded output is one "score<TAB>word" row per neighbor; the score is
# presumably cosine similarity -- confirm against Embedding.nearest_neighbor.
pretrained.nearest_neighbor('health_care', top_k=20)

0.9415	health_care_system
0.9310	seniors
0.9150	coverage
0.9148	care
0.9078	senior_citizens
0.9072	medicaid
0.9066	affordable
0.9020	medicare
0.8996	medicare_beneficiaries
0.8989	access_to_health
0.8980	lowincome
0.8974	uninsured
0.8937	health_care_coverage
0.8936	health_coverage
0.8928	elderly
0.8916	medical_care
0.8906	health_care_costs
0.8800	health_insurance
0.8783	beneficiaries
0.8755	medicare_and_medicaid




### Denotation Space: nearest neighbors should include terms used by the other party

In [3]:
# Folder (relative path) under which the trained checkpoints are stored.
base_dir = '../../results/'

# Use the first GPU when one is available; otherwise fall back to CPU so this
# cell does not fail on a machine without CUDA.
# NOTE(review): assumes Embedding's 'decomposer' loader accepts a CPU device --
# TODO confirm.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# NOTE(review): the denotation-space cells further down assign
# `model = deno_space`, but the only definition of `deno_space` is the
# commented-out load below, so those cells raise NameError on a fresh kernel.
# Re-enable it once the checkpoint path is confirmed.
# deno_space = Embedding(
#     base_dir + 'L1/epoch50.pt', 'decomposer', device=device)

# Connotation-space embedding read from the epoch-100 checkpoint of the
# 'affine/L4 0c -10b' run.
cono_space = Embedding(
    base_dir + 'affine/L4 0c -10b/epoch100.pt', 'decomposer', device=device)

In [None]:
# Denotation space: want to see terms from the other party among the neighbors.
# Earlier query pair, kept for reference: 'universal_health_care' (OOV) and
# 'socialized_medicine'. An older variant of this cell also set
# `model.embedding = model.deno_embed` before querying.
# NOTE(review): `deno_space` is only assigned in a commented-out line of the
# setup cell, so this cell raises NameError until that load is restored.
model = deno_space
q1, q2 = 'singlepayer', 'governmentrun'

# Similarity of the pair: pretrained space first, `model` second.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbor lists for q1 in both spaces, a blank separator, then q2.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)  # stays the cell's final top-level expression

In [None]:
# Denotation-space probe for 'public_option' vs. 'governmentrun'.
# NOTE(review): `deno_space` is only assigned in a commented-out line of the
# setup cell, so this cell raises NameError until that load is restored.
model = deno_space
q1, q2 = 'public_option', 'governmentrun'

# Similarity of the pair: pretrained space first, `model` second.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbor lists for q1 in both spaces, a blank separator, then q2.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)  # stays the cell's final top-level expression

In [None]:
# Denotation-space probe for 'independent_expenditures' vs. 'political_speech'.
# NOTE(review): `deno_space` is only assigned in a commented-out line of the
# setup cell, so this cell raises NameError until that load is restored.
model = deno_space
q1, q2 = 'independent_expenditures', 'political_speech'

# Similarity of the pair: pretrained space first, `model` second.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbor lists for q1 in both spaces, a blank separator, then q2.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)  # stays the cell's final top-level expression

In [None]:
# Denotation-space probe for 'tax_breaks' vs. 'tax_relief'.
# NOTE(review): `deno_space` is only assigned in a commented-out line of the
# setup cell, so this cell raises NameError until that load is restored.
model = deno_space
q1, q2 = 'tax_breaks', 'tax_relief'

# Similarity of the pair: pretrained space first, `model` second.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbor lists for q1 in both spaces, a blank separator, then q2.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)  # stays the cell's final top-level expression

In [None]:
# Denotation-space probe for 'socialized_medicine' vs. 'health_care_reform'.
# 'obamacare' was noted as an alternative first query.
# NOTE(review): `deno_space` is only assigned in a commented-out line of the
# setup cell, so this cell raises NameError until that load is restored.
model = deno_space
q1, q2 = 'socialized_medicine', 'health_care_reform'

# Similarity of the pair: pretrained space first, `model` second.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbor lists for q1 in both spaces, a blank separator, then q2.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)  # stays the cell's final top-level expression

### Connotation Space: nearest neighbors should be unrelated, arbitrary entities from the same party

In [8]:
# Connotation-space probe for 'gun_control' vs. 'illegal_aliens'.
model = cono_space
q1, q2 = 'gun_control', 'illegal_aliens'

# Similarity of the pair: pretrained space first, `model` second.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbor lists for q1 in both spaces, a blank separator, then q2.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)  # stays the cell's final top-level expression

0.16207344830036163
0.09589895606040955
0.8222	the_brady_bill
0.7669	gun_safety
0.7631	gun_laws
0.7600	lawabiding_citizens
0.7559	guncontrol
0.7442	nra
0.7407	antigun
0.7371	brady_bill
0.7358	handguns
0.7319	assault_weapons


0.6633	gun_laws
0.6419	the_brady_bill
0.6297	waiting_period
0.5598	gun_safety
0.5489	national_rifle_association
0.5309	nra
0.4741	atf
0.4714	anticrime
0.4701	the_juvenile_justice_bill
0.4699	handgun




0.9159	illegal_immigrants
0.8867	undocumented
0.8659	aliens
0.8648	illegals
0.7993	undocumented_immigrants
0.7877	country_illegally
0.7815	undocumented_workers
0.7726	illegal_immigration
0.7683	undocumented_aliens
0.7680	alien


0.7277	undocumented
0.7122	illegal_immigrants
0.6583	illegals
0.6511	aliens
0.6367	noncitizens
0.5642	deportable
0.5461	illegal_immigration
0.5415	illegally
0.5338	deport
0.4908	amnesty




In [7]:
# Connotation-space probe for 'wall_street_reform' vs. 'civil_rights'.
model = cono_space
q1, q2 = 'wall_street_reform', 'civil_rights'

# Similarity of the pair: pretrained space first, `model` second.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbor lists for q1 in both spaces, a blank separator, then q2.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)  # stays the cell's final top-level expression

0.1166476458311081
0.1566346436738968
0.8053	financial_reform
0.7396	the_wall_street_reform_bill
0.7279	financial_regulatory_reform
0.7221	doddfrank
0.7080	financial_sector
0.7078	big_banks
0.7066	the_wall_street_reform
0.7063	financial_industry
0.6924	consumer_protection_act
0.6854	wall_street


0.6086	financial_reform
0.5065	big_banks
0.4638	main_street
0.4132	bailouts
0.4081	financial_system
0.3950	wall_street
0.3798	big_to_fail
0.3715	backroom
0.3694	protect_the_american
0.3686	energy_bill




0.7461	civil_rights_act
0.7283	the_civil_rights_acts
0.7187	the_civil_rights_act
0.7152	civil_rights_laws
0.7144	voting_rights
0.7037	civil_rights_acts
0.7025	civilrights
0.6876	equality
0.6810	voting_rights_acts
0.6805	the_voting_rights_act


0.4645	us_economy
0.4478	manufacturing_sector
0.4206	economic_growth
0.4151	nation
0.4064	american_businesses
0.3997	increase
0.3972	offshoring
0.3968	economic_policy
0.3962	aerospace
0.3931	exploration




In [6]:
# Connotation-space probe for 'nuclear_arms_race' vs. 'credit_card'.
model = cono_space
q1, q2 = 'nuclear_arms_race', 'credit_card'

# Similarity of the pair: pretrained space first, `model` second.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbor lists for q1 in both spaces, a blank separator, then q2.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)  # stays the cell's final top-level expression

0.028139857575297356
0.037893738597631454
0.9371	arms_race
0.8938	nuclear_arms
0.8280	superpowers
0.8207	nuclear_holocaust
0.8139	threat_of_nuclear
0.8111	nuclear_arsenals
0.8082	nuclear_war
0.8060	nuclear_arms_control
0.7886	nuclear_freeze
0.7539	two_superpowers


0.7375	arms_race
0.7291	nuclear_freeze
0.7149	superpowers
0.7010	nuclear_war
0.5918	arsenals
0.5467	madness
0.5420	verifiable
0.5302	armaments
0.5049	nuclear_warheads
0.4871	builddown




0.9299	credit_cards
0.8303	credit_card_companies
0.8052	cardholders
0.7782	cardholder
0.7708	credit_card_debt
0.7680	card_companies
0.7297	credit_cardholders
0.7262	maxed
0.7238	issuers
0.7110	surcharging


0.5208	credit_cards
0.3821	adulthood
0.3523	eightyeight
0.3509	four_trillion
0.3488	june
0.3482	thirteen
0.3404	retrained
0.3378	seven_hundred
0.3358	ptsd
0.3345	boys




In [21]:
model = cono_space
q = 'national_energy_tax'
pretrained.nearest_neighbor(q)
model.nearest_neighbor(q)

0.8599	capandtax
0.8132	capandtrade
0.7852	capandtrade_bill
0.7820	capandtrade_legislation
0.7721	jobkilling
0.7607	kill_jobs
0.7592	capandtax_bill
0.7557	every_american_family
0.7337	taxandtrade
0.7295	massive_new


0.6900	capandtrade
0.6735	jobkilling
0.5908	millions_of_jobs
0.5868	health_care_bill
0.5806	energy_tax
0.5620	tax_hikes
0.5475	cap_and_trade
0.5455	energy_costs
0.5119	job_creators
0.5027	stimulus_bill




In [15]:
# Cosine similarity between 'tariff' and 'employers' in the pretrained space;
# the bare expression is displayed as the cell's result.
pretrained.cosine_similarity('tariff', 'employers')

0.06573085486888885