In [1]:
import pickle
import csv
import os
from pathlib import Path
from typing import Set, Tuple, NamedTuple, List, Dict, Counter, Optional

import torch
import numpy as np
from scipy.spatial import distance
from scipy.stats import spearmanr

from evaluations.euphemism import Embedding, PhrasePair  
from recomposer import Recomposer, RecomposerConfig
from decomposer import Decomposer, DecomposerConfig


# Pin both RNGs to one shared seed so reruns of the notebook are repeatable.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

### Load Pretrained Embedding

In [2]:
# Baseline embedding read from a plain-text file; the comparison cells below
# query it side by side with the learned space.
pretrained = Embedding('../../data/pretrained_word2vec/for_real_SGNS.txt', 'plain_text')

vocab_size = 111,387, num_dimensions = 300
Loading embeddings from ../../data/pretrained_word2vec/for_real_SGNS.txt
Done


In [None]:
# NOTE(review): this cell rebinds `pretrained` to a different file. It has no
# execution count in the saved notebook, so the recorded outputs below presumably
# come from the for_real_SGNS.txt baseline loaded above; running every cell in
# order would swap the baseline to this file instead — confirm which is intended.
pretrained = Embedding('../../data/pretrained_word2vec/bill_mentions_HS.txt', 'plain_text')

### Denotation Space: we want nearest neighbors to include phrases used by the other party

In [3]:
# Directory holding the checkpoints for this run.
base_dir = Path('../../results/CR_skip/GM2')
# Space loaded from the epoch-5 checkpoint in 'recomposer' mode; `cf` and the
# comparison cells below query it next to `pretrained`.
# NOTE(review): the device is pinned to cuda:0, so this cell presumably needs a
# GPU — confirm whether Embedding can load the checkpoint on CPU.
deno_space = Embedding(
    base_dir / 'epoch5.pt', 'recomposer', device=torch.device('cuda:0'))
# NOTE(review): `cono_space` is only defined in the disabled snippet below, yet the
# "Connotation Space" cells further down assign `model = cono_space`. Re-enable it
# (with a valid checkpoint path) before running those cells. `base_dir` is a Path,
# so the join must use `/` rather than `+`.
# cono_space = Embedding(
#     base_dir / 'affine/L4 0c -10b/epoch100.pt', 'decomposer', device=torch.device('cuda:0'))



In [4]:
def cf(q1, q2, model=None, baseline=None):
    """Compare two query phrases in a baseline space and in a learned space.

    Prints the cosine similarity of ``(q1, q2)`` under ``baseline`` and then
    under ``model``. Afterwards it calls ``nearest_neighbor`` for ``q1`` on
    both embeddings (baseline first), prints a blank-line separator, and does
    the same for ``q2``. The neighbor lists themselves are whatever
    ``nearest_neighbor`` emits; this function does not use its return value.

    Args:
        q1: First query phrase.
        q2: Second query phrase.
        model: Embedding to compare against the baseline. When omitted, the
            notebook-level ``deno_space`` is used (the original behavior).
        baseline: Reference embedding. When omitted, the notebook-level
            ``pretrained`` is used (the original behavior).

    Returns:
        None. All results are written to stdout.
    """
    # Resolve the defaults at call time (not at definition time) so that
    # rebinding the notebook globals is picked up without redefining `cf`.
    if model is None:
        model = deno_space
    if baseline is None:
        baseline = pretrained

    print(baseline.cosine_similarity(q1, q2))
    print(model.cosine_similarity(q1, q2))
    baseline.nearest_neighbor(q1)
    model.nearest_neighbor(q1)
    # print('\n') emits two newlines: a visual gap between the q1 and q2 results.
    print('\n')
    baseline.nearest_neighbor(q2)
    model.nearest_neighbor(q2)

In [8]:
# Immigration phrase pair: similarity and neighbors in `pretrained` vs. `deno_space`.
cf('undocumented_workers', 'illegal_aliens')

0.7523736357688904
0.7111165523529053
0.9278	undocumented_aliens
0.8685	guest_workers
0.8656	undocumented_immigrants
0.8520	guest_worker
0.8518	temporary_workers
0.8417	temporary_worker
0.8384	temporary_worker_program
0.8332	country_illegally
0.8313	guest_worker_program
0.8289	undocumented


0.9182	undocumented_aliens
0.8687	guest_workers
0.8640	undocumented_immigrants
0.8047	the_simpsonmazzoli_bill
0.8007	undocumented
0.8003	guest_worker_program
0.7941	simpsonmazzoli
0.7859	the_agjobs_bill
0.7735	guestworker
0.7725	foreign_workers




0.9042	illegal_immigrants
0.8584	illegals
0.8538	undocumented
0.8434	aliens
0.7708	country_illegally
0.7595	illegal_immigration
0.7564	undocumented_aliens
0.7524	undocumented_workers
0.7519	noncitizens
0.7465	undocumented_immigrants


0.9118	illegal_immigrants
0.8748	illegals
0.8584	aliens
0.8157	undocumented
0.7819	noncitizens
0.7735	country_illegally
0.7728	illegal_immigration
0.7570	criminal_aliens
0.7483	alien
0.7429	undocumented_aliens




In [10]:
# Inheritance-tax phrase pair: similarity and neighbors in `pretrained` vs. `deno_space`.
cf('estate_tax', 'death_tax')

0.8849294781684875
0.860477864742279
0.8877	estate_taxes
0.8849	death_tax
0.8187	estates
0.8101	death_taxes
0.7886	estate_tax_relief
0.7375	estates_would
0.7299	inheritance
0.7283	capital_gains_taxes
0.7268	marriage_penalty
0.7076	marriage_penalty_relief


0.8605	death_tax
0.8271	estate_taxes
0.7898	estates
0.7202	estate_tax_relief
0.7095	death_taxes
0.6749	estate
0.6522	familyowned
0.6421	family_farms
0.6414	inheritance
0.6392	estates_would




0.8849	estate_tax
0.8553	death_taxes
0.8346	estate_taxes
0.7322	marriage_penalty
0.7317	estate_tax_relief
0.7292	repeal_the_death
0.7219	capital_gains_taxes
0.7182	marriage_penalty_tax
0.7157	taxable_event
0.7093	inheritance


0.8605	estate_tax
0.8387	death_taxes
0.8206	estate_taxes
0.7190	estate_tax_relief
0.7008	capital_gains_taxes
0.6881	confiscatory
0.6854	repeal_the_death
0.6838	familyowned
0.6836	estate
0.6821	family_farms




In [9]:
# Denotation space: health-care phrase pair, compared through the shared `cf`
# helper rather than an inline copy of its body (same calls, same printed output).
# An earlier attempt used 'universal_health_care' / 'socialized_medicine'; the
# former was marked OOV by the original author.
cf('singlepayer', 'governmentrun')

0.8191059827804565
0.7416767477989197
0.8690	canadianstyle
0.8631	governmentrun_health_care
0.8543	singlepayer_system
0.8402	public_plan
0.8228	national_health_insurance
0.8196	governmentrun_system
0.8191	governmentrun
0.8179	government_plan
0.8168	national_health_care
0.8143	socialized_medicine


0.8080	public_plan
0.8020	national_health_insurance
0.8002	governmentrun_health_care
0.7982	national_health_care
0.7733	universal_health_care
0.7708	universal_coverage
0.7675	american_health_care
0.7620	current_health_care
0.7453	socialized_medicine
0.7434	public_health_insurance




0.8626	governmentrun_health_care
0.8471	government_plan
0.8423	governmentrun_plan
0.8311	public_plan
0.8260	government_option
0.8239	government_health_care
0.8216	socialized
0.8197	governmentrun_health
0.8191	singlepayer
0.8150	singlepayer_system


0.8718	governmentrun_health_care
0.8582	government_plan
0.8371	governmentrun_plan
0.8359	governmentcontrolled
0.8241	socialized
0.8169	public_plan
0.8167	government_op

In [11]:
# Denotation space: 'public_option' vs. 'governmentrun', via the shared `cf`
# helper rather than an inline copy of its body (same calls, same printed output).
cf('public_option', 'governmentrun')

0.7376804947853088
0.6248732209205627
0.8982	public_plan
0.8337	government_plan
0.8298	government_option
0.8200	governmentrun_plan
0.8125	public_health_insurance
0.8042	insurance_plan
0.7996	private_health_insurance
0.7835	lower_costs
0.7831	singlepayer_system
0.7789	governmentrun_health


0.8371	public_plan
0.7358	public_health_insurance
0.7183	private_health_insurance
0.6986	singlepayer
0.6883	private_plans
0.6883	private_insurance_companies
0.6882	health_reform
0.6874	private_insurers
0.6818	insurance_plan
0.6817	government_plan




0.8626	governmentrun_health_care
0.8471	government_plan
0.8423	governmentrun_plan
0.8311	public_plan
0.8260	government_option
0.8239	government_health_care
0.8216	socialized
0.8197	governmentrun_health
0.8191	singlepayer
0.8150	singlepayer_system


0.8718	governmentrun_health_care
0.8582	government_plan
0.8371	governmentrun_plan
0.8359	governmentcontrolled
0.8241	socialized
0.8169	public_plan
0.8167	government_option
0.8162	governmentrun_health
0.8123	go

In [12]:
# Denotation space: campaign-finance phrase pair, via the shared `cf` helper
# rather than an inline copy of its body (same calls, same printed output).
cf('independent_expenditures', 'political_speech')

0.7746047973632812
0.7723322510719299
0.9004	issue_ads
0.8925	national_parties
0.8922	outside_groups
0.8913	issue_advocacy
0.8907	hard_money
0.8849	state_parties
0.8737	softmoney
0.8703	soft_money_contributions
0.8642	pac_contributions
0.8605	ban_soft_money


0.8794	state_parties
0.8773	hard_money
0.8740	issue_advocacy
0.8737	national_parties
0.8628	ban_soft_money
0.8592	issue_ads
0.8556	soft_money_contributions
0.8433	pac_contributions
0.8363	softmoney
0.8280	campaign_spending




0.8611	first_amendment_rights
0.8571	issue_advocacy
0.8404	buckley_v_valeo
0.8315	express_advocacy
0.8270	buckley_versus_valeo
0.8265	the_first_amendments
0.8233	free_speech
0.8208	the_snowejeffords_amendment
0.8189	electioneering
0.8114	freespeech


0.9003	first_amendment_rights
0.8869	express_advocacy
0.8813	issue_advocacy
0.8786	outside_groups
0.8771	abridging
0.8763	the_first_amendments
0.8761	electioneering
0.8760	freespeech
0.8721	nonparty
0.8668	campaign_finances




In [13]:
# Denotation space: 'tax_breaks' vs. 'tax_relief', via the shared `cf` helper
# rather than an inline copy of its body (same calls, same printed output).
cf('tax_breaks', 'tax_relief')

0.6967632174491882
0.633709728717804
0.8928	tax_break
0.8508	wealthiest_americans
0.8161	billion_in_tax
0.8126	tax_cuts
0.8098	wealthiest_people
0.7975	wealthy_americans
0.7931	giveaways
0.7890	huge_tax
0.7887	wealthiest_among_us
0.7876	billion_tax_cut


0.8182	tax_break
0.6783	tax_cuts
0.6610	billion_in_tax
0.6587	wealthiest_americans
0.6504	tax_incentives
0.6500	tax_credits
0.6358	tax_credit
0.6337	tax_relief
0.6317	tax_provisions
0.6306	luxury_taxes




0.8370	marriage_penalty_relief
0.8306	tax_relief_package
0.8262	tax_reduction
0.8096	tax_cut
0.8049	middleincome_tax
0.8035	perchild_tax_credit
0.8033	estate_tax_relief
0.8019	targeted_tax
0.7995	tax_cuts
0.7932	bipartisan_tax_relief


0.8297	tax_reduction
0.8015	tax_relief_package
0.8001	marriage_penalty_relief
0.7947	tax_reductions
0.7891	perchild_tax_credit
0.7843	the_taxpayer_relief_act
0.7836	tax_fairness
0.7818	the_taxpayer_refund_act
0.7806	tax_increases
0.7734	perchild




In [14]:
# Denotation space: 'socialized_medicine' vs. 'health_care_reform', via the shared
# `cf` helper rather than an inline copy of its body (same calls, same printed output).
# 'obamacare' was noted by the original author as an alternative first query.
cf('socialized_medicine', 'health_care_reform')

0.6664696931838989
0.2795944809913635
0.8996	governmentrun_health_care
0.8941	singlepayer_system
0.8757	governmentrun_system
0.8508	government_health_care
0.8432	government_option
0.8425	takeover_of_health
0.8403	hillarycare
0.8356	obamacare
0.8345	socialized
0.8313	government_takeover


0.8901	singlepayer_system
0.8770	governmentrun_health_care
0.8580	governmentrun_system
0.8505	best_health_care
0.8448	government_health_care
0.8414	canadianstyle
0.8405	takeover_of_health
0.8405	socialized
0.8398	hillarycare
0.8356	governmentrun_health




0.9245	health_reform
0.8748	comprehensive_health_care
0.8673	the_health_care_reform
0.8347	care_reform
0.8337	health_care_debate
0.8105	health_insurance_reform
0.8093	health_care_legislation
0.8053	the_health_reform
0.8025	national_health_care
0.7985	health_care_bill


0.7112	health_care_system
0.6481	health_insurance
0.6031	health_care_costs
0.5962	medicare_and_medicaid
0.5848	consuming
0.5812	health_care_services
0.5798	providers
0.5798	youre
0.573

### Connotation Space: we want nearest neighbors to be unrelated entities associated with the same party

In [None]:
# NOTE(review): `cono_space` is not defined in the visible notebook (its only
# definition is commented out), so on a fresh kernel this cell would raise
# NameError unless it is defined elsewhere — restore that definition first.
# Compares 'gun_control' with 'illegal_aliens' in `pretrained` and in `model`.
model = cono_space
q1 = 'gun_control'
q2 = 'illegal_aliens'
# Similarity of the pair: baseline first, then the space under inspection.
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# Neighbors of q1 in both spaces, a blank separator, then neighbors of q2.
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# NOTE(review): `cono_space` is not defined in the visible notebook (its only
# definition is commented out), so on a fresh kernel this cell would raise
# NameError unless it is defined elsewhere — restore that definition first.
# Compares 'wall_street_reform' with 'civil_rights' in `pretrained` and in `model`.
model = cono_space
q1 = 'wall_street_reform'
q2 = 'civil_rights'
# Similarity of the pair: baseline first, then the space under inspection.
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# Neighbors of q1 in both spaces, a blank separator, then neighbors of q2.
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# NOTE(review): `cono_space` is not defined in the visible notebook (its only
# definition is commented out), so on a fresh kernel this cell would raise
# NameError unless it is defined elsewhere — restore that definition first.
# Compares 'nuclear_arms_race' with 'credit_card' in `pretrained` and in `model`.
model = cono_space
q1 = 'nuclear_arms_race'
q2 = 'credit_card'
# Similarity of the pair: baseline first, then the space under inspection.
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# Neighbors of q1 in both spaces, a blank separator, then neighbors of q2.
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# NOTE(review): `cono_space` is not defined in the visible notebook (its only
# definition is commented out), so on a fresh kernel this cell would raise
# NameError unless it is defined elsewhere — restore that definition first.
# Single-query probe: neighbors of one phrase in `pretrained`, then in `model`.
model = cono_space
q = 'national_energy_tax'
pretrained.nearest_neighbor(q)
model.nearest_neighbor(q)

In [None]:
# Spot check of one phrase pair in the baseline space only; as the cell's last
# expression, the similarity is shown as the cell output.
pretrained.cosine_similarity('tariff', 'employers')