In [12]:
import pickle
import csv
import os
from pathlib import Path
from typing import Set, Tuple, NamedTuple, List, Dict, Counter, Optional

import torch
import numpy as np
from scipy.spatial import distance
from scipy.stats import spearmanr

from evaluations.euphemism import Embedding, PhrasePair  
from old_congress import Decomposer, Recomposer, RecomposerConfig


# Fix the global PyTorch and NumPy RNG seeds to the same constant.
torch.manual_seed(42)
np.random.seed(42)

### Load Pretrained Embedding

In [22]:
# Baseline embedding that the decomposed spaces are compared against.
# NOTE(review): 'plain_text' presumably names the on-disk format of the
# vector file — confirm against Embedding.
pretrained = Embedding('../../data/pretrained_word2vec/for_real.txt', 'plain_text')

vocab_size = 111,387, num_dimensions = 300
Loading embeddings from ../../data/pretrained_word2vec/for_real.txt
Done


In [14]:
# Both spaces are built from the same saved checkpoint; only the selector
# string handed to Embedding differs between them.
stuff = Path('../../results/CR_skip/GM2/B8 NS10/epoch4.pt')

deno_space, cono_space = (
    Embedding(stuff, selector, device=torch.device('cuda:0'))
    for selector in ('recomposer_deno', 'recomposer_cono')
)



In [23]:
def tabulate(q1, q2):
    """Print one tab-separated row comparing a phrase pair across three spaces.

    Columns, in order:
      1. cosine similarity of (q1, q2) in ``pretrained``;
      2. signed change of that similarity in ``deno_space`` vs. ``pretrained``;
      3. signed change of that similarity in ``cono_space`` vs. ``pretrained``;
      4. q1;
      5. q2.

    Every numeric column is rendered with exactly four decimal places.
    ``round(x, 4)`` was previously used for the first column, which drops
    trailing zeros (``0.682`` instead of ``0.6820``) and made the column
    inconsistent with the other two.
    """
    PE_cs = pretrained.cosine_similarity(q1, q2)
    DS_cs = deno_space.cosine_similarity(q1, q2)
    CS_cs = cono_space.cosine_similarity(q1, q2)
    print(f'{PE_cs:.4f}', f'{DS_cs - PE_cs:+.4f}', f'{CS_cs - PE_cs:+.4f}',
          q1, q2, sep='\t')
    
def tabulate_rank(q1, q2):
    """Print neighbor ranks for a phrase pair as one tab-separated row.

    For each of ``pretrained``, ``deno_space`` and ``cono_space`` (in that
    order) the row holds ``neighbor_rank(q1, q2)`` followed by
    ``neighbor_rank(q2, q1)``; the two phrases close the row.
    """
    ranks = []
    for space in (pretrained, deno_space, cono_space):
        ranks.append(space.neighbor_rank(q1, q2))
        ranks.append(space.neighbor_rank(q2, q1))
    print(*ranks, q1, q2, sep='\t')

def cf(q1, q2, model=None):
    """Run nearest-neighbor lookups for q1, then q2, in two embedding spaces.

    For each phrase, ``nearest_neighbor`` is called on ``pretrained`` first and
    then on ``model``; a blank separator is printed between the two phrases.

    Args:
        q1: first query phrase.
        q2: second query phrase.
        model: embedding space to put next to ``pretrained``. When omitted,
            the notebook-level variable ``model`` is used if a cell has
            assigned it; failing that, ``deno_space`` is used.

    The fallback exists because this function used to read the global
    ``model`` unconditionally, so calling it before any cell had assigned
    ``model`` raised NameError on a fresh kernel.
    """
    if model is None:
        # Legacy behavior: honor a notebook-level `model` when one is set.
        model = globals().get('model')
    if model is None:
        model = deno_space

    pretrained.nearest_neighbor(q1)
    model.nearest_neighbor(q1)
    print('\n')
    pretrained.nearest_neighbor(q2)
    model.nearest_neighbor(q2)

In [16]:
# Hand-picked phrase pairs. Each entry is a (q1, q2) tuple of vocabulary
# phrases; the cells below pass every pair to tabulate(). Commented-out
# entries are candidates that are currently excluded from the run.
cherry_pairs = [
    # Luntz Report, all GOP euphemisms
    ('government', 'washington'),
    # ('private_account', 'personal_account'),
    # ('tax_reform', 'tax_simplification'),
    ('estate_tax', 'death_tax'),
    ('capitalism', 'free_market'),  # global economy, globalization
    # ('outsourcing', 'innovation'),  # "root cause" of outsourcing, regulation
    ('undocumented_workers', 'illegal_aliens'),  # OOV undocumented_workers
    ('foreign_trade', 'international_trade'),  # foreign, global all bad
    # ('drilling_for_oil', 'exploring_for_energy'),
    # ('drilling', 'energy_exploration'),
    # ('tort_reform', 'lawsuit_abuse_reform'),
    # ('trial_lawyer', 'personal_injury_lawyer'),  # aka ambulance chasers
    # ('corporate_transparency', 'corporate_accountability'),
    # ('school_choice', 'parental_choice'),  # equal_opportunity_in_education
    # ('healthcare_choice', 'right_to_choose')

    # Own Cherries
    ('public_option', 'governmentrun'),
    ('political_speech', 'campaign_spending'),  # hard example
    ('cut_taxes', 'supply_side'),  
    ('cut_taxes', 'trickledown'),  
    ('voodoo', 'supply_side'),
    
    # large vocabulary
    ('star_wars', 'strategic_defense_initiative'),
    ('socialized_medicine', 'singlepayer'),
    ('cap_and_trade', 'national_energy_tax'),
    ('waterboarding', 'interrogation'),
    ('tax_expenditures', 'spending_programs'),
    ('nuclear_option', 'constitutional_option'),
]

In [20]:
# Print one comparison row per phrase pair in cherry_pairs.
for q1, q2 in cherry_pairs:
    tabulate(q1, q2)

0.4729	+0.2342	+0.2668	government	washington
0.8849	-0.0299	-0.3192	estate_tax	death_tax
0.7127	-0.0354	+0.0690	capitalism	free_market
0.7524	-0.0697	-0.7449	undocumented_workers	illegal_aliens
0.7617	-0.0480	-0.0022	foreign_trade	international_trade
0.7377	-0.1361	-0.3948	public_option	governmentrun
0.7671	-0.1206	-0.5883	political_speech	campaign_spending
0.6801	-0.2318	-0.1474	cut_taxes	supply_side
0.6838	-0.2633	-0.1515	cut_taxes	trickledown
0.7633	-0.0564	+0.0713	voodoo	supply_side
0.8032	-0.0608	-0.2067	star_wars	strategic_defense_initiative
0.8143	-0.0823	-0.2158	socialized_medicine	singlepayer
0.8562	+0.0181	+0.0153	cap_and_trade	national_energy_tax
0.8242	-0.0412	-0.1247	waterboarding	interrogation
0.7562	-0.1235	-0.5205	tax_expenditures	spending_programs
0.8959	-0.1864	-0.8598	nuclear_option	constitutional_option


In [24]:
# NOTE(review): this cell repeats the previous loop verbatim. Its saved output
# differs from the earlier one, presumably because `pretrained` was reloaded
# between the two runs (the execution counts are out of order) — confirm which
# output is current and drop the stale cell.
for q1, q2 in cherry_pairs:
    tabulate(q1, q2)

0.2075	+0.4996	+0.5322	government	washington
0.8923	-0.0373	-0.3266	estate_tax	death_tax
0.7375	-0.0601	+0.0443	capitalism	free_market
0.7815	-0.0988	-0.7740	undocumented_workers	illegal_aliens
0.7424	-0.0287	+0.0172	foreign_trade	international_trade
0.7268	-0.1252	-0.3839	public_option	governmentrun
0.682	-0.0354	-0.5031	political_speech	campaign_spending
0.5212	-0.0728	+0.0116	cut_taxes	supply_side
0.5431	-0.1227	-0.0108	cut_taxes	trickledown
0.6417	+0.0652	+0.1929	voodoo	supply_side
0.8469	-0.1044	-0.2503	star_wars	strategic_defense_initiative
0.6995	+0.0325	-0.1010	socialized_medicine	singlepayer
0.7152	+0.1591	+0.1563	cap_and_trade	national_energy_tax
0.773	+0.0100	-0.0735	waterboarding	interrogation
0.6084	+0.0243	-0.3727	tax_expenditures	spending_programs
0.8094	-0.1000	-0.7734	nuclear_option	constitutional_option


In [None]:
# NOTE(review): the space compared against `pretrained` here depends on the
# notebook-level `model`, which this file first assigns in a later cell.
cf('undocumented_workers', 'illegal_aliens')

In [None]:
# NOTE(review): the space compared against `pretrained` here depends on the
# notebook-level `model`, which this file first assigns in a later cell.
cf('estate_tax', 'death_tax')

In [None]:
# Denotation Space: Want to see stuff from other party
# An alternative pair, 'universal_health_care' / 'socialized_medicine', is
# left out here; 'universal_health_care' is marked OOV.
model = deno_space
q1 = 'singlepayer'
q2 = 'governmentrun'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# cf() runs nearest_neighbor for q1, then q2, on `pretrained` and on `model`.
cf(q1, q2)

In [None]:
# Compare one phrase pair in `pretrained` and in the denotation space.
model = deno_space
q1 = 'public_option'
q2 = 'governmentrun'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# cf() runs nearest_neighbor for q1, then q2, on `pretrained` and on `model`.
cf(q1, q2)

In [None]:
# Compare one phrase pair in `pretrained` and in the denotation space.
model = deno_space
q1 = 'independent_expenditures'
q2 = 'political_speech'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# cf() runs nearest_neighbor for q1, then q2, on `pretrained` and on `model`.
cf(q1, q2)

In [None]:
# Compare one phrase pair in `pretrained` and in the denotation space.
model = deno_space
q1 = 'tax_breaks'
q2 = 'tax_relief'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# cf() runs nearest_neighbor for q1, then q2, on `pretrained` and on `model`.
cf(q1, q2)

In [None]:
# Compare one phrase pair in `pretrained` and in the denotation space.
model = deno_space
q1 = 'socialized_medicine' # 'obamacare'
q2 = 'health_care_reform'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# cf() runs nearest_neighbor for q1, then q2, on `pretrained` and on `model`.
cf(q1, q2)

### Connotation Space: Want to see unrelated random entities of the same party

In [None]:
# Compare one phrase pair in `pretrained` and in the connotation space.
model = cono_space
q1 = 'gun_control'
q2 = 'illegal_aliens'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# cf() runs nearest_neighbor for q1, then q2, on `pretrained` and on `model`.
cf(q1, q2)

In [None]:
# Compare one phrase pair in `pretrained` and in the connotation space.
model = cono_space
q1 = 'wall_street_reform'
q2 = 'civil_rights'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# cf() runs nearest_neighbor for q1, then q2, on `pretrained` and on `model`.
cf(q1, q2)

In [None]:
# Compare one phrase pair in `pretrained` and in the connotation space.
model = cono_space
q1 = 'nuclear_arms_race'
q2 = 'credit_card'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
# cf() runs nearest_neighbor for q1, then q2, on `pretrained` and on `model`.
cf(q1, q2)

In [None]:
# Nearest neighbors of a single phrase, first in `pretrained`, then in
# `cono_space`.
model = cono_space
q = 'national_energy_tax'
pretrained.nearest_neighbor(q)
model.nearest_neighbor(q)

In [None]:
# One-off lookup: cosine similarity of two phrases in the pretrained embedding.
pretrained.cosine_similarity('tariff', 'employers')