In [1]:
import pickle
import csv
import os
from pathlib import Path
from typing import Set, Tuple, NamedTuple, List, Dict, Counter, Optional

import torch
import numpy as np
from scipy.spatial import distance
from scipy.stats import spearmanr

from evaluations.euphemism import Embedding, PhrasePair  
from recomposer import Recomposer, RecomposerConfig
from decomposer import Decomposer, DecomposerConfig


# Seed both RNG sources with one shared value so reruns are repeatable.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

### Load Pretrained Embedding

In [2]:
# Baseline embedding space, loaded from a plain-text word2vec dump.
PRETRAINED_PATH = '../../data/pretrained_word2vec/bill_mentions_HS.txt'
pretrained = Embedding(PRETRAINED_PATH, 'plain_text')

vocab_size = 24,005, num_dimensions = 300
Loading embeddings from ../../data/pretrained_word2vec/bill_mentions_HS.txt
Done


In [29]:
# Checkpoint whose two recomposer embedding spaces are inspected below.
checkpoint_path = Path('../../results/CR_topic/search greek/Dd0.42 Dg-0.31 Cd-0.80 Cg0.88 R0.11/epoch25.pt')
# Alternative checkpoint:
# checkpoint_path = Path('../../results/CR_bill/Ctx3/L2 B2048 LR1e-03/epoch50.pt')

# Backward-compatible alias: the path used to be called `stuff`.
stuff = checkpoint_path

# Use the first GPU when one is present; otherwise fall back to CPU so the
# notebook can still be opened on a machine without CUDA.
# NOTE(review): assumes Embedding accepts a CPU device for these checkpoints — confirm.
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Both spaces come from the same checkpoint; only the requested source kind differs.
deno_space = Embedding(checkpoint_path, 'recomposer_deno', device=DEVICE)
cono_space = Embedding(checkpoint_path, 'recomposer_cono', device=DEVICE)

In [30]:
def tabulate(q1, q2):
    """Print one tab-separated row of cosine similarities for a phrase pair.

    The row holds the similarity of ``q1`` and ``q2`` in the pretrained space,
    the denotation space and the connotation space (in that order, each
    rounded to 4 decimals), followed by the two phrases themselves.
    Neighbor ranks are not included here; ``tabulate_rank`` reports those.
    """
    spaces = (pretrained, deno_space, cono_space)
    # Compute every similarity first, then round while building the row.
    similarities = [space.cosine_similarity(q1, q2) for space in spaces]
    print(*(round(value, 4) for value in similarities), q1, q2, sep='\t')

def tabulate_rank(q1, q2):
    """Print one tab-separated row of neighbor ranks for a phrase pair.

    For each of the pretrained, denotation and connotation spaces (in that
    order) the row holds ``neighbor_rank(q1, q2)`` followed by
    ``neighbor_rank(q2, q1)``; the two phrases close the row.
    """
    ranks = []
    for space in (pretrained, deno_space, cono_space):
        # Query both directions, forward lookup first.
        ranks.append(space.neighbor_rank(q1, q2))
        ranks.append(space.neighbor_rank(q2, q1))
    print(*ranks, q1, q2, sep='\t')

    
def cf(q1, q2, model=None):
    """Show nearest neighbors of two phrases in the pretrained and a learned space.

    Calls ``nearest_neighbor`` for ``q1`` in the pretrained space and then in
    ``model``, prints a blank separator, and repeats for ``q2``.

    Args:
        q1: First query phrase.
        q2: Second query phrase.
        model: Embedding space to compare against the pretrained one.
            Defaults to ``deno_space`` when omitted.
            NOTE(review): the earlier draft read a global ``model``; the
            default chosen here is an assumption — confirm.
    """
    if model is None:
        model = deno_space
    pretrained.nearest_neighbor(q1)
    model.nearest_neighbor(q1)
    print('\n')
    pretrained.nearest_neighbor(q2)
    model.nearest_neighbor(q2)

In [31]:
# Hand-picked (q1, q2) phrase pairs; each active pair is passed to
# tabulate() / tabulate_rank() by the loops that follow.
# Commented-out entries are kept for reference and are not evaluated.
cherry_pairs = [
    # Luntz Report, all GOP euphemisms
    ('government', 'washington'),
    # ('private_account', 'personal_account'),
    # ('tax_reform', 'tax_simplification'),
    ('estate_tax', 'death_tax'),
    ('capitalism', 'free_market'),  # global economy, globalization
    # ('outsourcing', 'innovation'),  # "root cause" of outsourcing, regulation
    ('undocumented', 'illegal_aliens'),  # OOV undocumented_workers
    ('foreign_trade', 'international_trade'),  # foreign, global all bad
    # ('drilling_for_oil', 'exploring_for_energy'),
    # ('drilling', 'energy_exploration'),
    # ('tort_reform', 'lawsuit_abuse_reform'),
    # ('trial_lawyer', 'personal_injury_lawyer'),  # aka ambulance chasers
    # ('corporate_transparency', 'corporate_accountability'),
    # ('school_choice', 'parental_choice'),  # equal_opportunity_in_education
    # ('healthcare_choice', 'right_to_choose')

    # Own Cherries
    ('public_option', 'governmentrun'),
#     ('political_speech', 'campaign_spending'),  # hard example
    ('cut_taxes', 'trickledown'),  # OOV supplyside
]



In [32]:
# One row per pair. Columns: neighbor_rank(q1, q2) and neighbor_rank(q2, q1)
# for pretrained, deno_space and cono_space (in that order), then q1 and q2.
# Note: the loop targets rebind the notebook-level names q1 and q2.
for q1, q2 in cherry_pairs:
    tabulate_rank(q1, q2)

22515	19518	1068	2052	2290	14324	government	washington
2	1	19	4	23862	20891	estate_tax	death_tax
10313	1530	2691	3660	7514	4721	capitalism	free_market
44	53	13822	20036	725	2462	undocumented	illegal_aliens
1707	270	158	549	10710	6548	foreign_trade	international_trade
5948	16000	942	3151	23771	23251	public_option	governmentrun
1632	12387	2390	4497	23508	18593	cut_taxes	trickledown


In [None]:
# One row per pair. Columns: cosine similarity in pretrained, deno_space and
# cono_space (rounded to 4 decimals), then q1 and q2.
# Note: the loop targets rebind the notebook-level names q1 and q2.
for q1, q2 in cherry_pairs:
    tabulate(q1, q2)

In [None]:
# cf presumably lists nearest neighbors of each phrase in the pretrained and
# learned spaces — TODO confirm.
# NOTE(review): 'undocumented_workers' is marked OOV in the cherry_pairs
# comments, so this lookup may fail — confirm before relying on it.
cf('undocumented_workers', 'illegal_aliens')

In [None]:
# cf presumably lists nearest neighbors of each phrase in the pretrained and
# learned spaces — TODO confirm.
cf('estate_tax', 'death_tax')

In [None]:
# Denotation Space: Want to see stuff from other party
# (earlier variant: model.embedding = model.deno_embed)
# Alternative pair: q1 = 'universal_health_care' (OOV), q2 = 'socialized_medicine'
model = deno_space
q1, q2 = 'singlepayer', 'governmentrun'

# Pair similarity: pretrained space first, then the selected space.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbors of q1 in both spaces.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
# Neighbors of q2; the last call stays a bare trailing expression so the
# notebook still displays whatever it returns.
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# Denotation-space probe for one phrase pair.
model = deno_space
q1, q2 = 'public_option', 'governmentrun'

# Pair similarity: pretrained space first, then the selected space.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbors of q1 in both spaces.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
# Neighbors of q2; the last call stays a bare trailing expression so the
# notebook still displays whatever it returns.
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# Denotation-space probe for one phrase pair.
model = deno_space
q1, q2 = 'independent_expenditures', 'political_speech'

# Pair similarity: pretrained space first, then the selected space.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbors of q1 in both spaces.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
# Neighbors of q2; the last call stays a bare trailing expression so the
# notebook still displays whatever it returns.
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# Denotation-space probe for one phrase pair.
model = deno_space
q1, q2 = 'tax_breaks', 'tax_relief'

# Pair similarity: pretrained space first, then the selected space.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbors of q1 in both spaces.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
# Neighbors of q2; the last call stays a bare trailing expression so the
# notebook still displays whatever it returns.
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# Denotation-space probe for one phrase pair.
# Alternative first phrase noted by the author: 'obamacare'
model = deno_space
q1, q2 = 'socialized_medicine', 'health_care_reform'

# Pair similarity: pretrained space first, then the selected space.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbors of q1 in both spaces.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
# Neighbors of q2; the last call stays a bare trailing expression so the
# notebook still displays whatever it returns.
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

### Connotation Space: Want to see unrelated random entities of the same party

In [None]:
# Connotation-space probe for one phrase pair.
model = cono_space
q1, q2 = 'gun_control', 'illegal_aliens'

# Pair similarity: pretrained space first, then the selected space.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbors of q1 in both spaces.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
# Neighbors of q2; the last call stays a bare trailing expression so the
# notebook still displays whatever it returns.
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# Connotation-space probe for one phrase pair.
model = cono_space
q1, q2 = 'wall_street_reform', 'civil_rights'

# Pair similarity: pretrained space first, then the selected space.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbors of q1 in both spaces.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
# Neighbors of q2; the last call stays a bare trailing expression so the
# notebook still displays whatever it returns.
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# Connotation-space probe for one phrase pair.
model = cono_space
q1, q2 = 'nuclear_arms_race', 'credit_card'

# Pair similarity: pretrained space first, then the selected space.
for space in (pretrained, model):
    print(space.cosine_similarity(q1, q2))

# Neighbors of q1 in both spaces.
for space in (pretrained, model):
    space.nearest_neighbor(q1)
print('\n')
# Neighbors of q2; the last call stays a bare trailing expression so the
# notebook still displays whatever it returns.
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
# Single-phrase probe: nearest neighbors of q in the pretrained space,
# then in the connotation space.
model = cono_space
q = 'national_energy_tax'
pretrained.nearest_neighbor(q)
# Trailing expression: the notebook displays its return value, if any.
model.nearest_neighbor(q)

In [None]:
# Ad-hoc check: cosine similarity of two phrases in the pretrained space
# (shown as the cell's output).
pretrained.cosine_similarity('tariff', 'employers')