In [1]:
import pickle
import csv
import os
from pathlib import Path
from typing import Set, Tuple, NamedTuple, List, Dict, Counter, Optional

import torch
import numpy as np
from scipy.spatial import distance
from scipy.stats import spearmanr

from evaluations.euphemism import Embedding, PhrasePair  
from recomposer import Recomposer, RecomposerConfig
from decomposer import Decomposer, DecomposerConfig


torch.manual_seed(42)
np.random.seed(42)

### Load Pretrained Embedding

In [20]:
# pretrained = Embedding('../../data/pretrained_word2vec/bill_mentions_HS.txt', 'plain_text')
pretrained = Embedding('../../data/pretrained_word2vec/CR_ctx3_HS.txt', 'plain_text')
# pretrained = Embedding('../../data/pretrained_word2vec/CR_ctx3_SGNS.txt', 'plain_text')

vocab_size = 113,790, num_dimensions = 300
Loading embeddings from ../../data/pretrained_word2vec/CR_ctx3_HS.txt
Done


In [24]:
# stuff = Path('../../results/CR_topic/Ctx3 HS/L4R B512 LR1e-03/epoch100.pt')
# stuff = Path('../../results/CR_bill/Ctx3 HS/L4R B128 LR1e-03/epoch100.pt')

stuff = Path('../../results/CR_topic/M236_epoch25.pt')
# stuff = Path('../../results/CR_bill/Ctx3/L2 B2048 LR1e-03/epoch50.pt')

deno_space = Embedding(
    stuff, 'recomposer_deno', device=torch.device('cuda:0'))
cono_space = Embedding(
    stuff, 'recomposer_cono', device=torch.device('cuda:0'))

In [25]:
def tabulate(q1, q2):
    PE_cs = pretrained.cosine_similarity(q1, q2)
    DS_cs = deno_space.cosine_similarity(q1, q2)
    CS_cs = cono_space.cosine_similarity(q1, q2)
#     print(round(PE_cs, 4), f'{DS_cs:.4f}', f'{CS_cs:.4f}', 
#           q1, q2, sep='\t')
    print(round(PE_cs, 4), f'{DS_cs - PE_cs:+.4f}', f'{CS_cs - PE_cs:+.4f}', 
          q1, q2, sep='\t')

def tabulate_rank(q1, q2):
    print(
        pretrained.neighbor_rank(q1, q2),
        pretrained.neighbor_rank(q2, q1),
        deno_space.neighbor_rank(q1, q2),
        deno_space.neighbor_rank(q2, q1),
        cono_space.neighbor_rank(q1, q2),
        cono_space.neighbor_rank(q2, q1),
        q1, q2, sep='\t')

    
# def cf(q1, q2)
#     pretrained.nearest_neighbor(q1)
#     model.nearest_neighbor(q1)
#     print('\n')
#     pretrained.nearest_neighbor(q2)
#     model.nearest_neighbor(q2)

In [11]:
cherry_pairs = [
    # Luntz Report, all GOP euphemisms
#     ('government', 'washington'),
    # ('private_account', 'personal_account'),
    # ('tax_reform', 'tax_simplification'),
    ('estate_tax', 'death_tax'),
    ('capitalism', 'free_market'),  # global economy, globalization
    # ('outsourcing', 'innovation'),  # "root cause" of outsourcing, regulation
    ('undocumented', 'illegal_aliens'),  # OOV undocumented_workers
    ('foreign_trade', 'international_trade'),  # foreign, global all bad
    # ('drilling_for_oil', 'exploring_for_energy'),
    # ('drilling', 'energy_exploration'),
    # ('tort_reform', 'lawsuit_abuse_reform'),
    # ('trial_lawyer', 'personal_injury_lawyer'),  # aka ambulance chasers
    # ('corporate_transparency', 'corporate_accountability'),
    # ('school_choice', 'parental_choice'),  # equal_opportunity_in_education
    # ('healthcare_choice', 'right_to_choose')

    # Own Cherries
    ('public_option', 'governmentrun'),
#     ('political_speech', 'campaign_spending'),  # hard example
#     ('cut_taxes', 'supplyside'),  # OOV supplyside
]



In [23]:
for q1, q2 in cherry_pairs:  # bill
    tabulate(q1, q2)

0.8991	+0.0497	-0.7557	estate_tax	death_tax
0.791	+0.1060	+0.0336	capitalism	free_market
0.8069	+0.0255	-0.0062	undocumented	illegal_aliens
0.8985	-0.0471	+0.0237	foreign_trade	international_trade
0.6718	+0.0635	-0.5721	public_option	governmentrun


In [27]:
for q1, q2 in cherry_pairs:  # topic
    tabulate(q1, q2)

0.8991	+0.0741	-0.8487	estate_tax	death_tax
0.791	+0.1355	+0.1599	capitalism	free_market
0.8069	-0.0965	+0.1427	undocumented	illegal_aliens
0.8985	+0.0183	-0.0088	foreign_trade	international_trade
0.6718	+0.2401	-0.8447	public_option	governmentrun


In [81]:
for q1, q2 in cherry_pairs:
    tabulate_rank(q1, q2)

2	1	19	4	23862	20891	estate_tax	death_tax
49415	16256	2691	3660	7514	4721	capitalism	free_market


KeyError: 'undocumented_workers'

In [None]:
cf('undocumented_workers', 'illegal_aliens')

In [None]:
cf('estate_tax', 'death_tax')

In [None]:
# Denotation Space: Want to see stuff from other party
# model.embedding = model.deno_embed
# q1 = 'universal_health_care'  # OOV
# q2 = 'socialized_medicine'
model = deno_space
q1 = 'singlepayer'
q2 = 'governmentrun'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
model = deno_space
q1 = 'public_option'
q2 = 'governmentrun'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
model = deno_space
q1 = 'independent_expenditures'
q2 = 'political_speech'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
model = deno_space
q1 = 'tax_breaks'
q2 = 'tax_relief'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
model = deno_space
q1 = 'socialized_medicine' # 'obamacare'
q2 = 'health_care_reform'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

### Connotation Space: Want to see unrelated random entities of the same party

In [None]:
model = cono_space
q1 = 'gun_control'
q2 = 'illegal_aliens'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
model = cono_space
q1 = 'wall_street_reform'
q2 = 'civil_rights'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
model = cono_space
q1 = 'nuclear_arms_race'
q2 = 'credit_card'
print(pretrained.cosine_similarity(q1, q2))
print(model.cosine_similarity(q1, q2))
pretrained.nearest_neighbor(q1)
model.nearest_neighbor(q1)
print('\n')
pretrained.nearest_neighbor(q2)
model.nearest_neighbor(q2)

In [None]:
model = cono_space
q = 'national_energy_tax'
pretrained.nearest_neighbor(q)
model.nearest_neighbor(q)

In [None]:
pretrained.cosine_similarity('tariff', 'employers')