In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from datetime import datetime
import numpy as np
import json

from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()

In [2]:
with open("C:/Users/Eddie/Documents/language-change-methods/word_lists/function_words.txt") as func_file:
    function_words = [line.strip().lower() for line in func_file.readlines()]

In [3]:
DB_FP = "C:/Users/Eddie/Documents/Datasets/commons.db"
hansard_dir = "C:/Users/Eddie/Documents/hansard-stuff"

In [4]:
import sys
sys.path.insert(1, hansard_dir + "/Analysis/pipeline")
sys.path.insert(1, "C:/Users/Eddie/Documents/language-change-methods")

from utility_functions import get_data_windows, get_time_windows

In [5]:
dates_fp = hansard_dir + "/key-dates.csv"

key_dates = pd.read_csv(dates_fp, delimiter="\t")

convert_to_date = lambda x: datetime.strptime(x, "%d-%m-%Y")
key_dates["date"] = key_dates["date"].apply(convert_to_date)
key_dates.set_index("date", inplace=True)
key_dates = key_dates.sort_index(ascending=True)

In [6]:
sql_get_all_posts ="""
SELECT c.uid, m.name, m.PimsId, p.party, d.date, c.body, c.topic, c.section, s.tmay_deal, s.benn_act, s.ref_stance, s.constituency_leave, c.usas_file
FROM contributions as c
INNER JOIN members as m
ON m.PimsId = c.member
INNER JOIN debates as d
ON d.uid = c.debate
INNER JOIN member_party as p
ON p.PimsId = m.PimsId
INNER JOIN member_stances as s
ON s.PimsId = m.PimsId
WHERE (d.date BETWEEN date("2015-05-01") AND date("2019-09-10"))
AND (((d.date BETWEEN p.start AND p.end) AND NOT (p.end IS NULL))
OR ((d.date >= p.start) AND (p.end IS NULL)));""".strip()

# regex for identifying EU/brexit mentions
eu_regex = r'\b(EU|[Ee]uropean [Uu]nion|[Bb]rexit)\b'

In [7]:
%%time
import sqlite3

conn = sqlite3.connect(DB_FP)
curs = conn.cursor()

# Gets all the contributions and creates a nice dataframe
all_contributions = pd.read_sql_query(sql_get_all_posts, conn)
all_contributions.columns = ['uid', 'name', 'PimsId', 'party', 'date', 'text', 'topic', 'section', 'tmay_deal', 'benn_act', 'ref_stance', 'constituency_leave', 'usas_file']
all_contributions.set_index("uid", inplace=True)
convert_to_date = lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S")
all_contributions['date'] = all_contributions['date'].apply(convert_to_date)
all_contributions.sort_values("date", inplace=True)

Wall time: 14.1 s


In [8]:
%%time
from text_processing import clean_text, spacy_tokenise# spacy_pos
from text_processing import ucrel_tokenise
import nltk
import regex as re    
import spacy

nlp = spacy.load('en_core_web_sm', parser=False, entity=False, matcher=False, add_vectors=False)

def tokenise(text):
    cleaned = clean_text(text)
    cleaned = re.sub(r"(\p{P})\p{P}*", r"\1 ", cleaned)
    tokens = spacy_tokenise(cleaned)
    return tokens

all_toks =  all_contributions["text"].apply(tokenise)

Wall time: 2min 12s


In [9]:
def get_top_vocab_and_vectors(model, n=10000):
    """
    Gets the top n words from the model's vocabulary and the vectors of these words.
    """
    top_vocab = sorted(model.wv.vocab.keys(), key=lambda x: model.wv.vocab[x].count, reverse=True)[:n]
    top_vectors = np.array([model.wv[t] for t in top_vocab])
    return top_vocab, top_vectors

In [10]:
def save_word_vectors(model, voc_fp, vec_fp):
    vocs = sorted(model.wv.vocab.keys(), key=lambda x: model.wv.vocab[x].count, reverse=True)[:10000]
    vecs = np.array([model.wv[t] for t in vocs])
    
    with open(voc_fp, 'w') as voc_file:
        json.dump(vocs, voc_file)
        
    np.save(vec_fp, vecs)

In [11]:
def check_dir(dir_name):
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

In [12]:
from gensim.models import Word2Vec

# suppress some deprecation warning..
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [13]:
EMB_DIR = "C:/Users/Eddie/Documents/Datasets/Hansard Output/Embedding_Models"
LOAD = False
# LOAD = os.path.exists('word_vectors/static/word_vectors.npy')

# Model on all contributions

In [14]:
%%time
curr_dir = os.path.join(EMB_DIR, "static")
check_dir(curr_dir)

if LOAD:
    print("Loading Model")
#     model = Word2Vec.load('word_vectors/w2v_all_contributions_static.npy')
    with open(os.path.join(curr_dir, 'vocab_all.json')) as voc_file:
        vocs = json.load(voc_file)
    vecs = np.load(os.path.join(curr_dir, 'word_vectors_all.npy'))
else:
    # training the model
    print("Training model")
    model = Word2Vec(all_toks, size=300)
    # model.save('word_vectors/w2v_all_contributions_static.bin')
    
    save_word_vectors(model, 
                      os.path.join(curr_dir, 'vocab_all.json'), 
                      os.path.join(curr_dir, 'word_vectors_all.npy'))

Training model
Wall time: 2min 22s


In [15]:
t10000 = sorted(model.wv.vocab.keys(), key=lambda x: model.wv.vocab[x].count, reverse=True)[:10000]
top_vectors = np.array([model.wv[t] for t in t10000])

In [16]:
print("Vocab size: {}".format(len(model.wv.vocab)))

Vocab size: 38474


In [16]:
def show_top_token_freq(model,topn):
    for w, v in sorted(list(model.wv.vocab.items()), key=lambda x:x[1], reverse=True)[:topn]:
        if topn<=20:
            print(f"{w:>10s} {v.count:5d}")
        else:
            print(f"{w}({v.count}), ", end="")
            
show_top_token_freq(model, 10)

           3825357
       the 2803252
         , 1860753
         . 1806067
        to 1448369
        of 1145774
      that 1130004
       and 1095262
        in 839871
         a 724597


In [17]:
def get_word_similarity(model, word, topn=10):
    if model.wv.__contains__(word):
        vecs = [f"{w}({v:.5f})" for w, v in model.wv.most_similar(word)]
        print(f"{word}:\n{vecs}")
    else:
        print(f"'{word}' not found in this model")
    print("--"*10)

words = ['brexit', 'eu', 'union', 'european', 'europe', 'remainers', 'trade']

for word in words:
     get_word_similarity(model,word)

brexit:
['mortem(0.53850)', 'mortems(0.49493)', 'exit(0.49236)', 'referendum(0.46494)', 'renegotiation(0.43924)', 'brexiteers(0.42415)', 'negotiation(0.40236)', 'ttip(0.39831)', 'pre(0.39447)', 'departure(0.39091)']
--------------------
eu:
['eea(0.71529)', 'european(0.68049)', 'euratom(0.67783)', 'euNUMBER(0.67759)', 'eurozone(0.58657)', 'cfp(0.57848)', 'echr(0.55715)', 'europe(0.55293)', 'euro(0.53939)', 'efta(0.52098)']
--------------------
union:
['union-(0.55473)', 'eu(0.52017)', 'pan-(0.51891)', 'superstate(0.50384)', 'convention(0.44481)', 'euratom(0.44203)', 'euNUMBER(0.43036)', 'declarations(0.43011)', 'cfp(0.41721)', 'unions(0.41643)']
--------------------
european:
['eu(0.68049)', 'soviet(0.59709)', 'crowns(0.57066)', 'customs(0.54882)', 'euratom(0.52345)', 'subscriptions(0.51557)', 'cinematograph(0.51187)', 'reps(0.49368)', 'europe(0.49334)', 'efta(0.48609)']
--------------------
europe:
['continent(0.58956)', 'world(0.58794)', 'eu(0.55293)', 'balkans(0.55054)', 'mediterran

In [19]:
result = model.wv.most_similar(positive=['remain', 'brexiteer'], negative=['leave'])
for word, score in result:
    print(f"{word:>15s}: {score:.4f}")

       remainer: 0.4594
parliamentarian: 0.4274
     campaigner: 0.4274
  devolutionist: 0.4258
     politician: 0.4162
    eurosceptic: 0.4117
       defender: 0.4107
       believer: 0.4087
        marxist: 0.4060
      socialist: 0.4038


### Get neighbours of keywords for chapter

In [84]:
queries = ["referendum", "brexit", "immigration", "leave"]
for query in queries:
#     print(query)
    print(",".join(neighbors(query, top_vectors, t10000, 10)))
#     print("")

election,vote,elections,brexit,leave,voted,votes,article,parliament,negotiations
referendum,exit,deal,vote,eu,austerity,negotiations,devolution,backstop,outcome
migration,asylum,welfare,fisheries,detention,border,trade,visa,migrants,sanctions
stay,remain,referendum,leaving,left,exit,lose,leaves,go,vote


# Methods for comparing models

In [18]:
def neighbors(query : str,
              embs: np.ndarray,
              vocab: list,
              K : int = 3) -> list:
    sims = np.dot(embs[vocab.index(query),],embs.T)
    output = []
    for sim_idx in sims.argsort()[::-1][1:(1+K)]:
        if sims[sim_idx] > 0:
            output.append(vocab[sim_idx])
    return output

In [19]:
def get_most_changey_words_with_models(model1, model2, n=100, k=1000, top_n=None):
    nn_scores = []
    
    top_vocab = sorted(model1.wv.vocab.keys(), key=lambda x: model1.wv.vocab[x].count, reverse=True)[:top_n]
    
    vocab1 = model1.wv.vocab
    vocab2 = model2.wv.vocab
    # Loop through all the words in the vocab
    for w in vocab1:
        if (w not in function_words 
                and w in vocab1 
                and w in vocab2 
                and vocab1[w].count > n 
                and vocab2[w].count > n 
                and w in top_vocab):
            neighbours1 = set([x[0] for x in model1.wv.most_similar(w, topn=k)])
            neighbours2 = set([x[0] for x in model2.wv.most_similar(w, topn=k)])
            nn_scores.append((len(neighbours1.intersection(neighbours2)), w))
            
    nn_scores_sorted = sorted(nn_scores)
    return nn_scores_sorted

In [87]:
def get_most_changey_words_with_vectors(vocab1, vocab2, vectors1, vectors2, n=20, k=1000):
    nn_scores = []
    # Loop through all the words in the vocab
    for w in vocab1:
        if w not in function_words and w in vocab1 and w in vocab2:
            neighbours1 = set(neighbors(w, vectors1, vocab1, k))
            neighbours2 = set(neighbors(w, vectors2, vocab2, k))
            nn_scores.append((len(neighbours1.intersection(neighbours2)), w))
            
    nn_scores_sorted = sorted(nn_scores)
    return nn_scores_sorted

# Labour vs Conservative

In [21]:
conservatives = all_contributions.query("party == 'Conservative'")
labour = all_contributions.query("party == 'Labour'")

### Compare with models

In [24]:
%%time

# Conservative model
con_model = Word2Vec(all_toks.loc[conservatives.index], size=300)

# Labour model
lab_model = Word2Vec(all_toks.loc[labour.index], size=300)

Wall time: 1min 43s


In [25]:
%%time
ranked_words_models = get_most_changey_words_with_models(con_model, lab_model, n=10, k=1000)

Wall time: 1min 25s


In [26]:
ranked_words_models[:20]

[(10, 'lab'),
 (17, 'ea'),
 (20, 'honours'),
 (21, 'con'),
 (21, 'faraday'),
 (22, 'disappears'),
 (24, 'nudge'),
 (25, 'caseload'),
 (25, 'indcs'),
 (25, 'straightaway'),
 (26, 'renationalisation'),
 (27, 'decant'),
 (27, 'ordinarily'),
 (28, 'speculated'),
 (29, 'accomplished'),
 (29, 'nettle'),
 (29, 'settles'),
 (29, 'spun'),
 (30, 'mandating'),
 (30, 'reaping')]

### Compare with vectors

In [27]:
%%time
vocab_con, vectors_con = get_top_vocab_and_vectors(con_model)
vocab_lab, vectors_lab = get_top_vocab_and_vectors(lab_model)

Wall time: 58.5 ms


In [28]:
%%time
ranked_words_vectors = get_most_changey_words_with_vectors(vocab_con, vocab_lab, vectors_con, vectors_lab, k=1000)

Wall time: 1min


In [29]:
ranked_words_vectors[:20]

[(197, 'honours'),
 (197, 'mirrors'),
 (209, 'bypass'),
 (215, 'presiding'),
 (230, 'continuously'),
 (235, 'supposedly'),
 (274, 'manual'),
 (276, 'redditch'),
 (280, 'deane'),
 (283, 'promptly'),
 (285, 'harlow'),
 (286, 'seemingly'),
 (287, 'inadvertently'),
 (292, 'revolutionary'),
 (303, 'naturally'),
 (306, 'duck'),
 (307, 'continually'),
 (314, 'stirling'),
 (315, 'remotely'),
 (323, 'bogus')]

### Some Examples with Neighbours

In [30]:
min_freq = 50
check_freq = lambda w, m: m.wv.vocab[w].count > min_freq
queries = [w[1] for w in ranked_words_vectors if check_freq(w[1],con_model) and check_freq(w[1],lab_model)]
queries = queries[:10]
queries

['mirrors',
 'presiding',
 'supposedly',
 'harlow',
 'seemingly',
 'inadvertently',
 'naturally',
 'continually',
 'stirling',
 'bogus']

In [31]:
for query in queries:
    print(query)
    print("Con:", neighbors(query, vectors_con, vocab_con, 8))
    print("Lab:", neighbors(query, vectors_lab, vocab_lab, 8))
    print("")

mirrors
Con: ['provides', 'contains', 'applies', 'seeks', 'covers', 'repeal', 'includes', 'relating']
Lab: ['likely', 'getting', 'becoming', 'words', 'self-', 'who', 'from', 'political']

presiding
Con: ['police', 'senior', 'officer', 'prison', 'crown', 'presiding', 'director', 'royal']
Lab: ['weekend', 'years', 'austerity', 'decades', 'decade', 'half', 'presided', 'taking']

supposedly
Con: ['whose', 'old', 'by', 'liberal', 'convicted', 'killed', 'acts', 'man']
Lab: ['company', 'property', 'workers', 'less', 'based', 'trade', 'organisation', 'non-']

harlow
Con: ['st', 'constituency', 'town', 'valley', 'mid', 'west', 'park', 'county']
Lab: ['hon', ')', 'friend', 'sir', 'dr', 'john', 'lady', 'david']

seemingly
Con: ['sedentary', 'woman', 'an', ')', 'there', 'so-', 'death', 'person']
Lab: ['brexit', 'financial', 'caused', 'zero-', 'energy', 'rhetoric', 'delay', 'drug']

inadvertently
Con: ['anything', 'harm', 'damage', 'otherwise', 'inadvertently', 'any', 'anyone', 'anybody']
Lab: ['te

In [32]:
min_freq = 50
check_freq = lambda w, m: m.wv.vocab[w].count > min_freq
queries = [w[1] for w in ranked_words_models if check_freq(w[1],con_model) and check_freq(w[1],lab_model)]
queries = queries[:10]
queries

['thereafter',
 'mirrors',
 'nationwide',
 'presiding',
 'super-',
 'supposedly',
 'ideally',
 'alert',
 'like-',
 'simultaneously']

In [33]:
for query in queries:
    print(query)
    print("Con:", [x[0] for x in con_model.wv.most_similar(query, topn=8)])
    print("Lab:", [x[0] for x in lab_model.wv.most_similar(query, topn=8)])
    print("")

thereafter
Con: ['afterwards', 'subsequent', 'commence', 'until', 'dissolution', 'revaluation', 'recess', 'immediately']
Lab: ['wounding', 'traineeship', 'apache', 'melton', 'atcham', 'clydebridge', 'frome', 'tapers']

mirrors
Con: ['criminalises', 'amends', 'repeals', 'defunct', 'streamlines', 'hybrid', 'authorises', 'skelly']
Lab: ['soundbites', 'smoke', 'hordes', 'reactionary', 'enthused', 'shirkers', 'bureaucrats', 'insults']

nationwide
Con: ['pilot', 'multi-', 'voucher', 'designing', 'hyper-', 'groundbreaking', 'genomes', 'outreach']
Lab: ['clydebridge', 'wigan', 'salford', 'whiston', 'outer', 'haringey', 'edmonton', 'rotherham']

presiding
Con: ['commanding', 'certification', 'warranted', 'senior', 'chief', 'police', 'miNUMBER', 'outgoing']
Lab: ['preside', 'presided', 'roughshod', 'glossed', 'hangs', 'presides', 'hanging', 'pored']

super-
Con: ['weston-', 'mare', 'berwick-', 'aldridge-', 'selby', 'tweed', 'upon-', 'brigg']
Lab: ['corporations', 'rich', 'giveaways', 'wealthy', 

# EU vs Non-EU

In [22]:
%%time
from build_features import split_corpus

eu_mentions, non_eu_mentions = split_corpus(all_contributions, "eu")

Wall time: 15.9 s


In [35]:
%%time

# EU model
eu_model = Word2Vec(all_toks.loc[eu_mentions.index], size=300)

# Non-EU model
neu_model = Word2Vec(all_toks.loc[non_eu_mentions.index], size=300)

Wall time: 2min 2s


### With Models

In [36]:
%%time
eu_ranked_words_models = get_most_changey_words_with_models(eu_model, neu_model, n=10, k=1000)

Wall time: 1min 16s


In [37]:
eu_ranked_words_models[:20]

[(3, 'ord'),
 (3, 'sterling'),
 (7, 'entrenchment'),
 (8, 'ceta'),
 (12, 'suing'),
 (13, 'circuit'),
 (14, 'decree'),
 (14, 'formalities'),
 (15, 'eurosceptic'),
 (15, 'gras'),
 (15, 'restarted'),
 (16, 'cemented'),
 (16, 'merchants'),
 (16, 'rigidity'),
 (16, 'unskilled'),
 (17, 'busting'),
 (17, 'exiting'),
 (17, 'foie'),
 (17, 'redefining'),
 (17, 'tra')]

### With Vectors

In [38]:
%%time
vocab_eu, vectors_eu = get_top_vocab_and_vectors(eu_model)
vocab_neu, vectors_neu = get_top_vocab_and_vectors(neu_model)

Wall time: 56.5 ms


In [39]:
%%time
eu_ranked_words_vectors = get_most_changey_words_with_vectors(vocab_eu, vocab_neu, vectors_eu, vectors_neu, k=1000)

Wall time: 58.5 s


In [40]:
eu_ranked_words_vectors[:20]

[(92, 'ii'),
 (150, 'master'),
 (153, 'sterling'),
 (195, 'dangerously'),
 (198, 'correcting'),
 (198, 'straightaway'),
 (200, 'prisoner'),
 (217, 'bypass'),
 (218, 'conversion'),
 (223, 'staying'),
 (225, 'naturally'),
 (225, 'relentlessly'),
 (226, 'honours'),
 (231, 'contracting'),
 (237, 'manifestly'),
 (239, 'leaving'),
 (244, 'post'),
 (244, 'timetabling'),
 (245, 'leavers'),
 (246, 'invariably')]

### Some Examples

In [41]:
min_freq = 50
check_freq = lambda w, m: m.wv.vocab[w].count > min_freq
queries = [w[1] for w in eu_ranked_words_vectors if check_freq(w[1],eu_model) and check_freq(w[1],neu_model)]
queries = queries[:10]
queries

['ii',
 'sterling',
 'correcting',
 'prisoner',
 'bypass',
 'conversion',
 'staying',
 'naturally',
 'honours',
 'contracting']

In [42]:
for query in queries:
    print(query)
    print("EU :", neighbors(query, vectors_eu, vocab_eu, 8))
    print("NEU:", neighbors(query, vectors_neu, vocab_neu, 8))
    print("")

ii
EU : ['ii', 'regulation', 'arrest', 'information', 'directive', 'existing', 'access', '(']
NEU: ['c', 'b', 'st', 'o', 'elizabeth', 'insert', 'king', 'victoria']

sterling
EU : ['NUMBER%', 'unemployment', 'growth', 'exports', 'pound', 'wages', 'average', 'poverty']
NEU: ['sterling', 'pensions', 'fantastic', 'excellent', 'tireless', '(', 'sir', 'constituent']

correcting
EU : ['powers', 'viii', 'henry', 'clauses', 'bill', 'section', 'delegated', 'statutory']
NEU: ['put', 'correct', 'telling', 'giving', 'saying', 'talking', 'told', 'struck']

prisoner
EU : ['directive', 'regulations', 'non-', 'treaty', 'sanctions', 'data', 'regulation', 'existing']
NEU: ['officer', 'prison', 'prisoners', 'woman', 'someone', 'child', 'prisoner', 'offenders']

bypass
EU : ['provide', 'make', 'allow', 'ensure', 'avoid', 'prevent', 'establish', 'commit']
NEU: ['aNUMBER', 'road', 'member', 'town', 'mNUMBER', 'airport', 'towns', 'roads']

conversion
EU : ['david', 'comments', 'chancellor', 'majesty', 'speech

In [43]:
min_freq = 50
check_freq = lambda w, m: m.wv.vocab[w].count > min_freq
queries = [w[1] for w in eu_ranked_words_models if check_freq(w[1],eu_model) and check_freq(w[1],neu_model)]
queries = queries[:10]
queries

['sterling',
 'conversion',
 'honours',
 'bypass',
 'counting',
 'naturally',
 'en',
 'correcting',
 'privately',
 'contracting']

In [44]:
for query in queries:
    print(query)
    print("Con:", [x[0] for x in eu_model.wv.most_similar(query, topn=8)])
    print("Lab:", [x[0] for x in neu_model.wv.most_similar(query, topn=8)])
    print("")

sterling
Con: ['devaluation', 'depreciation', 'inflation', 'pound', 'unemployment', 'earnings', 'decline', 'decrease']
Lab: ['tireless', 'tremendous', 'preparatory', 'valiant', 'jamieson', 'steen', 'praising', 'superb']

conversion
Con: ['poem', 'julius', 'damascene', 'scarf', 'resistance', 'salt', 'cell', 'drama']
Lab: ['removal', 'convert', 'refusal', 'residence', 'promotion', 'conversions', 'reintroduction', 'damascene']

honours
Con: ['delivers', 'honouring', 'protects', 'secures', 'maintains', 'reaffirmed', 'undermines', 'supports']
Lab: ['bicameral', 'fixture', 'yellow', 'todger', 'eighth', 'medal', 'exhaustive', 'coveted']

bypass
Con: ['install', 'facilitate', 'fetter', 'assist', 'organise', 'constrain', 'initiate', 'hamper']
Lab: ['aNUMBER', 'expressway', 'tunnel', 'junction', 'mNUMBER', 'redevelopment', 'depot', 'stonehenge']

counting
Con: ['overlooking', 'footway', 'breeding', 'squeezing', 'pavements', 'assembled', 'freezing', 'lecturers']
Lab: ['spads', 'whammy', 'languish

# Remainers vs Leavers

In [23]:
remain = all_contributions.query("ref_stance == 'remain'")
leave = all_contributions.query("ref_stance == 'leave'")

In [46]:
%%time

# remain model
rem_model = Word2Vec(all_toks.loc[remain.index], size=300)

# leave model
lea_model = Word2Vec(all_toks.loc[leave.index], size=300)

Wall time: 1min 50s


In [47]:
%%time
vocab_rem, vectors_rem = get_top_vocab_and_vectors(rem_model)
vocab_lea, vectors_lea = get_top_vocab_and_vectors(lea_model)

Wall time: 58.5 ms


In [48]:
%%time
rem_lea_ranked_words_vectors = get_most_changey_words_with_vectors(vocab_rem, vocab_lea, vectors_rem, vectors_lea, k=1000)

Wall time: 1min 3s


In [49]:
rem_lea_ranked_words_vectors[:20]

[(166, 'duck'),
 (175, 'acknowledging'),
 (229, 'dodgy'),
 (240, 'rotten'),
 (257, 'gamble'),
 (260, 'bells'),
 (260, 'fixing'),
 (260, 'useless'),
 (262, 'reigate'),
 (270, 'anymore'),
 (270, 'recalled'),
 (271, 'mirrors'),
 (271, 'supposedly'),
 (272, 'wash'),
 (283, 'anticipation'),
 (285, 'relentlessly'),
 (286, 'meanwhile'),
 (290, 'ii'),
 (291, 'remotely'),
 (303, 'decisively')]

In [50]:
min_freq = 50
check_freq = lambda w, m: m.wv.vocab[w].count > min_freq
queries = [w[1] for w in rem_lea_ranked_words_vectors if check_freq(w[1],rem_model) and check_freq(w[1],lea_model)]
queries = queries[:10]
queries

['supposedly',
 'ii',
 'fareham',
 'plots',
 'google',
 'eastleigh',
 'crawley',
 'halt',
 'typical',
 'display']

In [51]:
for query in queries:
    print(query)
    print("Remain:", [x[0] for x in eu_model.wv.most_similar(query, topn=8)])
    print("Leaver:", [x[0] for x in neu_model.wv.most_similar(query, topn=8)])
    print("")

supposedly
Remain: ['plotting', 'routinely', 'barbarians', 'shredded', 'abusing', 'sued', 'investigating', 'brutally']
Leaver: ['defending', 'enforceable', 'bureaucrats', 'legally', 'asserting', 'cheating', 'attacking', 'essentially']

ii
Remain: ['ecris', 'database', 'ec', 'prisoner', 'sharing', 'europol', 'NUMBERc', 'eurojust']
Leaver: ['mckinnell', 'ag', 'girvan', 'bellingham', 'frith', 'beckett', 'k', 'g']

fareham
Remain: ['kirkcaldy', 'spelthorne', 'gedling', 'cowdenbeath', 'braintree', 'dwyfor', 'stalybridge', 'streatham']
Leaver: ['dewsbury', 'crawley', 'neath', 'eastleigh', 'chippenham', 'cheadle', 'newark', 'chichester']

plots
Remain: ['radicalism', 'kovtun', 'fgm', 'rapes', 'boko', 'islamism', 'sexually', 'preachers']
Leaver: ['attacks', 'murders', 'foiled', 'missiles', 'rockets', 'incidents', 'atrocities', 'sympathisers']

google
Remain: ['facebook', 'twitter', 'auschwitz', 'amazon', 'apple', 'asda', 'passchendaele', 'injuries']
Leaver: ['facebook', 'apple', 'amazon', 'twi

# Over Time

In [52]:
%%time
time_models = dict()
# Train a language model for various different portions of the forum.
for w, w_posts in get_time_windows(all_contributions, 365, 365, time_column="date"):
    time_models[w] = Word2Vec(all_toks.loc[w_posts.index], size=300)

Wall time: 2min 12s


In [24]:
def neighbours_over_time(search_term, time_models, top_n=10000):
    for window, curr_model in time_models.items():
        curr_vocab, curr_vectors = get_top_vocab_and_vectors(curr_model, top_n)
        print(window)
        if search_term in curr_vocab:
            print(neighbors(search_term, curr_vectors, curr_vocab, 12))

In [54]:
neighbours_over_time("leave", time_models)

2015-05-18 00:00:00
['stay', 'go', 'vote', 'lose', 'come', 'be', 'remain', 'take', 'get', 'move', 'tell', 'give']
2016-05-17 00:00:00
['referendum', 'remain', 'brexit', 'leaving', 'stay', 'left', 'lose', 'go', 'exit', 'vote', 'get', 'come']
2017-05-17 00:00:00
['referendum', 'exit', 'leaving', 'left', 'remain', 'vote', 'stay', 'lose', 'look', 'voted', 'move', 'get']
2018-05-17 00:00:00
['referendum', 'remain', 'leaving', 'stay', 'vote', 'left', 'get', 'go', 'exit', 'lose', 'brexit', 'deal']


In [55]:
neighbours_over_time("single", time_models)

2015-05-18 00:00:00
['every', 'one', 'one-', 'two-', 'union', 'free', 'parent', 'largest', 'third', 'each', 'child', 'person']
2016-05-17 00:00:00
['union', 'eu', 'european', 'labour', 'market', 'customs', 'every', 'common', 'vote', 'leave', 'one', 'an']
2017-05-17 00:00:00
['union', 'customs', 'labour', 'common', 'eu', 'euratom', 'eea', 'internal', 'one', 'vote', 'free', 'an']
2018-05-17 00:00:00
['common', 'customs', 'every', 'union', 'eu', 'one', 'labour', 'rule', 'an', 'market', 'any', 'free']


### Get examples for chapter

In [53]:
def neighbours_over_time_comma_delimited(query, time_models, top_n=10000):
    for window, curr_model in time_models.items():
        curr_vocab, curr_vectors = get_top_vocab_and_vectors(curr_model, top_n)
        if query in curr_vocab:
            print(window.strftime("%y/%m/%d"), end=",")
            print(",".join(neighbors(query, curr_vectors, curr_vocab, 6)))
        else:
            print(window)

In [93]:
for query in ["brexit", "referendum", "immigration", "single"]:
    print(query)
    neighbours_over_time_comma_delimited(query, time_models)
    print()

brexit
15/05/18,eu,vote,leave,european,election,union
16/05/17,referendum,eu,negotiations,leave,trade,vote
17/05/17,exit,eu,trade,referendum,union,negotiations
18/05/17,deal,referendum,vote,backstop,prime,trade

referendum
15/05/18,election,vote,elections,debate,parliament,period
16/05/17,vote,election,brexit,leave,voted,parliament
17/05/17,election,vote,leave,voted,brexit,negotiations
18/05/17,vote,election,brexit,voted,leave,article

immigration
15/05/18,welfare,criminal,justice,migration,sanctions,asylum
16/05/17,foreign,eu,prime,brexit,movement,trade
17/05/17,trade,eu,tax,legal,customs,foreign
18/05/17,trade,justice,fisheries,migration,tax,eu

single
15/05/18,every,one,one-,two-,union,free
16/05/17,union,eu,european,labour,market,customs
17/05/17,union,customs,labour,common,eu,euratom
18/05/17,common,customs,every,union,eu,one



### Changiest words

In [89]:
def get_changiest_words_per_window(time_models, top_n=10000, k=1000):
    out_dic = dict()
    windows = list(time_models.keys())
    for i in range(1, len(windows)):
        model_1 = time_models[windows[i-1]]
        model_2 = time_models[windows[i]]

        vocab_1, vectors_1 = get_top_vocab_and_vectors(model_1, top_n)
        vocab_2, vectors_2 = get_top_vocab_and_vectors(model_2, top_n)

        out_dic[windows[i]] = get_most_changey_words_with_vectors(vocab_1, vocab_2, vectors_1, vectors_2, k=k)

    return out_dic

In [57]:
%%time
changiest_words_per_window = get_changiest_words_per_window(time_models, 5000)

Wall time: 1min 19s


In [27]:
def print_changiest_over_time(changiest_words_per_window, min_freq=0):
    for window, changey_words in changiest_words_per_window.items():
        check_freq = lambda w, m: m.wv.vocab[w].count > min_freq
        queries = [w[1] for w in changey_words if check_freq(w[1],model)]
        queries = queries[:20]

        print(window)
#         t20_words = [f"{w[1]} {w[0]}" for w in changey_words[:20]]
        print("{:20} {:20} {:20} {:20} {:20}".format(*queries[:5]))
        print("{:20} {:20} {:20} {:20} {:20}".format(*queries[5:10]))
        print("{:20} {:20} {:20} {:20} {:20}".format(*queries[10:15]))
        print("{:20} {:20} {:20} {:20} {:20}".format(*queries[15:20]))
        print("-----------------------------")

In [119]:
print_changiest_over_time(changiest_words_per_window, 100)

2016-05-17 00:00:00
google               dog                  customs              style                similarly           
plain                bomb                 e-                   strikes              rbs                 
tv                   managing             exit                 grammar              independently       
trading              supreme              s.                   brexit               smith               
-----------------------------
2017-05-17 00:00:00
retained             osborne              selection            tower                super-              
principal            no-                  salisbury            radio                bbc                 
shipley              similarly            privatisation        wear                 s.                  
leigh                chemical             philip               semitism             continually         
-----------------------------
2018-05-17 00:00:00
offensive            grieve             

In [60]:
neighbours_over_time("customs", time_models)

2015-05-18 00:00:00
['gas', 'revenue', 'customs', 'trade', 'oil', 'energy', 'taxpayers', 'intelligence', 'insert', 'reduce', 'department', 'billion']
2016-05-17 00:00:00
['customs', 'eu', 'trade', 'market', 'europe', 'credit', 'movement', 'national', 'single', 'trading', 'membership', 'tax']
2017-05-17 00:00:00
['european', 'trade', 'eu', 'border', 'euratom', 'market', 'brexit', 'credit', 'regulatory', 'eea', 'transitional', 'withdrawal']
2018-05-17 00:00:00
['european', 'backstop', 'deal', 'trade', 'agreement', 'eu', 'border', 'market', 'regulatory', 'relationship', 'arrangement', 'article']


In [61]:
neighbours_over_time("brexit", time_models)

2015-05-18 00:00:00
['eu', 'vote', 'leave', 'european', 'election', 'union', 'membership', 'stay', 'europe', 'president', 'campaign', 'leader']
2016-05-17 00:00:00
['referendum', 'eu', 'negotiations', 'leave', 'trade', 'vote', 'exit', 'what', 'article', 'uncertainty', 'union', 'state']
2017-05-17 00:00:00
['exit', 'eu', 'trade', 'referendum', 'union', 'negotiations', 'border', 'vote', 'deal', 'transition', 'customs', 'foreign']
2018-05-17 00:00:00
['deal', 'referendum', 'vote', 'backstop', 'prime', 'trade', 'exit', 'union', 'eu', 'leave', 'border', 'state']


In [62]:
neighbours_over_time("zero", time_models)

2015-05-18 00:00:00
['carbon', 'rate', 'emissions', 'lower', 'zero', 'gdp', 'deficit', 'higher', 'reduction', 'growth', 'low', 'price']
2016-05-17 00:00:00
['rate', 'higher', 'income', 'lower', 'rates', 'low', '£', 'average', 'gdp', 'prices', 'price', 'increase']
2017-05-17 00:00:00
['£', 'rate', 'average', 'zero', 'rates', 'lower', 'growth', 'NUMBER', 'tariffs', 'income', 'homes', 'billion']
2018-05-17 00:00:00
['carbon', 'emissions', 'NUMBER%', 'reduce', 'net', 'growth', 'gdp', 'tariffs', 'global', 'rate', 'low', 'gas']


In [63]:
neighbours_over_time("tower", time_models)

2015-05-18 00:00:00
['st', 'south', 'royal', 'city', '(', 'borough', 'towns', 'park', 'station', 'glasgow', 'street', 'county']
2016-05-17 00:00:00
['(', 'st', 'hospital', 'royal', 'city', 'borough', 'centre', 'college', 'constituency', 'county', 'park', 'station']
2017-05-17 00:00:00
['grenfell', 'blocks', 'fire', 'homes', 'cladding', 'accommodation', 'tragedy', 'residents', 'happened', 'buildings', 'safety', 'hospital']
2018-05-17 00:00:00
['tower', 'hospital', 'died', 'london', 'street', 'constituency', 'fire', 'grenfell', 'visited', 'park', 'station', 'st']


In [64]:
neighbours_over_time("exit", time_models)

2015-05-18 00:00:00
['benefit', 'pension', 'overseas', 'income', 'payment', 'annual', 'payments', 'compensation', 'licence', 'receive', 'exemption', 'taxpayer']
2016-05-17 00:00:00
['negotiations', 'brexit', 'article', 'leave', 'referendum', 'eu', 'agreement', 'withdrawal', 'membership', 'leaving', 'departure', 'negotiating']
2017-05-17 00:00:00
['withdrawal', 'brexit', 'eu', 'leave', 'law', 'negotiations', 'implementation', 'transition', 'statute', 'referendum', 'agreement', 'legislation']
2018-05-17 00:00:00
['brexit', 'leave', 'leaving', 'article', 'withdrawal', 'eu', 'departure', 'referendum', 'relationship', 'deal', 'extension', 'negotiations']


In [108]:
neighbours_over_time("no-", time_models)

2015-05-18 00:00:00
['nuclear', 'air', 'where', 'anywhere', 'no-', 'terrorist', 'towards', 'assad', 'isil', 'without', 't', 'beyond']
2016-05-17 00:00:00
['nuclear', 'or', 'zone', 'create', 'fly', 'weapons', 'competitive', 'where', 'car', 'uk-', 'property', 'two-']
2017-05-17 00:00:00
['brexit', 'transitional', 'trade', 'no', 'great', 'transition', 'bad', 'without', 'with', 'border', 'cliff', 'withdrawal']
2018-05-17 00:00:00
['no', 'without', 'bad', 'negotiated', 'great', 'scenario', 'any', 'post-', 'brexit', 'chequers', 'vote', 'contingency']


In [120]:
neighbours_over_time("permanent", time_models)

2015-05-18 00:00:00
['foreign', 'home', 'permanent', 'cabinet', 'defence', 'chief', 'financial', 'private', 'states', 'former', 'justice', 'exchequer']
2016-05-17 00:00:00
['home', 'state', 'permanent', 'cabinet', 'defence', 'financial', 'states', 'chief', 'private', 'justice', 'former', 'rights']
2017-05-17 00:00:00
['home', 'state', 'foreign', 'cabinet', 'accommodation', 'private', 'environment', 'homes', 'chief', 'temporary', 'financial', 'housing']
2018-05-17 00:00:00
['state', 'permanent', 'home', 'cabinet', 'customs', 'defence', 'transport', 'environment', 'chief', 'trade', 'brexit', 'general']


In [121]:
for query in ["brexit", "customs", "strike", "tower", "salisbury", "no-"]:
    print(query)
    neighbours_over_time_comma_delimited(query, time_models)
    print()

brexit
15/05/18,eu,vote,leave,european,election,union
16/05/17,referendum,eu,negotiations,leave,trade,vote
17/05/17,exit,eu,trade,referendum,union,negotiations
18/05/17,deal,referendum,vote,backstop,prime,trade

customs
15/05/18,gas,revenue,customs,trade,oil,energy
16/05/17,customs,eu,trade,market,europe,credit
17/05/17,european,trade,eu,border,euratom,market
18/05/17,european,backstop,deal,trade,agreement,eu

strike
15/05/18,take,taken,vote,thing,industrial,action
16/05/17,strike,get,carry,be,hold,taken
17/05/17,take,get,strike,give,be,carry
18/05/17,negotiate,be,get,take,reach,create

tower
15/05/18,st,south,royal,city,(,borough
16/05/17,(,st,hospital,royal,city,borough
17/05/17,grenfell,blocks,fire,homes,cladding,accommodation
18/05/17,tower,hospital,died,london,street,constituency

salisbury
15/05/18,royal,tribute,st,john,james,town
16/05/17,hon,south,north,east,states,west
17/05/17,grenfell,war,attack,syria,events,terrorist
18/05/17,killed,war,attacks,died,events,syria

no-
15/05/

### Changiest Words Conservative vs Labour

In [28]:
%%time
con_time_models = dict()
lab_time_models = dict()
# Train a language model for various different portions of the forum.
for w, w_posts in get_time_windows(all_contributions, 365, 365, time_column="date"):
    curr_con = w_posts[w_posts.index.isin(conservatives.index)].index
    curr_lab = w_posts[w_posts.index.isin(labour.index)].index
    
    con_time_models[w] = Word2Vec(all_toks.loc[curr_con], size=300)
    lab_time_models[w] = Word2Vec(all_toks.loc[curr_lab], size=300)

Wall time: 2min 9s


In [29]:
%%time
con_changiest_words_per_window = get_changiest_words_per_window(con_time_models, 5000)

Wall time: 1min 18s


In [60]:
print_changiest_over_time(con_changiest_words_per_window, 100)

2016-05-17 00:00:00
selection            dual                 upper                customs              reflecting          
google               english              blue                 purely               red                 
presumably           typical              strikes              similarly            precious            
stark                moreover             green                exit                 regards             
-----------------------------
2017-05-17 00:00:00
style                selection            henry                chaos                thereafter          
retained             hopes                naturally            moreover             bearing             
shock                text                 no-                  leasehold            inadvertently       
journalists          separately           precious             ends                 merit               
-----------------------------
2018-05-17 00:00:00
e-                   forever            

In [56]:
def compare_neighours(query):
    print("Conservative")
    neighbours_over_time(query, con_time_models)
    print("\nLabour")
    neighbours_over_time(query, lab_time_models)
    
def compare_neighours_comma_delimited(query):
    print("Conservative")
    neighbours_over_time_comma_delimited(query, con_time_models)
    print("\nLabour")
    neighbours_over_time_comma_delimited(query, lab_time_models)

In [57]:
compare_neighours_comma_delimited("chaos")

Conservative
15/05/18,iraq,isil,daesh,war,conflict,threat
16/05/17,happened,war,weapons,ago,east,arabia
17/05/17,no,court,system,risk,regime,pension
18/05/17,deal,border,referendum,brexit,backstop,risk

Labour
15/05/18,department,crisis,spending,impact,office,recent
16/05/17,country,world,investment,funding,past,budget
17/05/17,debate,policy,budget,NUMBER%,question,problem
18/05/17,deal,austerity,brexit,crisis,economy,world


In [62]:
compare_neighours_comma_delimited("no-")

Conservative
15/05/18,terrorist,into,daesh,air,isil,global
16/05/17,nuclear,parking,property,trade,car,&
17/05/17,trade,no,customs,without,long-,brexit
18/05/17,no,without,with,great,negotiated,brexit

Labour
15/05/18,such,between,free,low-,against,high-
16/05/17,&,free,long-,low-,year-,between
17/05/17,with,transitional,brexit,no,deal,agreement
18/05/17,no,brexit,without,trade,any,vote


In [63]:
compare_neighours_comma_delimited("customs")

Conservative
15/05/18,tax,european,majesty,trade,taxpayers,billion
16/05/17,customs,eu,trade,europe,national,single
17/05/17,european,eu,trade,europe,market,withdrawal
18/05/17,european,eu,backstop,trade,deal,agreement

Labour
15/05/18,majesty,’,allowance,cut,department,green
16/05/17,eu,trade,customs,market,union,national
17/05/17,european,trade,eu,market,agreement,brexit
18/05/17,european,trade,eu,agreement,deal,market


In [64]:
compare_neighours_comma_delimited("exit")

Conservative
15/05/18,income,eu,rate,insurance,£,exit
16/05/17,eu,negotiations,brexit,european,agreement,leave
17/05/17,eu,withdrawal,leave,negotiations,law,brexit
18/05/17,leave,eu,deal,period,leaving,article

Labour
15/05/18,income,impact,£,increase,tax,NUMBER%
16/05/17,european,trade,market,union,brexit,agreement
17/05/17,eu,period,agreement,brexit,day,european
18/05/17,eu,withdrawal,article,exit,trade,customs


In [34]:
%%time
lab_changiest_words_per_window = get_changiest_words_per_window(lab_time_models, 5000)

Wall time: 1min 15s


In [67]:
print_changiest_over_time(lab_changiest_words_per_window, 100)

2016-05-17 00:00:00
managing             irresponsible        stem                 half-                differently         
rarely               breaking             firmly               dog                  presumably          
definitely           detention            court                constant             fashion             
loophole             empty                bold                 english              assad               
-----------------------------
2017-05-17 00:00:00
presumably           bbc                  selection            basically            differently         
promising            worthy               content              continually          fashion             
backwards            approaching          dreadful             normal               retained            
binding              witness              no-                  russians             donations           
-----------------------------
2018-05-17 00:00:00
politically          agent              

In [65]:
compare_neighours_comma_delimited("sovereign")

Conservative
15/05/18,united,sovereign,play,european,democratic,proud
16/05/17,european,sovereign,pension,vote,against,member
17/05/17,global,nuclear,member,european,nation,eu
18/05/17,european,law,customs,control,democratic,nation

Labour
15/05/18,member,leader,united,affairs,office,european
16/05/17,united,vote,members,leader,referendum,this
17/05/17,our,security,democracy,must,international,armed
18/05/17,united,member,parliament,party,british,scottish


In [66]:
compare_neighours_comma_delimited("sovereignty")

Conservative
15/05/18,rights,sovereignty,membership,nation,members,interests
16/05/17,party,referendum,parliament,vote,democracy,sovereignty
17/05/17,parliament,democracy,law,membership,rights,sovereignty
18/05/17,interests,democracy,rights,referendum,voted,vote

Labour
15/05/18,eu,party,united,role,european,membership
16/05/17,debate,leader,parliament,party,secretary,house
17/05/17,debate,democracy,committee,parliament,membership,role
18/05/17,agreement,economy,country,parliament,future,democracy


In [72]:
compare_neighours_comma_delimited("brexit")

Conservative
15/05/18,”,NUMBER%,mr,madam,referendum,insert
16/05/17,eu,referendum,negotiations,union,trade,european
17/05/17,eu,agreement,trade,negotiations,exit,period
18/05/17,deal,referendum,vote,backstop,us,agreement

Labour
15/05/18,united,page,european,amendment,election,vote
16/05/17,eu,referendum,us,european,vote,trade
17/05/17,trade,state,foreign,eu,customs,deal
18/05/17,deal,vote,state,prime,trade,eu


### Changiest Words Remain vs Leave

In [37]:
%%time
rem_time_models = dict()
lea_time_models = dict()
# Train a language model for various different portions of the forum.
for w, w_posts in get_time_windows(all_contributions, 365, 365, time_column="date"):
    curr_rem = w_posts[w_posts.index.isin(remain.index)].index
    curr_lea = w_posts[w_posts.index.isin(leave.index)].index
    
    rem_time_models[w] = Word2Vec(all_toks.loc[curr_rem], size=300)
    lea_time_models[w] = Word2Vec(all_toks.loc[curr_lea], size=300)

Wall time: 2min 20s


In [74]:
def compare_neighours(query):
    print("Remain")
    neighbours_over_time(query, rem_time_models)
    print("\nLeave")
    neighbours_over_time(query, lea_time_models)
    
def compare_neighours_comma_delimited(query):
    print("Remain")
    neighbours_over_time_comma_delimited(query, rem_time_models)
    print("\nLeave")
    neighbours_over_time_comma_delimited(query, lea_time_models)

In [106]:
%%time
rem_changiest_words_per_window = get_changiest_words_per_window(rem_time_models, 5000, k=100)

Wall time: 18.2 s


In [105]:
print_changiest_over_time(rem_changiest_words_per_window, 100)

2016-05-17 00:00:00
cell                 clubs                e-                   essentially          exit                
facility             flooding             flow                 for-                 gift                
guaranteed           hub                  integration          managing             nationally          
no-                  parental             permitted            plain                pockets             
-----------------------------
2017-05-17 00:00:00
al-                  appointments         awards               books                cards               
channel              childhood            cold                 collectively         cycle               
diesel               facebook             financing            fresh                functioning         
medicines            no-                  pathway              precious             prejudice           
-----------------------------
2018-05-17 00:00:00
adding               appeals            

In [92]:
%%time
lea_changiest_words_per_window = get_changiest_words_per_window(lea_time_models, 5000, k=100)

Wall time: 17.1 s


In [93]:
print_changiest_over_time(lea_changiest_words_per_window, 100)

2016-05-17 00:00:00
claimant             naturally            elect                progressive          barnett             
bold                 explaining           highlighting         lock                 broadly             
orders               attempts             customs              inappropriate        polish              
re-                  settled              cards                command              platform            
-----------------------------
2017-05-17 00:00:00
restoration          hopefully            challenged           limitations          spot                
definitely           limits               retained             returns              compelling          
explaining           taught               block                explicit             formal              
formed               legacy               pride                references           substance           
-----------------------------
2018-05-17 00:00:00
temporary            forecasts          

In [55]:
compare_neighours_comma_delimited("sovereign")

Remain
15/05/18,devolved,member,secretary,democratic,european,united
16/05/17,democratic,united,leader,democracy,sovereign,vote
17/05/17,european,nuclear,nation,democratic,democracy,role
18/05/17,nation,independent,european,democratic,british,democracy

Leave
15/05/18,european,rights,eu,our,nation,protect
16/05/17,british,european,member,leader,kingdom,leave
17/05/17,(,european,customs,international,rights,united
18/05/17,customs,eu,united,law,independent,citizens


In [76]:
compare_neighours_comma_delimited("mess")

Remain
15/05/18,problem,deficit,step,situation,budget,point
16/05/17,country,point,thing,step,situation,economy
17/05/17,step,situation,problem,crisis,position,mess
18/05/17,country,situation,deal,crisis,position,point

Leave
15/05/18,set,NUMBER%,labour,year,carried,tax
16/05/17,set,),£,point,pointed,carried
17/05/17,country,money,way,year,legislation,period
18/05/17,),mr,point,sir,being,were


In [107]:
compare_neighours_comma_delimited("progressive")

Remain
15/05/18,political,positive,common,democratic,cross-,effective
16/05/17,strong,modern,sustainable,global,important,positive
17/05/17,long-,low,tax,different,strong,common
18/05/17,customs,sustainable,co-,trade,common,based

Leave
15/05/18,dealing,working,mental,relationship,compared,million
16/05/17,’,s,all-,long-,its,national
17/05/17,s,),withdrawal,member,rights,union
18/05/17,withdrawal,long-,term,co-,(,economic
