In [1]:
import os
os.getcwd()
from datetime import datetime
import pickle
import random
import re
import numpy as np
random.seed(21)
import torch
from transformers import DistilBertForMaskedLM, DistilBertConfig, AutoTokenizer

2023-06-16 02:27:00.150501: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# TEXT DATA

### Sentences Containing Country Cases

In [2]:
# Decade labels covered by the analysis. Each label is spliced into file paths
# further down ('./crs_corpus/decade_<label>.txt' and './crs_models/<label>/').
# Note that the last label spans eleven years (2010-2020).
year_list = [
    '1900_1909',
    '1910_1919',
    '1920_1929',
    '1930_1939',
    '1940_1949',
    '1950_1959',
    '1960_1969',
    '1970_1979',
    '1980_1989',
    '1990_1999',
    '2000_2009',
    '2010_2020'
]

In [3]:
# Country names handled in the cells below (lower-case; multi-word names use single spaces).
country_list = ['china', 'north korea', 'south korea', 'canada', 'united kingdom', 'germany']

In [4]:
# country name -> {decade label -> list of sentences mentioning that country}
country_sentence_dict = {}

In [5]:
# country name -> {decade label -> boolean keep-mask returned by clean_sents}
country_mask_dict = {}

In [6]:
def clean_sents(sents, term):
    """Keep only the sentences that contain `term` as whole, consecutive words.

    Every sentence is stripped of surrounding whitespace and split on single
    spaces. A sentence is kept when the space-separated words of `term` occur
    as one contiguous run of tokens. Requiring contiguity (rather than testing
    each word's presence separately) prevents a sentence such as
    "north koreans left korea" from being counted as a mention of "north korea".

    Args:
        sents: iterable of raw sentence strings (e.g. lines read from a file).
        term: a single word or a space-separated phrase.

    Returns:
        A tuple `(kept, mask)`: `kept` is the list of stripped sentences that
        contain the term, `mask` is a list of booleans with one entry per input
        sentence telling whether that sentence was kept.
    """
    terms = term.split(" ")
    span = len(terms)
    stripped = [sent.strip() for sent in sents]

    def _contains_term(sent):
        # Split once per sentence and slide a window of len(terms) tokens over it.
        tokens = sent.split(" ")
        return any(tokens[start:start + span] == terms
                   for start in range(len(tokens) - span + 1))

    mask = [_contains_term(sent) for sent in stripped]
    # Plain list filtering avoids building a fixed-width NumPy string array.
    result = [sent for sent, keep in zip(stripped, mask) if keep]

    return result, mask

In [7]:
# The masks are not strictly needed anymore, but they are kept in case they prove useful.
def get_decade_sentences(years, term):
    """Collect, for each decade, the corpus sentences that mention `term`.

    For every label in `years` the file './crs_corpus/decade_<label>.txt' is
    read line by line. Lines containing `term` as a substring are pre-selected
    and then passed to `clean_sents` for the stricter check.

    Args:
        years: iterable of decade labels (used verbatim in the file name).
        term: word or phrase to look for.

    Returns:
        A tuple `(sentences, masks)` of two dicts keyed by decade label:
        the sentences kept by `clean_sents`, and the mask it returned. The
        mask is aligned with the pre-selected lines, not with every line of
        the corpus file.
    """
    sentences_by_decade = {}
    masks_by_decade = {}
    for decade in years:
        corpus_path = './crs_corpus/decade_' + decade + '.txt'
        with open(corpus_path) as corpus_file:
            # Cheap substring pre-filter while streaming the file.
            candidates = [line for line in corpus_file if term in line]
        kept, keep_flags = clean_sents(candidates, term)
        sentences_by_decade[decade] = kept
        masks_by_decade[decade] = keep_flags

    return sentences_by_decade, masks_by_decade

#### "China"

In [8]:
# Gather the sentences mentioning 'china' for every decade and file them,
# together with their masks, under the 'china' key.
sents_china, mask_dict = get_decade_sentences(year_list, 'china')
country_sentence_dict['china'] = sents_china
country_mask_dict['china'] = mask_dict

#### "North Korea"

In [9]:
# Same extraction for the two-word term 'north korea'.
sents_north_korea, mask_dict = get_decade_sentences(year_list, 'north korea')
country_sentence_dict['north korea'] = sents_north_korea
country_mask_dict['north korea'] = mask_dict

#### "South Korea"

In [10]:
# Same extraction for the two-word term 'south korea'.
sents_south_korea, mask_dict = get_decade_sentences(year_list, 'south korea')
country_sentence_dict['south korea'] = sents_south_korea
country_mask_dict['south korea'] = mask_dict

#### "Canada"

In [11]:
# Same extraction for 'canada'.
sents_canada, mask_dict = get_decade_sentences(year_list, 'canada')
country_sentence_dict['canada'] = sents_canada
country_mask_dict['canada'] = mask_dict

#### "United Kingdom"

In [12]:
# Same extraction for the two-word term 'united kingdom'.
sents_united_kingdom, mask_dict = get_decade_sentences(year_list, 'united kingdom')
country_sentence_dict['united kingdom'] = sents_united_kingdom
country_mask_dict['united kingdom'] = mask_dict

#### "Germany"

In [13]:
# Same extraction for 'germany'.
sents_germany, mask_dict = get_decade_sentences(year_list, 'germany')
country_sentence_dict['germany'] = sents_germany
country_mask_dict['germany'] = mask_dict

In [14]:
# SAVE COUNTRY MASKS
# Pickles the whole country -> decade -> mask mapping into one file.
# open(..., 'wb') does not create './crs_sents/', so the directory must already exist.

with open('./crs_sents/country_sent_masks.pkl', 'wb') as handle:
    pickle.dump(country_mask_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [15]:
# LOAD COUNTRY MASKS

#with open('./crs_sents/country_sent_masks.pkl', 'rb') as handle:
#    country_mask_dict = pickle.load(handle)

In [16]:
# SAVE COUNTRY SENTENCES
# Pickles the whole country -> decade -> sentences mapping into one file.
# open(..., 'wb') does not create './crs_sents/', so the directory must already exist.

with open('./crs_sents/country_sents.pkl', 'wb') as handle:
    pickle.dump(country_sentence_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [17]:
# LOAD COUNTRY SENTENCES

#with open('./crs_sents/country_sents.pkl', 'rb') as handle:
#    country_sentence_dict = pickle.load(handle)

In [73]:
# Regime-type concepts handled in the cells below (all single lower-case words).
concept_list = ['authoritarianism',
                'autocracy',
                'autocratic',
                'democracy',
                'dictator',
                'dictatorship']

### Sentences Containing Concepts

The same per-decade sentence extraction is repeated for each of the following concepts:

In [74]:
# concept -> {decade label -> list of sentences mentioning that concept}
concept_sentence_dict = {}

In [75]:
# concept -> {decade label -> boolean keep-mask returned by clean_sents}
concept_mask_dict = {}

#### "authoritarianism"

In [78]:
# Gather the sentences mentioning 'authoritarianism' for every decade and file
# them, together with their masks, under that key.
sents_authoritarianism, mask_dict = get_decade_sentences(year_list, 'authoritarianism')
concept_sentence_dict['authoritarianism'] = sents_authoritarianism
concept_mask_dict['authoritarianism'] = mask_dict

#### "autocracy"

In [79]:
# Same extraction for 'autocracy'.
sents_autocracy, mask_dict = get_decade_sentences(year_list, 'autocracy')
concept_sentence_dict['autocracy'] = sents_autocracy
concept_mask_dict['autocracy'] = mask_dict

#### "autocratic"

In [80]:
# Same extraction for 'autocratic'.
sents_autocratic, mask_dict = get_decade_sentences(year_list, 'autocratic')
concept_sentence_dict['autocratic'] = sents_autocratic
concept_mask_dict['autocratic'] = mask_dict

#### "democracy"

In [81]:
# Same extraction for 'democracy'.
sents_democracy, mask_dict = get_decade_sentences(year_list, 'democracy')
concept_sentence_dict['democracy'] = sents_democracy
concept_mask_dict['democracy'] = mask_dict

#### "dictator"

In [82]:
# Same extraction for 'dictator'. The substring pre-filter in
# get_decade_sentences also picks up lines with 'dictatorship'; clean_sents
# then keeps only sentences where 'dictator' is a whole space-delimited word.
sents_dictator, mask_dict = get_decade_sentences(year_list, 'dictator')
concept_sentence_dict['dictator'] = sents_dictator
concept_mask_dict['dictator'] = mask_dict

#### "dictatorship"

In [83]:
# Same extraction for 'dictatorship'.
sents_dictatorship, mask_dict = get_decade_sentences(year_list, 'dictatorship')
concept_sentence_dict['dictatorship'] = sents_dictatorship
concept_mask_dict['dictatorship'] = mask_dict

In [85]:
# SAVE CONCEPT SENTENCE MASKS
# Pickles the whole concept -> decade -> mask mapping into one file.
# open(..., 'wb') does not create './crs_sents/', so the directory must already exist.

with open('./crs_sents/concept_sent_masks.pkl', 'wb') as handle:
    pickle.dump(concept_mask_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [86]:
# LOAD CONCEPT SENTENCE MASKS

#with open('./crs_sents/concept_sent_masks.pkl', 'rb') as handle:
#    concept_mask_dict = pickle.load(handle)

In [87]:
# SAVE CONCEPT SENTENCES
# Pickles the whole concept -> decade -> sentences mapping into one file.
# open(..., 'wb') does not create './crs_sents/', so the directory must already exist.

with open('./crs_sents/concept_sents.pkl', 'wb') as handle:
    pickle.dump(concept_sentence_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [32]:
# LOAD CONCEPT SENTENCES

#with open('./crs_sents/concept_sents.pkl', 'rb') as handle:
#    concept_sentence_dict = pickle.load(handle)

# MODELS & FUNCTIONS

### Custom Functions

In [33]:
# Indices into the model's `hidden_states` tuple (negative = counted from the
# end) whose outputs get_hidden_states sums: here the last three entries.
layers = [-3, -2, -1]


In [34]:
def get_word_idx(sent: str, word: str):
    """Locate `word` among the space-separated tokens of `sent`.

    Args:
        sent: sentence, tokenised here by splitting on single spaces.
        word: a single word or a space-separated phrase.

    Returns:
        For a single word, the integer position of its first occurrence.
        For a phrase, the list of consecutive positions of the first place
        where the whole phrase occurs contiguously (e.g. "north korea" in
        "south korea and north korea" -> [3, 4]). If the phrase never occurs
        contiguously, the first position of each of its words is returned
        instead.

    Raises:
        ValueError: if the word (or any word of the phrase) is not a token of
            `sent`.
    """
    tokens = sent.split(" ")
    if " " not in word:
        return tokens.index(word)

    parts = word.split(" ")
    span = len(parts)
    # Prefer a contiguous match so the words of the phrase are not picked
    # from unrelated places in the sentence.
    for start in range(len(tokens) - span + 1):
        if tokens[start:start + span] == parts:
            return list(range(start, start + span))

    # No contiguous occurrence: fall back to independent first occurrences
    # (raises ValueError when a word is missing altogether).
    return [tokens.index(part) for part in parts]

def get_hidden_states(encoded, token_ids_word, model, layers):
    """Run `model` on `encoded` and return one vector for the word of interest.

    The hidden states at the indices in `layers` are stacked and summed,
    singleton dimensions are squeezed away, the rows addressed by
    `token_ids_word` are selected, and those rows are averaged.

    Args:
        encoded: mapping of model inputs, unpacked as keyword arguments.
        token_ids_word: index (e.g. the tuple returned by `np.where`) of the
            sub-word token positions that make up the word.
        model: callable whose output exposes a `hidden_states` sequence.
        layers: indices into `hidden_states` to sum.

    Returns:
        The mean of the selected token rows. If no token is selected the
        result is all-NaN; a RuntimeWarning is emitted in that case so the
        problem does not go unnoticed.
    """
    import warnings

    # Inference only: no gradients are needed.
    with torch.no_grad():
        output = model(**encoded)

    # Get all hidden states
    states = output.hidden_states
    # Stack and sum all requested layers
    summed = torch.stack([states[i] for i in layers]).sum(0).squeeze()
    # Only select the tokens that constitute the requested word
    word_tokens_output = summed[token_ids_word]

    if word_tokens_output.numel() == 0:
        # Averaging zero rows yields NaN; surface it instead of failing silently.
        warnings.warn(
            "get_hidden_states: no tokens selected for the requested word; "
            "the returned vector is NaN",
            RuntimeWarning,
        )

    return word_tokens_output.mean(dim=0)


def get_word_vector(sent, idx, tokenizer, model, layers):
    """Tokenise `sent`, find the sub-word tokens of the target word, embed it.

    Args:
        sent: the sentence to encode (truncated by the tokenizer if too long).
        idx: word position(s) of the target: an int, or a list of ints for a
            multi-word term.
        tokenizer: tokenizer providing `encode_plus` and, on its result,
            `word_ids()`.
        model, layers: forwarded unchanged to `get_hidden_states`.

    Returns:
        Whatever `get_hidden_states` returns for the selected token positions.

    NOTE(review): `idx` is compared with the tokenizer's `word_ids()`;
    presumably both number words the same way only when the sentence has no
    punctuation or repeated spaces - confirm against the corpus.
    NOTE(review): with `truncation=True` a target word beyond the maximum
    length would presumably match no token - confirm.
    """
    encoded = tokenizer.encode_plus(sent, truncation=True, return_tensors="pt")

    # Word index of every token (None for tokens not belonging to a word).
    word_ids = np.array(encoded.word_ids())
    if isinstance(idx, list):
        is_target = np.isin(word_ids, (idx))
    else:
        is_target = word_ids == idx
    token_ids_word = np.where(is_target)

    return get_hidden_states(encoded, token_ids_word, model, layers)

In [35]:
def load_decade_model(year):
    """Load the DistilBERT masked-LM and tokenizer stored for one decade.

    Files are read from './crs_models/<year>/' ('config.json',
    'pytorch_model.bin' and the tokenizer files).

    Args:
        year: decade label naming the model directory.

    Returns:
        A tuple `(model, tokenizer)`; the model is in eval mode and configured
        to return all hidden states.
    """
    model_dir = './crs_models/' + year + '/'
    config = DistilBertConfig.from_json_file(model_dir + 'config.json')
    config.output_hidden_states = True
    model = DistilBertForMaskedLM(config)
    # The model is built on the CPU and never moved, so map the checkpoint to
    # the CPU as well: this works without CUDA and avoids staging the weights
    # on the GPU only to copy them back.
    # NOTE: torch.load unpickles the file - only load checkpoints you trust.
    state_dict = torch.load(model_dir + 'pytorch_model.bin', map_location = torch.device("cpu"))
    model.load_state_dict(state_dict)
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained(model_dir)
    return model, tokenizer

In [36]:
def extract_embedding(sent, term, tokenizer, model, layers):
    """Return the contextual vector of `term` inside `sent`.

    Looks up the term's word position(s) with `get_word_idx` and hands them,
    with the remaining arguments, to `get_word_vector`.
    """
    return get_word_vector(sent, get_word_idx(sent, term), tokenizer, model, layers)

In [37]:
# No longer needed but useful for the future
def ensure_spaces(sent, term):
    """Surround every occurrence of `term` in `sent` with spaces.

    A space is inserted before an occurrence that is not preceded by one and
    after an occurrence that is not followed by one; this includes occurrences
    at the very start or end of the string.

    Args:
        sent: the text to modify.
        term: literal text to isolate. It is escaped before being used in the
            pattern, so characters such as '.' or '+' match themselves.

    Returns:
        The sentence with the spaces inserted.
    """
    literal = re.escape(term)
    sent = re.sub(rf'(?<![ ])({literal})', r' \1', sent)
    sent = re.sub(rf'({literal})(?![ ])', r'\1 ', sent)

    return sent

In [38]:
def get_embeddings_from_sents(year_list, term, sentence_dict, layers, random_sample = True):
    """Embed `term` in each of its sentences, decade by decade.

    For every decade that has sentences, the decade's model and tokenizer are
    loaded and one vector per sentence is extracted. Progress is printed with
    timestamps.

    Args:
        year_list: decade labels to process; each must be a key of
            `sentence_dict[term]`.
        term: the word or phrase to embed.
        sentence_dict: mapping term -> {decade label -> list of sentences}.
        layers: hidden-state indices forwarded to `extract_embedding`.
        random_sample: when true, each decade is down-sampled without
            replacement (using the module-level `random` state) to the size
            of the smallest non-empty decade of `term`, so that all decades
            contribute equally many sentences.

    Returns:
        dict mapping decade label -> list of NumPy vectors, or None for a
        decade without sentences.
    """
    vector_dict = {}
    sents_by_year = sentence_dict[term]

    if random_sample:
        # Empty decades are skipped below anyway; letting them into the
        # minimum would force a sample size of 0 for every other decade.
        non_empty_sizes = [len(sents) for sents in sents_by_year.values() if len(sents) > 0]
        shortest_decade = min(non_empty_sizes, default=0)
    print("Starting " + term + " at " + str(datetime.now()))
    for year in year_list:
        sents = sents_by_year[year]

        if len(sents) == 0:
            # No mention of the term in this decade: skip loading the model.
            vector_dict[year] = None
        else:
            year_model, year_tokenizer = load_decade_model(year)

            if random_sample:
                sents = random.sample(sents, shortest_decade)

            # Convert each tensor to NumPy right away so the result can be pickled.
            vector_dict[year] = [
                extract_embedding(sent, term, year_tokenizer, year_model, layers).detach().cpu().numpy()
                for sent in sents
            ]
        print("Finished " + year + " at " + str(datetime.now()))
    return vector_dict

# CONTEXTUAL EMBEDDINGS FOR COUNTRIES

### China

In [39]:
# country name -> {decade label -> list of embedding vectors, or None if the decade has no sentences}
country_embeddings = {}

In [40]:
# random_sample = False: embed every 'china' sentence instead of a size-balanced sample.
te_china = get_embeddings_from_sents(year_list, 'china', country_sentence_dict, layers, random_sample = False)

Starting china at 2023-06-16 02:27:41.689159
Finished 1900_1909 at 2023-06-16 02:27:47.989825
Finished 1910_1919 at 2023-06-16 02:27:53.653754
Finished 1920_1929 at 2023-06-16 02:27:59.841856
Finished 1930_1939 at 2023-06-16 02:28:07.295891
Finished 1940_1949 at 2023-06-16 02:28:21.201747
Finished 1950_1959 at 2023-06-16 02:28:39.563573
Finished 1960_1969 at 2023-06-16 02:29:02.294407
Finished 1970_1979 at 2023-06-16 02:29:29.859733
Finished 1980_1989 at 2023-06-16 02:30:06.590811
Finished 1990_1999 at 2023-06-16 02:31:27.322137
Finished 2000_2009 at 2023-06-16 02:33:40.693193
Finished 2010_2020 at 2023-06-16 02:36:00.336003


In [41]:
# File the per-decade 'china' vectors under their country key.
country_embeddings['china'] = te_china

### North Korea

In [42]:
# Embed every 'north korea' sentence; decades without sentences are stored as None.
te_north_korea = get_embeddings_from_sents(year_list, 'north korea', country_sentence_dict, layers, random_sample = False)

Starting north korea at 2023-06-16 02:36:00.365076
Finished 1900_1909 at 2023-06-16 02:36:00.365211
Finished 1910_1919 at 2023-06-16 02:36:00.365231
Finished 1920_1929 at 2023-06-16 02:36:00.365255
Finished 1930_1939 at 2023-06-16 02:36:00.365272
Finished 1940_1949 at 2023-06-16 02:36:03.410637
Finished 1950_1959 at 2023-06-16 02:36:06.692288
Finished 1960_1969 at 2023-06-16 02:36:10.075570
Finished 1970_1979 at 2023-06-16 02:36:13.789492
Finished 1980_1989 at 2023-06-16 02:36:18.320511
Finished 1990_1999 at 2023-06-16 02:36:26.019112
Finished 2000_2009 at 2023-06-16 02:36:41.032234
Finished 2010_2020 at 2023-06-16 02:36:55.984015


In [43]:
# File the per-decade 'north korea' vectors under their country key.
country_embeddings['north korea'] = te_north_korea

### South Korea

In [44]:
# Embed every 'south korea' sentence; decades without sentences are stored as None.
te_south_korea = get_embeddings_from_sents(year_list, 'south korea', country_sentence_dict, layers, random_sample = False)

Starting south korea at 2023-06-16 02:36:56.007715
Finished 1900_1909 at 2023-06-16 02:36:56.007765
Finished 1910_1919 at 2023-06-16 02:36:56.007779
Finished 1920_1929 at 2023-06-16 02:36:56.007789
Finished 1930_1939 at 2023-06-16 02:36:56.007798
Finished 1940_1949 at 2023-06-16 02:36:59.038216
Finished 1950_1959 at 2023-06-16 02:37:02.451884
Finished 1960_1969 at 2023-06-16 02:37:05.759869
Finished 1970_1979 at 2023-06-16 02:37:10.137698
Finished 1980_1989 at 2023-06-16 02:37:16.547567
Finished 1990_1999 at 2023-06-16 02:37:28.247878
Finished 2000_2009 at 2023-06-16 02:37:43.641695
Finished 2010_2020 at 2023-06-16 02:38:00.088417


In [45]:
# File the per-decade 'south korea' vectors under their country key.
country_embeddings['south korea'] = te_south_korea

### Canada

In [46]:
# Embed every 'canada' sentence (no balanced sampling).
te_canada = get_embeddings_from_sents(year_list, 'canada', country_sentence_dict, layers, random_sample = False)

Starting canada at 2023-06-16 02:38:00.115306
Finished 1900_1909 at 2023-06-16 02:38:03.437875
Finished 1910_1919 at 2023-06-16 02:38:08.227189
Finished 1920_1929 at 2023-06-16 02:38:13.448247
Finished 1930_1939 at 2023-06-16 02:38:20.504024
Finished 1940_1949 at 2023-06-16 02:38:33.083437
Finished 1950_1959 at 2023-06-16 02:38:42.510285
Finished 1960_1969 at 2023-06-16 02:38:52.874067
Finished 1970_1979 at 2023-06-16 02:39:10.096580
Finished 1980_1989 at 2023-06-16 02:39:33.736903
Finished 1990_1999 at 2023-06-16 02:40:17.542112
Finished 2000_2009 at 2023-06-16 02:41:23.913936
Finished 2010_2020 at 2023-06-16 02:42:21.809694


In [47]:
# File the per-decade 'canada' vectors under their country key.
country_embeddings['canada'] = te_canada

### United Kingdom

In [48]:
# Embed every 'united kingdom' sentence (no balanced sampling).
te_united_kingdom = get_embeddings_from_sents(year_list, 'united kingdom', country_sentence_dict, layers, random_sample = False)

Starting united kingdom at 2023-06-16 02:42:21.844634
Finished 1900_1909 at 2023-06-16 02:42:25.554399
Finished 1910_1919 at 2023-06-16 02:42:29.319631
Finished 1920_1929 at 2023-06-16 02:42:33.021176
Finished 1930_1939 at 2023-06-16 02:42:36.865201
Finished 1940_1949 at 2023-06-16 02:42:41.364643
Finished 1950_1959 at 2023-06-16 02:42:48.016286
Finished 1960_1969 at 2023-06-16 02:42:55.410034
Finished 1970_1979 at 2023-06-16 02:43:02.476876
Finished 1980_1989 at 2023-06-16 02:43:12.192708
Finished 1990_1999 at 2023-06-16 02:43:29.849656
Finished 2000_2009 at 2023-06-16 02:43:57.871632
Finished 2010_2020 at 2023-06-16 02:44:21.888468


In [49]:
# File the per-decade 'united kingdom' vectors under their country key.
country_embeddings['united kingdom'] = te_united_kingdom

### Germany

In [50]:
# Embed every 'germany' sentence (no balanced sampling).
te_germany = get_embeddings_from_sents(year_list, 'germany', country_sentence_dict, layers, random_sample = False)

Starting germany at 2023-06-16 02:44:21.921232
Finished 1900_1909 at 2023-06-16 02:44:26.983645
Finished 1910_1919 at 2023-06-16 02:44:38.984978
Finished 1920_1929 at 2023-06-16 02:44:48.991448
Finished 1930_1939 at 2023-06-16 02:45:09.779740
Finished 1940_1949 at 2023-06-16 02:45:42.425888
Finished 1950_1959 at 2023-06-16 02:46:09.365998
Finished 1960_1969 at 2023-06-16 02:46:35.897531
Finished 1970_1979 at 2023-06-16 02:47:13.586993
Finished 1980_1989 at 2023-06-16 02:48:08.496424
Finished 1990_1999 at 2023-06-16 02:50:18.177340
Finished 2000_2009 at 2023-06-16 02:52:07.407327
Finished 2010_2020 at 2023-06-16 02:53:38.446727


In [51]:
# File the per-decade 'germany' vectors under their country key.
country_embeddings['germany'] = te_germany

In [52]:
# Not every country appears in every decade: for each listed country, print the
# number of extracted embeddings per decade, skipping decades stored as None.
# ('china' is not part of this listing.)

for country in ['canada', 'united kingdom', 'germany', 'north korea', 'south korea']:
    for year in year_list:
        # Identity test: the entry is either None or a list of vectors.
        if country_embeddings[country][year] is not None:
            print(country)
            print(year)
            print(len(country_embeddings[country][year]))

canada
1900_1909
18
canada
1910_1919
66
canada
1920_1929
82
canada
1930_1939
149
canada
1940_1949
318
canada
1950_1959
222
canada
1960_1969
253
canada
1970_1979
537
canada
1980_1989
721
canada
1990_1999
1502
canada
2000_2009
1769
canada
2010_2020
1300
united kingdom
1900_1909
17
united kingdom
1910_1919
10
united kingdom
1920_1929
9
united kingdom
1930_1939
17
united kingdom
1940_1949
39
united kingdom
1950_1959
80
united kingdom
1960_1969
92
united kingdom
1970_1979
88
united kingdom
1980_1989
153
united kingdom
1990_1999
324
united kingdom
2000_2009
532
united kingdom
2010_2020
454
germany
1900_1909
39
germany
1910_1919
183
germany
1920_1929
152
germany
1930_1939
387
germany
1940_1949
630
germany
1950_1959
530
germany
1960_1969
507
germany
1970_1979
764
germany
1980_1989
1124
germany
1990_1999
2761
germany
2000_2009
2310
germany
2010_2020
1952
north korea
1940_1949
2
north korea
1950_1959
13
north korea
1960_1969
17
north korea
1970_1979
28
north korea
1980_1989
57
north korea
1990_1

In [53]:
# SAVE COUNTRY EMBEDDINGS
# Pickles the whole country -> decade -> vectors mapping into one file.
# open(..., 'wb') does not create './crs_embeds/', so the directory must already exist.

with open('./crs_embeds/country_embeds.pkl', 'wb') as handle:
    pickle.dump(country_embeddings, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [54]:
# LOAD COUNTRY EMBEDDINGS

#with open('./crs_embeds/country_embeds.pkl', 'rb') as handle:
#    country_embeddings = pickle.load(handle)

# (CONTEXTUAL) EMBEDDINGS FOR THEORETICAL CONCEPTS

The same embedding extraction is repeated for each of the concepts listed above.

In [88]:
# concept -> {decade label -> list of embedding vectors, or None if the decade has no sentences}
concept_embeddings = {}

#### "authoritarianism"

In [89]:
# Embed every 'authoritarianism' sentence; decades without sentences are stored as None.
te_authoritarianism = get_embeddings_from_sents(year_list, 'authoritarianism', concept_sentence_dict, layers, random_sample = False)

Starting authoritarianism at 2023-06-16 04:44:32.040125
Finished 1900_1909 at 2023-06-16 04:44:32.040240
Finished 1910_1919 at 2023-06-16 04:44:32.040273
Finished 1920_1929 at 2023-06-16 04:44:35.158662
Finished 1930_1939 at 2023-06-16 04:44:38.260987
Finished 1940_1949 at 2023-06-16 04:44:41.662581
Finished 1950_1959 at 2023-06-16 04:44:45.289243
Finished 1960_1969 at 2023-06-16 04:44:49.385495
Finished 1970_1979 at 2023-06-16 04:44:54.044396
Finished 1980_1989 at 2023-06-16 04:45:00.592283
Finished 1990_1999 at 2023-06-16 04:45:12.047924
Finished 2000_2009 at 2023-06-16 04:45:22.272721
Finished 2010_2020 at 2023-06-16 04:45:32.172229


In [91]:
# File the per-decade 'authoritarianism' vectors under their concept key.
concept_embeddings['authoritarianism'] = te_authoritarianism

#### "autocracy"

In [92]:
# Embed every 'autocracy' sentence (no balanced sampling).
te_autocracy = get_embeddings_from_sents(year_list, 'autocracy', concept_sentence_dict, layers, random_sample = False)

Starting autocracy at 2023-06-16 04:46:48.467546
Finished 1900_1909 at 2023-06-16 04:46:51.631813
Finished 1910_1919 at 2023-06-16 04:46:55.496677
Finished 1920_1929 at 2023-06-16 04:46:58.834558
Finished 1930_1939 at 2023-06-16 04:47:01.940681
Finished 1940_1949 at 2023-06-16 04:47:05.198069
Finished 1950_1959 at 2023-06-16 04:47:08.733583
Finished 1960_1969 at 2023-06-16 04:47:12.359848
Finished 1970_1979 at 2023-06-16 04:47:15.954734
Finished 1980_1989 at 2023-06-16 04:47:19.987169
Finished 1990_1999 at 2023-06-16 04:47:25.271506
Finished 2000_2009 at 2023-06-16 04:47:31.822263
Finished 2010_2020 at 2023-06-16 04:47:37.896337


In [93]:
# File the per-decade 'autocracy' vectors under their concept key.
concept_embeddings['autocracy'] = te_autocracy

#### "autocratic"

In [94]:
# Embed every 'autocratic' sentence (no balanced sampling).
te_autocratic = get_embeddings_from_sents(year_list, 'autocratic', concept_sentence_dict, layers, random_sample = False)

Starting autocratic at 2023-06-16 04:47:37.916589
Finished 1900_1909 at 2023-06-16 04:47:40.703190
Finished 1910_1919 at 2023-06-16 04:47:43.829769
Finished 1920_1929 at 2023-06-16 04:47:46.941488
Finished 1930_1939 at 2023-06-16 04:47:50.060274
Finished 1940_1949 at 2023-06-16 04:47:53.333952
Finished 1950_1959 at 2023-06-16 04:47:56.756155
Finished 1960_1969 at 2023-06-16 04:48:00.534103
Finished 1970_1979 at 2023-06-16 04:48:04.515920
Finished 1980_1989 at 2023-06-16 04:48:08.503779
Finished 1990_1999 at 2023-06-16 04:48:15.112008
Finished 2000_2009 at 2023-06-16 04:48:21.944000
Finished 2010_2020 at 2023-06-16 04:48:30.215912


In [95]:
# File the per-decade 'autocratic' vectors under their concept key.
concept_embeddings['autocratic'] = te_autocratic

#### "democracy"

In [96]:
# Embed every 'democracy' sentence (no balanced sampling).
te_democracy = get_embeddings_from_sents(year_list, 'democracy', concept_sentence_dict, layers, random_sample = False)

Starting democracy at 2023-06-16 04:48:30.240336
Finished 1900_1909 at 2023-06-16 04:48:36.482727
Finished 1910_1919 at 2023-06-16 04:48:50.822462
Finished 1920_1929 at 2023-06-16 04:49:03.507865
Finished 1930_1939 at 2023-06-16 04:49:24.411149
Finished 1940_1949 at 2023-06-16 04:50:09.755148
Finished 1950_1959 at 2023-06-16 04:50:55.333172
Finished 1960_1969 at 2023-06-16 04:51:43.487250
Finished 1970_1979 at 2023-06-16 04:52:40.103805
Finished 1980_1989 at 2023-06-16 04:54:29.656739
Finished 1990_1999 at 2023-06-16 04:59:22.490105
Finished 2000_2009 at 2023-06-16 05:06:49.165024
Finished 2010_2020 at 2023-06-16 05:12:04.832059


In [97]:
# File the per-decade 'democracy' vectors under their concept key.
concept_embeddings['democracy'] = te_democracy

#### "dictator"

In [98]:
# Embed every 'dictator' sentence (no balanced sampling).
te_dictator = get_embeddings_from_sents(year_list, 'dictator', concept_sentence_dict, layers, random_sample = False)

Starting dictator at 2023-06-16 05:12:04.868383
Finished 1900_1909 at 2023-06-16 05:12:08.191754
Finished 1910_1919 at 2023-06-16 05:12:11.743348
Finished 1920_1929 at 2023-06-16 05:12:15.196074
Finished 1930_1939 at 2023-06-16 05:12:19.773666
Finished 1940_1949 at 2023-06-16 05:12:24.253918
Finished 1950_1959 at 2023-06-16 05:12:29.041182
Finished 1960_1969 at 2023-06-16 05:12:33.439560
Finished 1970_1979 at 2023-06-16 05:12:38.351999
Finished 1980_1989 at 2023-06-16 05:12:44.779669
Finished 1990_1999 at 2023-06-16 05:12:52.008237
Finished 2000_2009 at 2023-06-16 05:13:01.639092
Finished 2010_2020 at 2023-06-16 05:13:08.946092


In [99]:
# File the per-decade 'dictator' vectors under their concept key.
concept_embeddings['dictator'] = te_dictator

#### "dictatorship"

In [100]:
# Embed every 'dictatorship' sentence; decades without sentences are stored as None.
te_dictatorship = get_embeddings_from_sents(year_list, 'dictatorship', concept_sentence_dict, layers, random_sample = False)

Starting dictatorship at 2023-06-16 05:13:08.978021
Finished 1900_1909 at 2023-06-16 05:13:12.072556
Finished 1910_1919 at 2023-06-16 05:13:12.072778
Finished 1920_1929 at 2023-06-16 05:13:16.101846
Finished 1930_1939 at 2023-06-16 05:13:22.614184
Finished 1940_1949 at 2023-06-16 05:13:29.188358
Finished 1950_1959 at 2023-06-16 05:13:36.993360
Finished 1960_1969 at 2023-06-16 05:13:45.506059
Finished 1970_1979 at 2023-06-16 05:13:54.684327
Finished 1980_1989 at 2023-06-16 05:14:08.755251
Finished 1990_1999 at 2023-06-16 05:14:29.539421
Finished 2000_2009 at 2023-06-16 05:14:51.459365
Finished 2010_2020 at 2023-06-16 05:15:10.832965


In [101]:
# File the per-decade 'dictatorship' vectors under their concept key.
concept_embeddings['dictatorship'] = te_dictatorship

In [102]:
# SAVE CONCEPT EMBEDDINGS
# Pickles the whole concept -> decade -> vectors mapping into one file.
# open(..., 'wb') does not create './crs_embeds/', so the directory must already exist.

with open('./crs_embeds/concept_embeds.pkl', 'wb') as handle:
    pickle.dump(concept_embeddings, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# LOAD CONCEPT EMBEDDINGS

#with open('./crs_embeds/concept_embeds.pkl', 'rb') as handle:
#    concept_embeddings = pickle.load(handle)