In [None]:
## Simple code to extract and save the word embeddings for the ELMo model
## James Fodor 2022
## Python 3.7
## See https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/ELMO_EMBEDDINGS.md

import sys

import numpy as np
from flair.data import Sentence
from flair.embeddings import DocumentPoolEmbeddings, ELMoEmbeddings

# Set numpy display properties needed for printing to file.
# threshold and linewidth are both sys.maxsize so that np.array_str() never
# abbreviates a long vector with '...' (numpy does this once an array has more
# elements than `threshold`) and never wraps it over several lines (numpy breaks
# the text once a row exceeds `linewidth` characters). Either would corrupt a
# one-row-per-word text export.
np.set_printoptions(precision=4, threshold=sys.maxsize, linewidth=sys.maxsize, suppress=True, floatmode='fixed')

# Location of data files
# NOTE(review): machine-specific absolute path -- adjust before running on another machine.
path_base = 'D:/Study and Projects/School Work/Year 25 - PhD 1/Data//' # root location of data

In [None]:
# Build the embedding object from the locally downloaded ELMo model files
model_loc = 'Word Embeddings//'
elmo_dir = path_base + model_loc  # shared prefix of both model files
options_path = elmo_dir + 'Elmo Embeddings/elmo_false_wiki2019/options.json'
weights_path = elmo_dir + 'Elmo Embeddings/elmo_false_wiki2019/model.hdf5'

# Word-level ELMo embedder, wrapped in a document pooling embedder
elmo_word_embedder = ELMoEmbeddings(options_file=options_path, weight_file=weights_path)
elmo_embeds = DocumentPoolEmbeddings([elmo_word_embedder])

In [9]:
# Generate ELMo embeddings for every word in the vocab file and save them as text,
# one row per word: "<word> <v1> <v2> ... <vN>".
vocab_path = '../combined_corpus_66k.txt'


def format_embedding_row(word, vector):
    """Return one output row: the word followed by its space-separated components.

    Every component is written with four decimal places. The values are
    formatted one by one rather than by slicing numpy's printed representation,
    because that representation pads for the sign column (so a fixed-offset
    slice can cut off a leading '-' or a leading digit) and can be wrapped or
    abbreviated depending on the global numpy print options.

    Parameters
    ----------
    word : str
        The vocabulary item the vector belongs to.
    vector : array-like of float
        The embedding; flattened to one dimension before formatting.

    Returns
    -------
    str
        The row text, without a trailing newline.
    """
    values = np.asarray(vector, dtype=float).ravel()
    return word + ' ' + ' '.join(f'{value:.4f}' for value in values)


# Both files are managed by one `with`, so the output file is closed even if
# embedding a word raises part-way through the vocabulary.
# NOTE(review): append mode ("a") means re-running this cell adds a second copy
# of every row to elmo_mini.txt -- delete the file first for a fresh export.
with open(vocab_path, 'rt', encoding='utf-8') as vocab_file, \
        open('elmo_mini.txt', 'a', encoding='utf-8') as save_file:
    for line in vocab_file:
        word = line.strip()
        if not word:
            continue  # blank line in the vocab file: nothing to embed

        word_token = Sentence(word)
        elmo_embeds.embed(word_token)

        embedding = word_token.embedding
        # Presumably a torch tensor (TODO confirm); move it to host memory and drop
        # any autograd tracking so the conversion to numpy cannot fail.
        if hasattr(embedding, 'detach'):
            embedding = embedding.detach().cpu().numpy()

        save_file.write(format_embedding_row(word, embedding) + '\n')