In [1]:
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.layers import  Input
from tensorflow.keras.models import Model, load_model 


class Text2Vec:
    """Map whitespace-separated words to vectors via a trained model's embedding layer.

    The constructor loads ``model.hdf5`` from ``model_path``, wraps the layer
    named ``'Embedding-Layer'`` in a standalone inference model, and builds a
    word -> integer-id lookup from ``df_vocab['word']``.
    """

    def __init__(self, model_path, df_vocab):
        """Load the trained model and prepare the embedding-only inference model.

        Args:
            model_path: Directory that contains ``model.hdf5``.
            df_vocab: DataFrame with a ``'word'`` column. Each word's id is its
                row position in this frame.
                NOTE(review): presumably these positions match the ids the
                model was trained with -- confirm against the training code.
        """
        latent_dim = 60
        self.df_vocab = df_vocab
        model = load_model(model_path + os.sep + 'model.hdf5')
        # summary() prints the table itself and returns None, so wrapping it in
        # print() would only add a stray "None" line.
        model.summary()

        # NOTE(review): the input is declared with width `latent_dim`, yet the
        # arrays fed to predict() below are (n, 1) / (n,) id arrays -- confirm
        # the intended input shape.
        input_data = Input(shape=(latent_dim,), name='Input-Layer')
        output_data = model.get_layer('Embedding-Layer')(input_data)

        self.infr_model = Model(input_data, output_data, name='Inference-Model')
        self.infr_model.summary()

        # Row position in df_vocab is the word id; for a duplicated word the
        # last occurrence wins.
        keys = df_vocab['word'].to_list()
        self.word2id = {word: idx for idx, word in enumerate(keys)}

        # Smoke test: push 100 random ids through the inference model and show
        # the result.
        probe_ids = np.random.randint(11, 2344, [100, 1])
        out = self.infr_model.predict(probe_ids)

        print(out.shape)
        print(out)

    def text2vec(self, text):
        """Return the inference model's output for the known words in ``text``.

        Args:
            text: String of words separated by single spaces.

        Returns:
            Whatever ``self.infr_model.predict`` returns for the array of ids,
            one id per in-vocabulary token, in the order the tokens appear.
            Tokens missing from the vocabulary (including the empty tokens
            produced by consecutive spaces) are skipped.
        """
        tokens = text.split(" ")
        # Membership has to be tested against the vocabulary; testing against
        # `tokens` itself is always true and lets unknown words raise KeyError.
        ids = [self.word2id[token] for token in tokens if token in self.word2id]
        # Hand predict() an explicit array rather than a bare Python list.
        return self.infr_model.predict(np.asarray(ids))


if __name__ == "__main__":
    # Export one vector per vocabulary word to <model_root>/word_vecs.csv.

    model_root = "tmp_lstm"
    model_path = os.path.join(model_root, "trained_model")

    # Keep only words seen more than 10 times and drop incomplete rows.
    df_vocab = pd.read_csv(os.path.join(model_root, "vocab.csv"))
    df_vocab = df_vocab[df_vocab['count'] > 10].dropna()

    T = Text2Vec(model_path, df_vocab)

    words = [str(w) for w in df_vocab['word'].to_list()]

    text = " ".join(words)
    vecs = np.asarray(T.text2vec(text))

    # Each row is labelled with the word at the same position, so the two
    # sequences must line up exactly; fail loudly instead of mislabelling rows.
    if vecs.shape[0] != len(words):
        raise ValueError(
            "text2vec returned %d vectors for %d words" % (vecs.shape[0], len(words))
        )

    # Build the frame in one step (no per-row .loc growth); the column count
    # follows the width of the returned vectors, and any extra singleton axis
    # is flattened into one row per word.
    dF = pd.DataFrame(vecs.reshape(len(words), -1), index=words)

    dF.to_csv(os.path.join(model_root, "word_vecs.csv"))
    

Model: "Encoder-Model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 Input-Layer (InputLayer)    [(None, 10)]              0         
                                                                 
 Embedding-Layer (Embedding  (None, 10, 60)            1551120   
 )                                                               
                                                                 
 Encoder-Batchnorm-1 (Batch  (None, 10, 60)            240       
 Normalization)                                                  
                                                                 
 lstm_1 (LSTM)               [(None, 60),              29040     
                              (None, 60),                        
                              (None, 60)]                        
                                                                 
 Final-Output-Dense (Dense)  (None, 2)               

In [3]:
# Sanity check against the model summary: 25852 * 60 equals the 1,551,120
# parameters listed for Embedding-Layer (presumably vocabulary size x
# embedding width -- confirm).
25852 * 60

1551120