# **I. Imports**

In [2]:
import pandas as pd
import random
import string
import re
import time 
import tensorflow as tf
import tensorflow.data as tf_data   #not used now
import tensorflow.strings as tf_strings   #not used now

import tensorflow.keras
from tensorflow.keras import layers
from tensorflow.keras.layers import TextVectorization

import numpy as np
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import RMSprop
from gensim.models import Word2Vec

# import keras.ops as ops
import joblib

#### **Checking GPU availabiliy**

In [3]:
# from keras import backend as K
# K.tensorflow_backend._get_available_gpus()
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 10520094975519231041
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14626652160
locality {
  bus_id: 1
  links {
    link {
      device_id: 1
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 12785755483320340023
physical_device_desc: "device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5"
xla_global_id: 416903419
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 14626652160
locality {
  bus_id: 1
  links {
    link {
      type: "StreamExecutor"
      strength: 1
    }
  }
}
incarnation: 16816183578209216759
physical_device_desc: "device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5"
xla_global_id: 2144165316
]


# **II. Data Extraction & Visualization**

In [4]:
file = '/kaggle/input/tabdelimited-englisharabic-sentence-pairs/fra.txt'

In [5]:
# df_raw = pd.read_csv("fra.txt", delimiter='\t', error_bad_lines=False, header=None, names=['en', 'fr'], index_col=False)
df_raw = pd.read_csv(file, delimiter='\t', encoding='utf-8', header=None, names=['en', 'fr'], index_col=False)
df_raw.head()

Unnamed: 0,en,fr
0,Go.,Va !
1,Hi.,Salut !
2,Run!,Cours !
3,Run!,Courez !
4,Wow!,Ça alors !


In [6]:
"""Choosing a subset of the dataframe to work with (eventually choosing the whole dataframe)"""
lang1 = 'en'
lang2 = 'fr'  #subject to changes
# Create a new DataFrame with every fourth row (50% of th data)
# df_raw1 = df_raw.iloc[::2, :].reset_index(drop=True)
df_raw1 = df_raw
print(f"Number of sentences : {df_raw1.count()[0]}")
print(f"English longest sentence: {df_raw1[lang1].str.len().max()}")
print(f"French longest sentence: {df_raw1[lang2].str.len().max()}") 
df_raw1.head()

Number of sentences : 160538
English longest sentence: 286
French longest sentence: 349


  print(f"Number of sentences : {df_raw1.count()[0]}")


Unnamed: 0,en,fr
0,Go.,Va !
1,Hi.,Salut !
2,Run!,Cours !
3,Run!,Courez !
4,Wow!,Ça alors !


In [7]:
print(df_raw1["en"][len(df_raw1)-1],"\n")
print(df_raw1["fr"][len(df_raw1)-1])

It may be impossible to get a completely error-free corpus due to the nature of this kind of collaborative effort. However, if we encourage members to contribute sentences in their own languages rather than experiment in languages they are learning, we might be able to minimize errors. 

Il est peut-être impossible d'obtenir un Corpus complètement dénué de fautes, étant donnée la nature de ce type d'entreprise collaborative. Cependant, si nous encourageons les membres à produire des phrases dans leurs propres langues plutôt que d'expérimenter dans les langues qu'ils apprennent, nous pourrions être en mesure de réduire les erreurs.


# **III. Data Preprocessing**

In [8]:
#Returning a list of tuples (corresponding eng-fre sentences)
def Create_pairs(dataframe):
    text_pairs = [(row['en'], "[start] "+row['fr']+" [end]") for index, row in dataframe.iterrows()] 
    return text_pairs

In [9]:
##########useless for now#############
# import spacy
# # nlp = spacy.load('en',disable=['parser', 'tagger','ner'])  #deprecated on spacy v3
# # nlp = spacy.load('en_core_web_sm')
# nlp = spacy.load("en_core_web_sm", exclude=["parser", "tagger", "ner"])
# nlp.max_length = 1198623

def separate_punc(doc_text):
    return [token.text.lower() for token in nlp(doc_text) if token.text not in '\n\n \n\n\n!"-#$%&()--.*+,-/:;<=>?@[\\]^_`{|}~\t\n ']

In [10]:
#Create the pairs
text_pairs = Create_pairs(df_raw1)

In [11]:
#How sentence pairs look like
for i in range(3):
    pair = random.choice(text_pairs)
    print(pair)
    print()

("You're upset.", '[start] Tu es contrariée. [end]')

('Everybody is supposed to know the law, but few people really do.', '[start] Tout le monde est censé connaître les lois, mais très peu de gens les connaissent vraiment. [end]')

('The GDP of China still pales in comparison with that of the US.', '[start] Le PIB de la Chine est encore dérisoire en comparaison de celui des États-Unis. [end]')



In [12]:
# en_sentences = [en_sentence for en_sentence, _ in text_pairs[:20000]]
# all_en_sentences = ' '.join(en_sentences)
# eng_doc = nlp(all_en_sentences)
# unique_eng_tokens = set(token.text for token in eng_doc)
# len(unique_eng_tokens)

## ---> All english sentences (~160K) contain together ~14.5K unique tokens
## ---> All french sentences (~160K) contain together >25K unique tokens

In [13]:
"""
Now, let's split the sentence pairs into a training set, a validation set,
and a test set.
"""
random.seed(123)
random.shuffle(text_pairs)
num_val_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_val_samples
train_pairs = text_pairs[:num_train_samples]
val_pairs = text_pairs[num_train_samples : num_train_samples + num_val_samples]
test_pairs = text_pairs[num_train_samples + num_val_samples :]

print(f"{len(text_pairs)} total text pairs")
print(f"{len(train_pairs)} training pairs")
print(f"{len(val_pairs)} validation pairs")
print(f"{len(test_pairs)} test pairs")
print("THIS TRAIN TEST VAL SPLIT IS NOT USED YET (cf val=0.2 : automatic split in the keras model def)")

160538 total text pairs
112378 training pairs
24080 validation pairs
24080 test pairs
THIS TRAIN TEST VAL SPLIT IS NOT USED YET (cf val=0.2 : automatic split in the keras model def)


### **TextVecotrization: tokenization & index representation**

In [14]:
strip_chars = list(string.punctuation)
eng_strip_chars = '  '.join(strip_chars)
strip_chars.remove('[')
strip_chars.remove(']')
fre_strip_chars = '  '.join(strip_chars)

In [15]:
print(f"Characters to be deleted from english sequences:  {eng_strip_chars}")
print(f"Characters to be deleted from french sequences:  {fre_strip_chars}")

#There's a default standardization done by keras.layers.TextVectorization (we'll apply it to english sentences)
#But it deletes also '[' and ']' symbols, that's why we specify a custom standardization for french sentences (we want to keep [start] and [end] tokens)
def custom_standardization(input_string):
    lowercase = tf_strings.lower(input_string)
    return tf_strings.regex_replace(lowercase, "[%s]" % re.escape(''.join(strip_chars)), "")

#testing the function
print(custom_standardization("[start] Vas y, cours 'plus' vite !! [end]"))

Characters to be deleted from english sequences:  !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /  :  ;  <  =  >  ?  @  [  \  ]  ^  _  `  {  |  }  ~
Characters to be deleted from french sequences:  !  "  #  $  %  &  '  (  )  *  +  ,  -  .  /  :  ;  <  =  >  ?  @  \  ^  _  `  {  |  }  ~
tf.Tensor(b'[start] vas y cours plus vite  [end]', shape=(), dtype=string)


#### Vectorization Parameters

In [16]:
vocab_size = 20000
sequence_length = 20 #20 suggested in litterature

In [17]:
"""Vectorizing text data using Keras TextVectorization -> Representing unique tokens with indices in a dictionnary"""

#Default standardization for eng (strip string.punctuations )
eng_vectorization = TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length,
)
#customized for frenish
fre_vectorization = TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length + 1,
    standardize=custom_standardization,
)

### Training the vectorizers

In [18]:
train_eng_texts = [pair[0] for pair in text_pairs]  #Change text_pairs to train_pairs if I want to use the train-test-val split
train_fre_texts = [pair[1] for pair in text_pairs]

eng_vectorization.adapt(train_eng_texts) #fitting the text vectorization layer to data
fre_vectorization.adapt(train_fre_texts) #same (but unchanged data)

### Downloading the vectorizers

In [96]:
"""This cell downloads the two TextVectorizer that were trained on our text data"""
##(déjà fait pour vocab_size=15000 et sentence_length=14)
## Sauvegarder le vectorizer anglais ##

# eng_vectorization_data = {
#     'config': eng_vectorization.get_config(),
#     'weights': eng_vectorization.get_weights()
# }
# joblib.dump(eng_vectorization_data, 'text_vectorizer_eng_20k-vocab20.joblib')

# # Sauvegarder le vectorizer français ##
# fre_vectorization_data = {
#     'config': fre_vectorization.get_config(),
#     'weights': fre_vectorization.get_weights()
# }
# joblib.dump(fre_vectorization_data, 'text_vectorizer_fr_20k-vocab20.joblib')


['text_vectorizer_fr_20k-vocab20.joblib']

In [20]:
# Get the vocabulary of the text vectorization layer
vocabulary = fre_vectorization.get_vocabulary()
print("french vocab: ", len(vocabulary))
print("english vocab: ", len(eng_vectorization.get_vocabulary()))

# vocab_size = min(len(eng_vectorization.get_vocabulary()), len(vocabulary))

# Check if "[start]" is in the vocabulary
is_start_token_in_vocab = "[start]" in vocabulary

# Print the result
print(f"[start] is considered as a single token: {is_start_token_in_vocab}")

french vocab:  20000
english vocab:  14341
[start] is considered as a single token: True


In [21]:
# """Next, we'll format our datasets.

# At each training step, the model will seek to predict target words N+1 (and beyond)
# using the source sentence and the target words 0 to N.

# As such, the training dataset will yield a tuple `(inputs, targets)`, where:

# - `inputs` is a dictionary with the keys `encoder_inputs` and `decoder_inputs`.
# `encoder_inputs` is the vectorized source sentence and `encoder_inputs` is the target sentence "so far",
# that is to say, the words 0 to N used to predict word N+1 (and beyond) in the target sentence.
# - `target` is the target sentence offset by one step:
# it provides the next words in the target sentence -- what the model will try to predict.
# """
# def format_dataset(eng, fre):
#     eng = eng_vectorization(eng)
#     fre = fre_vectorization(fre)
#     return (
#         {
#             "encoder_inputs": eng,
#             "decoder_inputs": fre[:, :-1],
#         },
#         fre[:, 1:],
#     )

# **IV. Model Definition & Training**

## **IV.1 Applying Vectorizers, Padding sequences, Encoder-Decoder Definition**
### Encoder input
Encoder Input: Sequences of English tokens (sentences) represented as integer indices.

In [22]:
# Data preparation ( stripping, tokenization and vectorization(indexing) )
train_eng_sequences = eng_vectorization(train_eng_texts)
# Pad the sequences to the specified sequence length
start = time.time()
encoder_input_data = pad_sequences(train_eng_sequences, maxlen=sequence_length, padding="post")
end = time.time()
print(f"Incoder pad_sequences execution time : {end - start:.2f}")

Incoder pad_sequences execution time : 180.66


In [23]:
len(encoder_input_data[0])

20

### Decoder input
Decoder Input: Sequences of french sentences represented as integer indices.
SHIFTED BY 1 POSITION !

In [24]:
# Data preparation
train_fre_sequences = fre_vectorization(train_fre_texts)
# Shift the target french sentences by one position (decoder input)
decoder_input_data = train_fre_sequences[:, :-1]
# Pad the sequences to the specified sequence length (+1 for the start token)
# decoder_input_data = pad_sequences(decoder_input_data, maxlen=sequence_length + 1, padding="post")
start = time.time()
decoder_input_data = pad_sequences(decoder_input_data, maxlen=sequence_length, padding="post")
end = time.time()
print(f"decoder pad_sequences execution time : {end - start:.2f}")

decoder pad_sequences execution time : 179.87


In [25]:
len(decoder_input_data[0])

20

### Decoder Output

Decoder Output: Target French sentences (with hiding the first token). \
N.B : I won't one-hot encode these sentences and use Categorical Crossentropy loss. To make it less expensive, I'll let tokens represented with indicies and I'll use Sparse Categorical Crossentropy

In [26]:
"""One-hot encoded representations can be an alternative (very expensive)"""

# # One-hot encode the target french sentences (decoder output)
# decoder_output_data = to_categorical(train_fre_sequences[:, 1:], num_classes=vocab_size)

# decoder_output_data = decoder_output_data[:, :sequence_length + 1, :]  # Adjust the sequence length
decoder_output_data = train_fre_sequences[:, 1:]
len(decoder_output_data[0])

20

## **IV.2 Model Definition**

#### Model parameters

In [97]:
embed_dim = 256 #should be equal to embed_dim (useless for now)
latent_dim = 176
batch_size = 128  # Batch size for training. 
epochs = 46  # Number of epochs to train for.
num_encoder_tokens = vocab_size  #max vocab of the original dataset (df_raw) for english is around 14.5k
num_decoder_tokens = vocab_size

In [98]:
##Variant (word2Vec for embedding then no training of the embedding layer). (Cf. NLP from 0 to 1 : text classification and M.translation: Medium)
# embedding_layer = Embedding(vocab_size, 150, weights=[embedding_vectors], input_length=max_length, trainable=False)
# model = Sequential()
# model.add(embedding_layer)
# model.add(Dropout(0.2))

In [99]:
# Need to have inputs = integer sequences (representing sequences of words, encoded by their index in a dictionary)
# # Compile & run training
# model.compile(optimizer='rmsprop', loss='categorical_crossentropy')  #if decoder_output_data was one-hot encoded
# instantiating the model in the strategy scope creates the model on the TPU
# with strategy.scope():
    
# Define an input sequence and process it.
encoder_inputs = Input(shape=(None,))
x = Embedding(num_encoder_tokens, latent_dim)(encoder_inputs)
x, state_h, state_c = LSTM(latent_dim,
                           return_state=True)(x)
x = Dropout(0.3)(x)  # Add dropout layer to the encoder

encoder_states = [state_h, state_c]

# Set up the decoder, using `encoder_states` as initial state.
decoder_inputs = Input(shape=(None,))
x = Embedding(num_decoder_tokens, latent_dim)(decoder_inputs)
x = LSTM(latent_dim, return_sequences=True)(x, initial_state=encoder_states)
x = Dropout(0.3)(x)  # Add dropout layer to the decoder
decoder_outputs = Dense(num_decoder_tokens, activation='softmax')(x)

# Define the model that will turn
# `encoder_input_data` & `decoder_input_data` into `decoder_target_data`
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# Compile the model with sparse categorical crossentropy
optimizer = RMSprop(learning_rate=0.002)
model.compile(optimizer=optimizer, loss=sparse_categorical_crossentropy)

    

In [100]:
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 embedding_2 (Embedding)     (None, None, 176)            3520000   ['input_3[0][0]']             
                                                                                                  
 embedding_3 (Embedding)     (None, None, 176)            3520000   ['input_4[0][0]']             
                                                                                            

## **IV.3 Launching Training**

In [101]:
model.fit([encoder_input_data, decoder_input_data], decoder_output_data,
          batch_size=batch_size,
          epochs=epochs,
          validation_split=0.18,
          verbose=1)
# strategy.run(replica_fn, args=dist_batch)

Epoch 1/46
Epoch 2/46
Epoch 3/46
Epoch 4/46
Epoch 5/46
Epoch 6/46
Epoch 7/46
Epoch 8/46
Epoch 9/46
Epoch 10/46
Epoch 11/46
Epoch 12/46
Epoch 13/46
Epoch 14/46
Epoch 15/46
Epoch 16/46
Epoch 17/46
Epoch 18/46
Epoch 19/46
Epoch 20/46
Epoch 21/46
Epoch 22/46
Epoch 23/46
Epoch 24/46
Epoch 25/46
Epoch 26/46
Epoch 27/46
Epoch 28/46
Epoch 29/46
Epoch 30/46
Epoch 31/46
Epoch 32/46
Epoch 33/46
Epoch 34/46
Epoch 35/46
Epoch 36/46
Epoch 37/46
Epoch 38/46
Epoch 39/46
Epoch 40/46
Epoch 41/46
Epoch 42/46
Epoch 43/46
Epoch 44/46
Epoch 45/46
Epoch 46/46


<keras.src.callbacks.History at 0x7fc07397f850>

# **V. Inference:**
##### Encoder Input: A single English sentence represented as integer indices.
##### Decoder Input: A start-of-sequence token (initially) and subsequently predicted tokens (Recurrency).
##### Decoder Output: Predicted probabilities for the next word in the sequence.

In [58]:
def translate_sentence(model, eng_text, eng_vectorization, fre_vectorization, sequence_length):
    
    # Tokenize and pad the English input sentence
    eng_sequence = eng_vectorization(np.array([eng_text]))
    eng_sequence = pad_sequences(eng_sequence, maxlen=sequence_length, padding="post")

    # Initialize the decoder input with the start token
    fre_sequence = np.zeros((1, sequence_length), dtype=np.int32)
    fre_sequence[0, 0] = fre_vectorization.get_vocabulary().index('[start]')

    # Inference loop
    for i in range(1, sequence_length):
        predictions = model.predict([eng_sequence, fre_sequence])
        predicted_token_index = np.argmax(predictions[0, i - 1, :])
        fre_sequence[0, i] = predicted_token_index

        # Check for the end token
        if fre_vectorization.get_vocabulary()[predicted_token_index] == '[end]':
            break

    # Convert the predicted indices to French text
    translated_text = ' '.join([fre_vectorization.get_vocabulary()[idx] for idx in fre_sequence[0] if idx > 0])
    if not("[end]" in translated_text):
        translated_text+=" [end]"
    
    #further processing (without the trained model) of the sentence....
    # to think about (ex. change cest with c'est in the french sentence)  (renverse preprocessing)
    

    return translated_text



#### **Inference tests of the trained Model**

In [94]:
#test before saving
# Example usage
eng_text_to_translate = "It's very beautiful" 
translated_sentence = translate_sentence(model, eng_text_to_translate, eng_vectorization, fre_vectorization, sequence_length)
print("Translated Sentence (current model): ", translated_sentence)

Translated Sentence (current model):  [start] cest très beau [end]


### Saving the trained model :
##### Be careful to save only after complete training and getting good performances

In [95]:
# Save the model
model.save('my_translation_model_gpu_v80.h5')

### Loading a model for inference

In [32]:
from tensorflow.keras.models import load_model
import os
print(os.listdir('/kaggle/input'))

['tabdelimited-englisharabic-sentence-pairs', 'translation-of-sentences-in-different-languages', 'my-model-v6-0-97-val-loss-and-0-3-train-loss']


In [59]:
# Load the model for inference
# model_v01 = load_model('my_translation_model_v01.h5')

# model_gpu_v02 = load_model('my_translation_model_gpu_v02.h5')   #big (20k vocab and 50 epochs, 32min of training with cpu+1gpu)
# model_gpu_v3 = load_model('my_translation_model_gpu_v3')   #big (14k vocab and 35 epochs, 18min of training with cpu+1gpu, lr=0.005, 0.2 train_loss/ 1.01 val_loss/ 512 latent dim) : not bad nor good

# model_gpu_v6 = load_model('my_translation_model_gpu_v6.h5')   #big (14k vocab and 30 epochs, .... of training with cpu+1gpu, lr=0.001, 0.41 train_loss/ 0.97 val_loss/ 256 latent dim) Impression : not bad

#model_gpu_v7     #Big : 20k vocab, 27 epochs, gpu, lr=0.02, 0.6 train_loss, 0.93 val_loss, 256 embedding/hidden, dropout(0.2)

model_gpu_v6 = load_model('/kaggle/input/my-model-v6-0-97-val-loss-and-0-3-train-loss/my_translation_model_gpu_v6.h5') 


# model_v03 = load_model('my_translation_model_v03')  #medium (10k vocab and 40 epochs, less than 10min of training with CPUs)

In [None]:
# # Example usage
# eng_text_to_translate = "don't talk to me"
# translated_sentence = translate_sentence(model_gpu_v02, eng_text_to_translate, eng_vectorization, fre_vectorization, sequence_length)
# print("Translated Sentence (gpu_v02): ", translated_sentence)

In [None]:
# # Example usage
# eng_text_to_translate = "say good words"
# translated_sentence = translate_sentence(model_gpu_v3, eng_text_to_translate, eng_vectorization, fre_vectorization, sequence_length)
# print("Translated Sentence (gpu_v3): ", translated_sentence)
# # for _ in range(30):
# #     input_sentence = random.choice(test_eng_texts)
# #     print( "Translated Sentence (gpu_v3): ", decode_sequence(input_sentence)() )

In [None]:
# Example usage
eng_text_to_translate = "I dream about you every night"
translated_sentence = translate_sentence(model_gpu_v6, eng_text_to_translate, eng_vectorization, fre_vectorization, sequence_length)
print("Translated Sentence (gpu_v6): ", translated_sentence)

In [3]:
"""Checking if a specific word is in vocabulary"""
vocab_fra = fre_vectorization.get_vocabulary()
vocab_eng = eng_vectorization.get_vocabulary()
print(len(vocab_eng))
'babe' in vocab_eng

NameError: name 'fre_vectorization' is not defined

In [None]:
# """ Proposition for adapting this model by integrating transformers"""
# import tensorflow as tf
# from tensorflow import keras
# from tensorflow.keras import layers

# # ... (previous code remains unchanged)

# # Encoder
# encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="encoder_inputs")
# encoder_embedding = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)(encoder_inputs)
# encoder_lstm = layers.LSTM(latent_dim, return_state=True)
# encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)
# encoder_states = [state_h, state_c]

# encoder = keras.Model(encoder_inputs, encoder_states)

# # Decoder
# decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="decoder_inputs")
# decoder_embedding = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)(decoder_inputs)
# decoder_lstm = layers.LSTM(latent_dim, return_sequences=True, return_state=True)
# decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
# decoder_dense = layers.Dense(vocab_size, activation="softmax")
# decoder_outputs = decoder_dense(decoder_outputs)

# decoder = keras.Model(decoder_inputs, decoder_outputs)

# # Model
# decoder_outputs = decoder(decoder_inputs)
# lstm_transformer = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs, name="lstm_transformer")

# # ... (rest of the code remains unchanged)

# # Training the model
# lstm_transformer.summary()
# lstm_transformer.compile(
#     "rmsprop", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
# )
# lstm_transformer.fit(train_ds, epochs=epochs, validation_data=val_ds)

# # ... (rest of the code remains unchanged)

# # Decoding test sentences
# fre_vocab = fre_vectorization.get_vocabulary()
# fre_index_lookup = dict(zip(range(len(fre_vocab)), fre_vocab))
# max_decoded_sentence_length = 20

# # ... (rest of the code remains unchanged)
