In [2]:
import pandas as pd

In [3]:
# Load the dataset
data = pd.read_csv('archive/text.csv')

In [4]:
# Data Exploration
print("Dataset Shape:", data.shape)
print("\nSample Data:")
print(data.head())

Dataset Shape: (416809, 3)

Sample Data:
   Unnamed: 0                                               text  label
0           0      i just feel really helpless and heavy hearted      4
1           1  ive enjoyed being able to slouch about relax a...      0
2           2  i gave up my internship with the dmrg and am f...      4
3           3                         i dont know i feel so lost      0
4           4  i am a kindergarten teacher and i am thoroughl...      4


In [5]:
# Check for missing values
print("\nMissing Values:")
print(data.isnull().sum())


Missing Values:
Unnamed: 0    0
text          0
label         0
dtype: int64


In [6]:
# Data Preprocessing
# Assuming 'tweet' column contains the tweet text and 'emotion' column contains the emotion label
tweets = data['text']
emotions = data['label']

In [7]:

# Perform tokenization, lowercasing, and removing punctuation
import re
def preprocess_text(text):
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove punctuation
    text = text.lower()  # Convert text to lowercase
    return text

tweets = tweets.apply(preprocess_text)


In [8]:
# Handle class imbalance if necessary (e.g., using SMOTE)
from imblearn.over_sampling import SMOTE

# Split the data into training and testing sets
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(tweets, emotions, test_size=0.2, random_state=42)

# Encode the target labels
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [9]:
# Print the class distribution
print("\nClass Distribution in Training Set:")
print(pd.Series(y_train_encoded).value_counts())



Class Distribution in Training Set:
1    112903
0     96986
3     45876
4     38118
2     27625
5     11939
Name: count, dtype: int64


In [10]:
# Define Evaluation Metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def evaluate_model(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred, average='weighted')
    recall = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)


# RNN Representation

In [12]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.16.1-cp311-cp311-win_amd64.whl.metadata (3.5 kB)
Collecting tensorflow-intel==2.16.1 (from tensorflow)
  Downloading tensorflow_intel-2.16.1-cp311-cp311-win_amd64.whl.metadata (5.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading flatbuffers-24.3.7-py2.py3-none-any.whl.metadata (849 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading gast-0.5.4-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow-intel==2.16.1->tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting h5py>=3.10.0 (from tensorflow-i

In [13]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

In [14]:
# Tokenize the text data
max_words = 10000  # Maximum number of words to keep in the vocabulary
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

In [15]:
# Convert text data to sequences
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

In [16]:
# Pad sequences to ensure uniform length
max_sequence_length = 100  # Define your desired sequence length
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)


In [18]:
# Define the RNN model
embedding_dim = 100  # Dimension of word embeddings
hidden_units = 64  # Number of units in the RNN layer

model_rnn = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim),
    SimpleRNN(units=hidden_units, activation='tanh'),
    Dense(units=6, activation='softmax')  # Assuming 6 emotions to classify
])

In [19]:
# Compile the model
model_rnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [20]:
# Print model summary
print(model_rnn.summary())

None


In [21]:
# Train the model
epochs = 10  # Define the number of epochs
batch_size = 64  # Define the batch size
model_rnn.fit(X_train_padded, y_train_encoded, epochs=epochs, batch_size=batch_size, validation_data=(X_test_padded, y_test_encoded))

Epoch 1/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m119s[0m 22ms/step - accuracy: 0.7421 - loss: 0.7329 - val_accuracy: 0.9005 - val_loss: 0.3061
Epoch 2/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m135s[0m 21ms/step - accuracy: 0.9025 - loss: 0.2835 - val_accuracy: 0.8956 - val_loss: 0.2777
Epoch 3/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m108s[0m 21ms/step - accuracy: 0.9040 - loss: 0.2725 - val_accuracy: 0.8467 - val_loss: 0.4330
Epoch 4/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 21ms/step - accuracy: 0.8717 - loss: 0.3865 - val_accuracy: 0.9097 - val_loss: 0.2381
Epoch 5/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 21ms/step - accuracy: 0.9254 - loss: 0.1743 - val_accuracy: 0.9219 - val_loss: 0.1745
Epoch 6/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m142s[0m 21ms/step - accuracy: 0.9318 - loss: 0.1434 - val_accuracy: 0.9239 - val_loss: 0.135

<keras.src.callbacks.history.History at 0x21342aa0b90>

In [22]:
# Evaluate the model
loss, accuracy = model_rnn.evaluate(X_test_padded, y_test_encoded)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - accuracy: 0.9138 - loss: 0.2162
Test Loss: 0.21466274559497833
Test Accuracy: 0.9143974184989929


In [23]:
# Obtain representations
get_representation_rnn = Sequential(model_rnn.layers[:-1])  # Remove the output layer
representations_train_rnn = get_representation_rnn.predict(X_train_padded)
representations_test_rnn = get_representation_rnn.predict(X_test_padded)

[1m10421/10421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 5ms/step
[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step


# LSTM

In [24]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

In [25]:
# Tokenize the text data
max_words = 10000  # Maximum number of words to keep in the vocabulary
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

In [26]:
# Convert text data to sequences
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

In [27]:
# Pad sequences to ensure uniform length
max_sequence_length = 100  # Define your desired sequence length
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)

In [29]:
# Define the LSTM model
embedding_dim = 100  # Dimension of word embeddings
hidden_units = 64  # Number of units in the LSTM layer

model_lstm = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim),
    LSTM(units=hidden_units),
    Dense(units=6, activation='softmax')  # Assuming 6 emotions to classify
])

In [30]:
# Compile the model
model_lstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [31]:
# Print model summary
print(model_lstm.summary())

None


In [32]:
# Train the model
epochs = 10  # Define the number of epochs
batch_size = 64  # Define the batch size
model_lstm.fit(X_train_padded, y_train_encoded, epochs=epochs, batch_size=batch_size, validation_data=(X_test_padded, y_test_encoded))

Epoch 1/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 49ms/step - accuracy: 0.8181 - loss: 0.4568 - val_accuracy: 0.9363 - val_loss: 0.1000
Epoch 2/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m263s[0m 49ms/step - accuracy: 0.9389 - loss: 0.0976 - val_accuracy: 0.9363 - val_loss: 0.1054
Epoch 3/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m266s[0m 50ms/step - accuracy: 0.9399 - loss: 0.0918 - val_accuracy: 0.9387 - val_loss: 0.0952
Epoch 4/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m261s[0m 50ms/step - accuracy: 0.9421 - loss: 0.0867 - val_accuracy: 0.9367 - val_loss: 0.0967
Epoch 5/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m260s[0m 50ms/step - accuracy: 0.9433 - loss: 0.0850 - val_accuracy: 0.9346 - val_loss: 0.0959
Epoch 6/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m265s[0m 51ms/step - accuracy: 0.9448 - loss: 0.0818 - val_accuracy: 0.9331 - val_loss: 0.098

<keras.src.callbacks.history.History at 0x21342ab4190>

In [33]:
# Evaluate the model
loss, accuracy = model_lstm.evaluate(X_test_padded, y_test_encoded)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 13ms/step - accuracy: 0.9287 - loss: 0.1124
Test Loss: 0.11196041107177734
Test Accuracy: 0.929008424282074


In [34]:
# Obtain representations
get_representation_lstm = Sequential(model_lstm.layers[:-1])  # Remove the output layer
representations_train_lstm = get_representation_lstm.predict(X_train_padded)
representations_test_lstm = get_representation_lstm.predict(X_test_padded)

[1m10421/10421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 12ms/step
[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 12ms/step


# GRU

In [15]:
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense

In [16]:
# Tokenize the text data
max_words = 10000  # Maximum number of words to keep in the vocabulary
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

In [17]:
# Convert text data to sequences
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

In [18]:
# Pad sequences to ensure uniform length
max_sequence_length = 100  # Define your desired sequence length
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)

In [19]:
# Define the GRU model
embedding_dim = 100  # Dimension of word embeddings
hidden_units = 64  # Number of units in the GRU layer

model_gru = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim),
    GRU(units=hidden_units),
    Dense(units=6, activation='softmax')  # Assuming 6 emotions to classify
])

In [20]:
# Compile the model
model_gru.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [21]:
# Print model summary
print(model_gru.summary())

None


In [22]:
# Train the model
epochs = 10  # Define the number of epochs
batch_size = 64  # Define the batch size
model_gru.fit(X_train_padded, y_train_encoded, epochs=epochs, batch_size=batch_size, validation_data=(X_test_padded, y_test_encoded))

Epoch 1/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m192s[0m 36ms/step - accuracy: 0.8372 - loss: 0.3971 - val_accuracy: 0.9391 - val_loss: 0.0964
Epoch 2/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 38ms/step - accuracy: 0.9418 - loss: 0.0905 - val_accuracy: 0.9392 - val_loss: 0.0941
Epoch 3/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 40ms/step - accuracy: 0.9425 - loss: 0.0869 - val_accuracy: 0.9399 - val_loss: 0.0928
Epoch 4/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m214s[0m 41ms/step - accuracy: 0.9432 - loss: 0.0841 - val_accuracy: 0.9387 - val_loss: 0.0930
Epoch 5/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m208s[0m 40ms/step - accuracy: 0.9450 - loss: 0.0814 - val_accuracy: 0.9392 - val_loss: 0.0928
Epoch 6/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m210s[0m 40ms/step - accuracy: 0.9446 - loss: 0.0805 - val_accuracy: 0.9394 - val_loss: 0.097

<keras.src.callbacks.history.History at 0x23df315de10>

In [23]:
# Evaluate the model
loss, accuracy = model_gru.evaluate(X_test_padded, y_test_encoded)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 8ms/step - accuracy: 0.9255 - loss: 0.1074
Test Loss: 0.10832192003726959
Test Accuracy: 0.9256495833396912


In [24]:
# Obtain representations
get_representation_gru = Sequential(model_gru.layers[:-1])  # Remove the output layer
representations_train_gru = get_representation_gru.predict(X_train_padded)
representations_test_gru = get_representation_gru.predict(X_test_padded)

[1m10421/10421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m76s[0m 7ms/step
[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 7ms/step


# bi-LSTM

In [25]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense

In [27]:
# Define the bi-LSTM model
embedding_dim = 100  # Dimension of word embeddings
hidden_units = 64  # Number of units in the LSTM layer

model_bi_lstm = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim),
    Bidirectional(LSTM(units=hidden_units)),
    Dense(units=6, activation='softmax')  # Assuming 6 emotions to classify
])

In [28]:
# Compile the model
model_bi_lstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [29]:
# Print model summary
print(model_bi_lstm.summary())

None


In [30]:
# Train the model
epochs = 10  # Define the number of epochs
batch_size = 64  # Define the batch size
model_bi_lstm.fit(X_train_padded, y_train_encoded, epochs=epochs, batch_size=batch_size, validation_data=(X_test_padded, y_test_encoded))

Epoch 1/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m240s[0m 46ms/step - accuracy: 0.8230 - loss: 0.4422 - val_accuracy: 0.9359 - val_loss: 0.1052
Epoch 2/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m241s[0m 46ms/step - accuracy: 0.9384 - loss: 0.0971 - val_accuracy: 0.9385 - val_loss: 0.0959
Epoch 3/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m244s[0m 47ms/step - accuracy: 0.9423 - loss: 0.0891 - val_accuracy: 0.9369 - val_loss: 0.0943
Epoch 4/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 48ms/step - accuracy: 0.9439 - loss: 0.0854 - val_accuracy: 0.9359 - val_loss: 0.0944
Epoch 5/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m246s[0m 47ms/step - accuracy: 0.9444 - loss: 0.0828 - val_accuracy: 0.9331 - val_loss: 0.0960
Epoch 6/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m251s[0m 48ms/step - accuracy: 0.9444 - loss: 0.0819 - val_accuracy: 0.9300 - val_loss: 0.098

<keras.src.callbacks.history.History at 0x23d80424190>

In [31]:
# Evaluate the model
loss, accuracy = model_bi_lstm.evaluate(X_test_padded, y_test_encoded)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 12ms/step - accuracy: 0.9290 - loss: 0.1121
Test Loss: 0.11276523023843765
Test Accuracy: 0.9288764595985413


In [32]:
# Obtain representations
get_representation_bi_lstm = Sequential(model_bi_lstm.layers[:-1])  # Remove the output layer
representations_train_bi_lstm = get_representation_bi_lstm.predict(X_train_padded)
representations_test_bi_lstm = get_representation_bi_lstm.predict(X_test_padded)

[1m10421/10421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m116s[0m 11ms/step
[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 14ms/step


# bi-GRU

In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, GRU, Dense

In [35]:
# Define the bi-GRU model
embedding_dim = 100  # Dimension of word embeddings
hidden_units = 64  # Number of units in the GRU layer

model_bi_gru = Sequential([
    Embedding(input_dim=max_words, output_dim=embedding_dim),
    Bidirectional(GRU(units=hidden_units)),
    Dense(units=6, activation='softmax')  # Assuming 6 emotions to classify
])

In [36]:
# Compile the model
model_bi_gru.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [37]:
# Print model summary
print(model_bi_gru.summary())

None


In [38]:
# Train the model
epochs = 10  # Define the number of epochs
batch_size = 64  # Define the batch size
model_bi_gru.fit(X_train_padded, y_train_encoded, epochs=epochs, batch_size=batch_size, validation_data=(X_test_padded, y_test_encoded))

Epoch 1/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m413s[0m 78ms/step - accuracy: 0.8428 - loss: 0.3833 - val_accuracy: 0.9385 - val_loss: 0.0952
Epoch 2/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m388s[0m 74ms/step - accuracy: 0.9408 - loss: 0.0926 - val_accuracy: 0.9397 - val_loss: 0.0925
Epoch 3/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m396s[0m 76ms/step - accuracy: 0.9418 - loss: 0.0889 - val_accuracy: 0.9378 - val_loss: 0.0936
Epoch 4/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m396s[0m 76ms/step - accuracy: 0.9440 - loss: 0.0835 - val_accuracy: 0.9382 - val_loss: 0.0927
Epoch 5/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m404s[0m 78ms/step - accuracy: 0.9442 - loss: 0.0812 - val_accuracy: 0.9384 - val_loss: 0.0944
Epoch 6/10
[1m5211/5211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m436s[0m 76ms/step - accuracy: 0.9448 - loss: 0.0804 - val_accuracy: 0.9375 - val_loss: 0.096

<keras.src.callbacks.history.History at 0x23d80946b10>

In [39]:
# Evaluate the model
loss, accuracy = model_bi_gru.evaluate(X_test_padded, y_test_encoded)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 13ms/step - accuracy: 0.9274 - loss: 0.1062
Test Loss: 0.10495075583457947
Test Accuracy: 0.9280247688293457


In [40]:
# Obtain representations
get_representation_bi_gru = Sequential(model_bi_gru.layers[:-1])  # Remove the output layer
representations_train_bi_gru = get_representation_bi_gru.predict(X_train_padded)
representations_test_bi_gru = get_representation_bi_gru.predict(X_test_padded)

[1m10421/10421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 12ms/step
[1m2606/2606[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 13ms/step
