In [1]:
import numpy as np
import tensorflow as tf
from transformers import BertTokenizer, TFBertModel

# Load the pretrained Chinese BERT tokenizer and encoder from one checkpoint.
model_name = "bert-base-chinese"
tokenizer = BertTokenizer.from_pretrained(model_name)
bert_model = TFBertModel.from_pretrained(model_name)

# Three sample sentences used to inspect the feature extractor's output.
texts = [
    "這是一個很棒的產品！",
    "這家餐廳的服務很差...",
    "電影的劇情令人感動！",
]

# Tokenize the sentences as a single batch of TensorFlow tensors, with
# padding enabled and truncation capped at 512 tokens.
inputs = tokenizer(
    texts,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors="tf",
)
outputs = bert_model(**inputs)

# Keep only sequence position 0 of the last hidden state (the [CLS] slot),
# giving one fixed-size vector per sentence.
cls_embeddings = outputs.last_hidden_state[:, 0]
print("BERT 特徵維度:", cls_embeddings.shape)
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.models import Model

# Feed-forward classification head over a single 768-dimensional BERT feature
# vector per example.
input_layer = Input(shape=(768,))
x = Dense(units=128, activation="relu")(input_layer)
x = Dense(units=64, activation="relu")(x)
# Two softmax outputs: the task is binary classification.
output_layer = Dense(units=2, activation="softmax")(x)





Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

BERT 特徵維度: (3, 768)


In [2]:
# Wrap the dense head in a trainable Keras model. Integer class labels are
# used, hence the sparse categorical cross-entropy loss.
classifier = Model(inputs=input_layer, outputs=output_layer)
classifier.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)
classifier.summary()

# Placeholder training set: 100 samples of 768 features with labels
# 0 (negative) or 1 (positive). A seeded generator is used so the placeholder
# data is identical on every run.
# NOTE: both the features and the labels are random noise, not BERT features
# of labelled text, so the labels printed further down are not meaningful
# sentiment predictions. Replace with real labelled embeddings before drawing
# any conclusion from the output.
train_rng = np.random.default_rng(42)
X_train = train_rng.random((100, 768))
y_train = train_rng.integers(0, 2, size=100)
classifier.fit(X_train, y_train, epochs=5, batch_size=8)

# Encode three unseen sentences and classify their [CLS] vectors.
test_texts = ["這部電影真的很棒！", "這間餐廳讓我失望透頂...", "這款手機的性能很不錯"]
test_inputs = tokenizer(
    test_texts, return_tensors="tf", padding=True, truncation=True, max_length=512
)
test_outputs = bert_model(**test_inputs)
test_cls_embeddings = test_outputs.last_hidden_state[:, 0, :]  # [CLS] vector
predictions = classifier.predict(test_cls_embeddings)
predicted_labels = predictions.argmax(axis=1)  # index of the most probable class
print("預測結果:", predicted_labels)
# Assumed label convention: 1 = positive, 0 = negative.

Epoch 1/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.5337 - loss: 0.7177
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6330 - loss: 0.6229
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6192 - loss: 0.6338 
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7096 - loss: 0.5469
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7583 - loss: 0.4564 
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 165ms/step
預測結果: [0 1 0]


In [5]:
# Classify two batches with the dense head: first a pair of sentences whose
# clauses appear in opposite order, then a single strongly worded sentence.
# One result line is printed per batch.
for test_texts in (
    ["這部電影不錯，但是有點無聊。", "這部電影有點無聊，但是不錯。"],
    ["這是一個非常非常非常棒的產品！"],
):
    test_inputs = tokenizer(
        test_texts,
        max_length=512,
        truncation=True,
        padding=True,
        return_tensors="tf",
    )
    test_outputs = bert_model(**test_inputs)
    # Position 0 of the last hidden state is the [CLS] vector.
    test_cls_embeddings = test_outputs.last_hidden_state[:, 0]
    predictions = classifier.predict(test_cls_embeddings)
    # The predicted class is the index with the highest probability.
    predicted_labels = predictions.argmax(axis=-1)
    print("預測結果:", predicted_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
預測結果: [0 0]
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step
預測結果: [0]


In [3]:
# Classify one strongly worded sentence with the dense head.
test_texts = ["這是一個非常非常非常棒的產品！"]
test_inputs = tokenizer(
    test_texts,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors="tf",
)
test_outputs = bert_model(**test_inputs)
# Position 0 of the last hidden state is the [CLS] vector.
test_cls_embeddings = test_outputs.last_hidden_state[:, 0]
predictions = classifier.predict(test_cls_embeddings)
# The predicted class is the index with the highest probability.
predicted_labels = predictions.argmax(axis=-1)
print("預測結果:", predicted_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
預測結果: [0]


In [4]:
from tensorflow.keras.layers import LSTM

# Variant head: treat each 768-dimensional feature vector as a sequence of 768
# steps with one feature per step, and summarise it with an LSTM.
input_layer = Input(shape=(768, 1))
# Only the final LSTM state is kept (return_sequences defaults to False).
x = LSTM(units=128)(input_layer)
x = Dense(units=64, activation="relu")(x)
output_layer = Dense(units=2, activation="softmax")(x)

lstm_classifier = Model(inputs=input_layer, outputs=output_layer)
lstm_classifier.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Add the trailing single-feature axis the LSTM input expects, then train on
# the existing X_train / y_train.
X_train_lstm = X_train.reshape((-1, 768, 1))
lstm_classifier.fit(X_train_lstm, y_train, epochs=5, batch_size=8)

Epoch 1/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 258ms/step - accuracy: 0.4924 - loss: 0.6976
Epoch 2/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 247ms/step - accuracy: 0.4798 - loss: 0.6933
Epoch 3/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 263ms/step - accuracy: 0.4790 - loss: 0.6944
Epoch 4/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 252ms/step - accuracy: 0.5009 - loss: 0.6939
Epoch 5/5
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 251ms/step - accuracy: 0.5937 - loss: 0.6817


<keras.src.callbacks.history.History at 0x1c26c06d160>

In [5]:
# Classify three unseen sentences with the LSTM-over-[CLS] model.
test_texts = ["這部電影真的很棒！", "這間餐廳讓我失望透頂...", "這款手機的性能很不錯"]
test_inputs = tokenizer(
    test_texts, return_tensors="tf", padding=True, truncation=True, max_length=512
)
test_outputs = bert_model(**test_inputs)
test_cls_embeddings = test_outputs.last_hidden_state[:, 0, :]  # [CLS] vector
# lstm_classifier is declared with Input(shape=(768, 1)) and was trained on
# X_train.reshape(-1, 768, 1). Give the test features the same trailing axis
# explicitly instead of passing a rank-2 tensor and relying on implicit
# input-rank handling.
test_lstm_inputs = test_cls_embeddings.numpy().reshape(-1, 768, 1)
predictions = lstm_classifier.predict(test_lstm_inputs)
predicted_labels = predictions.argmax(axis=1)  # index of the most probable class
print("預測結果:", predicted_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 350ms/step
預測結果: [1 0 1]


In [6]:
# Classify a pair of sentences whose clauses appear in opposite order, using
# the LSTM-over-[CLS] model.
test_texts = ["這部電影不錯，但是有點無聊。", "這部電影有點無聊，但是不錯。"]
test_inputs = tokenizer(
    test_texts, return_tensors="tf", padding=True, truncation=True, max_length=512
)
test_outputs = bert_model(**test_inputs)
test_cls_embeddings = test_outputs.last_hidden_state[:, 0, :]  # [CLS] vector
# lstm_classifier is declared with Input(shape=(768, 1)) and was trained on
# X_train.reshape(-1, 768, 1). Give the test features the same trailing axis
# explicitly instead of passing a rank-2 tensor and relying on implicit
# input-rank handling.
test_lstm_inputs = test_cls_embeddings.numpy().reshape(-1, 768, 1)
predictions = lstm_classifier.predict(test_lstm_inputs)
predicted_labels = predictions.argmax(axis=1)  # index of the most probable class
print("預測結果:", predicted_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 459ms/step
預測結果: [0 0]


In [7]:
# Classify one strongly worded sentence with the LSTM-over-[CLS] model.
test_texts = ["這是一個非常非常非常棒的產品！"]
test_inputs = tokenizer(
    test_texts, return_tensors="tf", padding=True, truncation=True, max_length=512
)
test_outputs = bert_model(**test_inputs)
test_cls_embeddings = test_outputs.last_hidden_state[:, 0, :]  # [CLS] vector
# lstm_classifier is declared with Input(shape=(768, 1)) and was trained on
# X_train.reshape(-1, 768, 1). Give the test features the same trailing axis
# explicitly instead of passing a rank-2 tensor and relying on implicit
# input-rank handling.
test_lstm_inputs = test_cls_embeddings.numpy().reshape(-1, 768, 1)
predictions = lstm_classifier.predict(test_lstm_inputs)
predicted_labels = predictions.argmax(axis=1)  # index of the most probable class
print("預測結果:", predicted_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
預測結果: [0]


In [1]:
# All imports for this cell live at the top. `tensorflow` is imported as `tf`
# explicitly because the cell calls tf.keras.Model; without it the cell raises
# NameError on a fresh kernel.
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Input
from transformers import BertTokenizer, TFBertModel

# Revised approach after the LSTM-over-[CLS] experiment: use BERT's hidden
# output for the whole token sequence as the LSTM input.
model_name = "bert-base-chinese"
tokenizer = BertTokenizer.from_pretrained(model_name)
bert_model = TFBertModel.from_pretrained(model_name)

texts = ["這是一個很棒的產品！", "這家餐廳的服務很差...", "電影的劇情令人感動！"]
inputs = tokenizer(
    texts, return_tensors="tf", padding=True, truncation=True, max_length=512
)
outputs = bert_model(**inputs)
sequence_output = outputs.last_hidden_state  # (batch_size, seq_length, hidden_dim)

# The time dimension is left as None so the model accepts any sequence length:
# the 512-step placeholder data below as well as the shorter, dynamically
# padded batches produced by tokenizer(..., padding=True). A fixed (512, 768)
# input would reject the latter.
input_layer = Input(shape=(None, 768))
x = LSTM(128, return_sequences=True)(input_layer)
x = LSTM(64)(x)
output_layer = Dense(2, activation="softmax")(x)  # binary classification
lstm_model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
lstm_model.compile(
    optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
)

# Placeholder training set: 100 samples, 512 tokens each, 768 features per
# token, with random 0/1 labels. A seeded generator keeps the data identical
# across runs, and float32 halves the memory of the default float64 array.
# NOTE: this is random noise, not BERT output for labelled text; the fitted
# model carries no real sentiment signal.
train_rng = np.random.default_rng(42)
X_train = train_rng.random((100, 512, 768), dtype=np.float32)
y_train = train_rng.integers(0, 2, size=100)
lstm_model.fit(X_train, y_train, epochs=15, batch_size=8)





Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 556ms/step - accuracy: 0.5523 - loss: 0.7897
Epoch 2/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 559ms/step - accuracy: 0.5139 - loss: 0.6974
Epoch 3/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 527ms/step - accuracy: 0.4577 - loss: 0.7375
Epoch 4/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 547ms/step - accuracy: 0.6477 - loss: 0.6797
Epoch 5/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 574ms/step - accuracy: 0.5545 - loss: 0.6964
Epoch 6/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 513ms/step - accuracy: 0.5410 - loss: 0.6805
Epoch 7/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 531ms/step - accuracy: 0.5890 - loss: 0.6664
Epoch 8/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 512ms/step - accuracy: 0.6075 - loss: 0.6585
Epoch 9/15
[1m13/13[0m [32m━━━━━━━

<keras.src.callbacks.history.History at 0x21a71937140>

In [7]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Model

# Encode three test sentences and keep BERT's hidden state for every token.
test_texts = ["這部電影真的很棒！", "這間餐廳讓我失望透頂...", "這款手機的性能很不錯"]
test_inputs = tokenizer(
    test_texts,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors="tf",
)
test_outputs = bert_model(**test_inputs)

# Despite the "cls" in the name, this is the full last hidden state (all token
# positions), not just the [CLS] slice.
test_cls_embeddings = test_outputs.last_hidden_state
print(test_cls_embeddings.shape)
print(type(test_inputs))
print(test_inputs.keys())  # should include 'input_ids' and 'attention_mask'

# Stacked LSTM classifier. The time dimension is None so sequences of any
# length are accepted; each step carries 768 features.
input_layer = Input(shape=(None, 768))
x = LSTM(units=128, return_sequences=True)(input_layer)
x = LSTM(units=64)(x)
output_layer = Dense(units=2, activation="softmax")(x)  # binary classification

lstm_model = Model(inputs=input_layer, outputs=output_layer)
lstm_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Placeholder training data: 100 samples, 512 tokens each, 768 features per
# token, with random 0/1 labels.
X_train = np.random.rand(100, 512, 768)
y_train = np.random.randint(low=0, high=2, size=100)

# Train the LSTM on the placeholder data.
lstm_model.fit(X_train, y_train, epochs=15, batch_size=8)

# Convert the BERT output to NumPy before predicting.
test_cls_embeddings_np = test_cls_embeddings.numpy()
predictions = lstm_model.predict(test_cls_embeddings_np)
# The predicted class is the index with the highest probability.
predicted_labels = predictions.argmax(axis=-1)
print("預測結果:", predicted_labels)

(3, 15, 768)
<class 'transformers.tokenization_utils_base.BatchEncoding'>
dict_keys(['input_ids', 'token_type_ids', 'attention_mask'])
Epoch 1/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 534ms/step - accuracy: 0.4635 - loss: 0.7107
Epoch 2/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 543ms/step - accuracy: 0.4769 - loss: 0.6937
Epoch 3/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 547ms/step - accuracy: 0.5572 - loss: 0.7090
Epoch 4/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 559ms/step - accuracy: 0.5709 - loss: 0.7007
Epoch 5/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 599ms/step - accuracy: 0.5592 - loss: 0.6754
Epoch 6/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 622ms/step - accuracy: 0.6631 - loss: 0.6653
Epoch 7/15
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 614ms/step - accuracy: 0.6746 - loss: 0.6521
Epoch 8/15
[1m13/13[0

In [8]:
# Classify a pair of sentences whose clauses appear in opposite order, using
# the sequence-level LSTM model.
test_texts = ["這部電影不錯，但是有點無聊。", "這部電影有點無聊，但是不錯。"]
test_inputs = tokenizer(
    test_texts,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors="tf",
)
test_outputs = bert_model(**test_inputs)
# Full last hidden state (every token position), despite the "cls" in the name.
test_cls_embeddings = test_outputs.last_hidden_state
# Convert to NumPy before predicting.
test_cls_embeddings_np = test_cls_embeddings.numpy()
predictions = lstm_model.predict(test_cls_embeddings_np)
# The predicted class is the index with the highest probability.
predicted_labels = predictions.argmax(axis=-1)
print("預測結果:", predicted_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 545ms/step
預測結果: [0 0]


In [9]:
# Classify one strongly worded sentence with the sequence-level LSTM model.
test_texts = ["這是一個非常非常非常棒的產品！"]
test_inputs = tokenizer(
    test_texts,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors="tf",
)
test_outputs = bert_model(**test_inputs)
# Full last hidden state (every token position), despite the "cls" in the name.
test_cls_embeddings = test_outputs.last_hidden_state
# Convert to NumPy before predicting.
test_cls_embeddings_np = test_cls_embeddings.numpy()
predictions = lstm_model.predict(test_cls_embeddings_np)
# The predicted class is the index with the highest probability.
predicted_labels = predictions.argmax(axis=-1)
print("預測結果:", predicted_labels)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
預測結果: [0]


In [14]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense, Input, Multiply
from tensorflow.keras.models import Model
from transformers import (  # LSTM之後的修改? 使用 BERT 隱層輸出（整個序列）作為 LSTM 輸入 。
    BertTokenizer,
    TFBertModel,
)

# Load the pretrained Chinese BERT tokenizer and encoder from one checkpoint.
model_name = "bert-base-chinese"
tokenizer = BertTokenizer.from_pretrained(model_name)
bert_model = TFBertModel.from_pretrained(model_name)

# Placeholder training set: 1000 samples, 512 tokens each, 768 features per
# token, with random 0/1 labels.
# The array is generated directly as float32: at this size the default float64
# array is roughly 3.1 GB, and float32 needs half of that. A seeded generator
# keeps the placeholder data identical across runs.
# NOTE: this is random noise, not BERT output for labelled text; a model
# fitted on it carries no real sentiment signal.
train_rng = np.random.default_rng(42)
X_train = train_rng.random((1000, 512, 768), dtype=np.float32)
y_train = train_rng.integers(0, 2, size=1000)


class AttentionLayer(tf.keras.layers.Layer):
    """Re-weight each step of a sequence by a learned softmax score.

    A single trainable vector ``W`` (one entry per feature) is contracted with
    the last axis of the input to give one scalar score per step. The scores
    are normalised with a softmax over axis 1 and multiplied back onto the
    input, so the output has the same shape as the input.

    In this notebook the layer is applied to LSTM outputs produced with
    ``return_sequences=True``, so axis 1 is the time axis.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Create the scoring vector, sized to the input's last dimension."""
        self.W = self.add_weight(
            shape=(input_shape[-1],), initializer="random_normal", trainable=True
        )
        super().build(input_shape)

    def call(self, inputs):
        """Return ``inputs`` scaled step-wise by the softmax attention weights."""
        # One scalar score per step: contract the feature axis with W.
        scores = tf.tensordot(inputs, self.W, axes=1)
        # Normalise across axis 1 so the weights of one sequence sum to 1.
        weights = tf.nn.softmax(scores, axis=1)
        # Broadcast the weights over the feature axis. A plain multiply is used
        # rather than constructing a new Multiply() Keras layer on every call.
        return inputs * tf.expand_dims(weights, -1)


# Variable-length sequences with 768 features per step.
input_layer = Input(shape=(None, 768))

# Three stacked LSTMs with an attention re-weighting after each of the first
# two. Only the last LSTM collapses the sequence into a single vector.
x = LSTM(units=256, return_sequences=True, dropout=0.2)(input_layer)
x = AttentionLayer()(x)  # apply the attention weighting
x = LSTM(units=128, return_sequences=True, dropout=0.2)(x)
x = AttentionLayer()(x)  # apply the attention weighting
x = LSTM(units=64, dropout=0.2)(x)
output_layer = Dense(units=2, activation="softmax")(x)

lstm_attention_model = tf.keras.Model(inputs=input_layer, outputs=output_layer)
lstm_attention_model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
lstm_attention_model.fit(X_train, y_train, epochs=15, batch_size=128)

# First batch: a pair of sentences whose clauses appear in opposite order.
test_texts = ["這部電影不錯，但是有點無聊。", "這部電影有點無聊，但是不錯。"]
test_inputs = tokenizer(
    test_texts,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors="tf",
)
test_outputs = bert_model(**test_inputs)
# Full last hidden state, shape (batch_size, seq_length, 768); passed to
# predict() as a tensor, without converting to NumPy.
test_embeddings = test_outputs.last_hidden_state

predictions = lstm_attention_model.predict(test_embeddings)
# The predicted class is the index with the highest probability.
predicted_labels = predictions.argmax(axis=-1)
print("預測結果:", predicted_labels)

# Remaining batches: one strongly worded sentence, then three mixed reviews.
# These go through the same steps but are converted to NumPy before predict().
for test_texts in (
    ["這是一個非常非常非常棒的產品！"],
    ["這部電影真的很棒！", "這間餐廳讓我失望透頂...", "這款手機的性能很不錯"],
):
    test_inputs = tokenizer(
        test_texts,
        max_length=512,
        truncation=True,
        padding=True,
        return_tensors="tf",
    )
    test_outputs = bert_model(**test_inputs)
    # Full last hidden state, despite the "cls" in the variable name.
    test_cls_embeddings = test_outputs.last_hidden_state
    test_cls_embeddings_np = test_cls_embeddings.numpy()
    predictions = lstm_attention_model.predict(test_cls_embeddings_np)
    predicted_labels = predictions.argmax(axis=-1)
    print("預測結果:", predicted_labels)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

Epoch 1/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 10s/step - accuracy: 0.5216 - loss: 0.6932 
Epoch 2/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 10s/step - accuracy: 0.4953 - loss: 0.6933 
Epoch 3/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m82s[0m 10s/step - accuracy: 0.5077 - loss: 0.6932 
Epoch 4/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 10s/step - accuracy: 0.4792 - loss: 0.6932 
Epoch 5/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 11s/step - accuracy: 0.5024 - loss: 0.6932
Epoch 6/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 11s/step - accuracy: 0.4863 - loss: 0.6932 
Epoch 7/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m90s[0m 11s/step - accuracy: 0.4866 - loss: 0.6932 
Epoch 8/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m89s[0m 11s/step - accuracy: 0.5156 - loss: 0.6931 
Epoch 9/15
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37