## 整合嵌入特徵到分類模型

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf

In [None]:
# Text embedding step: encode the 'text' column of every split with
# `model.encode`, in train / validation / test order.
# NOTE(review): `model` is presumably a SentenceTransformer — confirm against
# the cell that creates it.
train_embeddings, val_embeddings, test_embeddings = (
    model.encode(split['text'].tolist(), show_progress_bar=True)
    for split in (train_df, val_df, test_df)
)

In [None]:
# Feature standardization.
# The scaler's statistics are learned from the training embeddings only; the
# same fitted scaler is then applied to all three splits, so validation and
# test data never influence the scaling parameters.
scaler = StandardScaler().fit(train_embeddings)
train_embeddings, val_embeddings, test_embeddings = (
    scaler.transform(split)
    for split in (train_embeddings, val_embeddings, test_embeddings)
)

In [None]:
# Attach the standardized embeddings to the BERT encodings of each split,
# stored as tensors under the 'additional_features' key.
(
    train_encodings['additional_features'],
    val_encodings['additional_features'],
    test_encodings['additional_features'],
) = map(
    tf.convert_to_tensor,
    (train_embeddings, val_embeddings, test_embeddings),
)

In [None]:
# Model wrapper that lets the classifier consume extra features alongside the
# base model's output.
class CombinedModel(tf.keras.Model):
    """Classifier that fuses a base model's output with extra input features.

    The inputs mapping must contain an ``'additional_features'`` entry. That
    entry goes through a ReLU dense layer, is concatenated with the base
    model's output, and the result is fed to a softmax classifier.

    Args:
        base_model: Callable model applied to the remaining input entries.
            NOTE(review): presumably a Hugging Face TF BERT model — confirm.
        num_labels: Number of output classes of the softmax layer.
        embedding_dim: Number of units of the dense layer applied to the
            additional features.
    """

    # Key under which the extra features travel inside the inputs mapping.
    _EXTRA_KEY = 'additional_features'

    def __init__(self, base_model, num_labels, embedding_dim):
        super().__init__()
        self.base_model = base_model
        self.additional_dense = tf.keras.layers.Dense(embedding_dim, activation='relu')
        self.concat_layer = tf.keras.layers.Concatenate()
        self.classifier = tf.keras.layers.Dense(num_labels, activation='softmax')

    @staticmethod
    def _as_tensor(base_output):
        """Return the single tensor to fuse from the base model's return value.

        Output objects exposing ``pooler_output`` or ``logits`` are unwrapped,
        a tuple/list yields its first element, and anything else (e.g. a plain
        tensor) is returned unchanged.
        NOTE(review): the tuple branch assumes the first element is a
        (batch, features) tensor — confirm for the base model actually used.
        """
        for attr in ('pooler_output', 'logits'):
            candidate = getattr(base_output, attr, None)
            if candidate is not None:
                return candidate
        if isinstance(base_output, (tuple, list)):
            return base_output[0]
        return base_output

    def call(self, inputs, training=False):
        """Run the fused forward pass and return class probabilities.

        Args:
            inputs: Mapping of input names to tensors; must include
                ``'additional_features'``.
            training: Forwarded to the base model so it can switch between
                training and inference behavior.

        Returns:
            The softmax layer's output over ``num_labels`` classes.
        """
        # The base model must not receive the extra-features entry: it is not
        # one of its own inputs.
        base_inputs = {
            key: value for key, value in inputs.items() if key != self._EXTRA_KEY
        }
        base_output = self._as_tensor(self.base_model(base_inputs, training=training))
        additional_features = self.additional_dense(inputs[self._EXTRA_KEY])
        combined_output = self.concat_layer([base_output, additional_features])
        return self.classifier(combined_output)

In [None]:
# Build the fused classifier for two classes; the dense layer for the extra
# features is sized to the embedding width.
# NOTE(review): `model` is also the object whose `.encode(...)` produced the
# embeddings earlier in this notebook — confirm it is the intended base model.
combined_model = CombinedModel(
    model,
    num_labels=2,
    embedding_dim=train_embeddings.shape[1],
)
# Integer class labels are expected by the sparse categorical cross-entropy loss.
combined_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## 多模型結果融合

In [None]:
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Collect each model's predictions on the test data so they can be fused.
# NOTE(review): `model` is also the object whose `.encode(...)` is called on
# the text columns earlier in this notebook — confirm it refers to the BERT
# classifier at this point.
# NOTE(review): the fusion step adds these two results element-wise, so both
# are presumably plain arrays of broadcast-compatible shape — confirm.
bert_predictions = model.predict(test_dataset)
bilstm_predictions = training_model.predict(X_test_sequences)

In [None]:
# Fusion step: average the two models' predictions element-wise, then round
# to the nearest integer to obtain the final labels.
# NOTE(review): np.round rounds exact halves to the nearest even value, so an
# average of exactly 0.5 becomes 0 — confirm that tie-break is intended.
final_predictions = np.round(
    (bert_predictions + bilstm_predictions) / 2
).astype(int)

## 視覺化

In [None]:
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

In [None]:
# Project the training embeddings to two dimensions with t-SNE
# (fixed random_state so the layout is reproducible).
embeddings_tsne = TSNE(random_state=42, n_components=2).fit_transform(train_embeddings)

# Draw one scatter series per distinct value of the 'generated' column,
# using a boolean row mask to pick that class's points.
plt.figure(figsize=(10, 8))
for label in np.unique(train_df['generated']):
    idx = train_df['generated'] == label
    plt.scatter(
        embeddings_tsne[idx, 0],
        embeddings_tsne[idx, 1],
        label=f'Class {label}',
    )
plt.title('t-SNE Visualization of Embeddings')
plt.legend()
plt.show()

## 整合後的結果
1. BERT 模型與 SentenceTransformer 結合，提高分類模型對文字特徵的感知能力。
2. 以 t-SNE 降維後的視覺化，可直觀呈現訓練資料中各類別（`generated` 標籤）在嵌入空間中的分布。
3. 多模型融合有效減少單一模型偏差的風險。