In [2]:
import keras.utils
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import LSTM, Embedding, Dense, Conv1D, GlobalMaxPooling1D
from keras.preprocessing.text import Tokenizer
from keras.losses import binary_crossentropy
from sklearn.metrics import roc_curve, roc_auc_score
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.layers import Bidirectional, LSTM
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from sklearn.metrics import roc_curve, roc_auc_score
from sklearn.base import BaseEstimator
from wittgenstein import RIPPER
from wittgenstein.interpret import interpret_model, score_fidelity
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [3]:
# Load the dataset and pull out the review text column and the sentiment label column.
data = pd.read_csv('TV.csv')
X, y = data['Nhận xét đánh giá'].values, data['Cảm xúc'].values

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Map the sentiment labels to integer ids. The encoder is fitted on the training
# labels only; the validation labels are mapped with that same fitted encoder.
label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_val = label_encoder.transform(y_val)


In [4]:
max_words = 5000  # Vocabulary cap handed to the tokenizer (num_words)
max_len = 1000  # Length every sequence is padded / truncated to

# Build the vocabulary from the training texts only.
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(X_train)

# Convert each split to integer id sequences, then pad them to a common length.
X_train_seq, X_val_seq = (
    keras.utils.pad_sequences(tokenizer.texts_to_sequences(texts), maxlen=max_len)
    for texts in (X_train, X_val)
)

In [5]:
embedding_dim = 120  # Size of each learned word vector
num_filters = 128  # Number of convolution filters
kernel_size = 5  # Width of the convolution window
num_classes = len(label_encoder.classes_)  # One output unit per encoded label

# Embedding -> 1D convolution -> global max pooling -> softmax over the classes.
model = Sequential([
    Embedding(max_words, embedding_dim, input_length=max_len),
    Conv1D(num_filters, kernel_size, activation='relu'),
    GlobalMaxPooling1D(),
    Dense(num_classes, activation='softmax'),
])

# The sparse loss is used because the targets are the integer ids produced by
# the label encoder rather than one-hot vectors.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)




In [6]:
batch_size = 64  # Samples per gradient update
epochs = 10  # Passes over the training data

# Train the CNN and score the held-out split at the end of every epoch.
model.fit(
    x=X_train_seq,
    y=y_train,
    validation_data=(X_val_seq, y_val),
    epochs=epochs,
    batch_size=batch_size,
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1c89947b5e0>

In [7]:
# evaluate() yields the loss followed by the metric the model was compiled with (accuracy).
val_metrics = model.evaluate(X_val_seq, y_val)
loss, accuracy = val_metrics
print(f"Validation loss: {loss}")
print(f"Validation accuracy: {accuracy}")

Validation loss: 0.929019570350647
Validation accuracy: 0.6945782899856567


In [None]:
# Hybrid CNN + RIPPER evaluation on the validation split.

CNN_WEIGHT = 0.7  # Share of the blended score taken from the CNN softmax output
RIPPER_WEIGHT = 0.3  # Share taken from the RIPPER rule votes

# 1. Reuse every trained layer except the final Dense classifier as a feature
#    extractor. The pooling layer has to be included: without it the convolution
#    output is 3-D (samples, positions, filters), whereas a rule learner needs a
#    2-D (samples, features) table.
cnn_feature_extractor = Sequential(model.layers[:-1])

# 2. Extract the features and wrap them in DataFrames so the training and
#    validation tables share the same column names.
train_feature_array = cnn_feature_extractor.predict(X_train_seq)
val_feature_array = cnn_feature_extractor.predict(X_val_seq)
cnn_feature_names = [f"cnn_{i}" for i in range(train_feature_array.shape[1])]
X_train_cnn_features = pd.DataFrame(train_feature_array, columns=cnn_feature_names)
X_val_cnn_features = pd.DataFrame(val_feature_array, columns=cnn_feature_names)

# 3. RIPPER learns rules for a single positive class, so train one classifier
#    per encoded label (one-vs-rest) on a 0/1 target.
ripper_classifiers = {}
for class_index in range(num_classes):
    is_class = (y_train == class_index).astype(int)
    classifier = RIPPER(random_state=42)
    classifier.fit(X_train_cnn_features, y=is_class, pos_class=1)
    ripper_classifiers[class_index] = classifier

# 4. Collect both models' class scores for the validation split.
cnn_predictions = model.predict(X_val_seq)

# Each RIPPER model returns one boolean per sample; stack them into a
# (samples, classes) vote matrix.
ripper_votes = np.column_stack([
    np.asarray(ripper_classifiers[class_index].predict(X_val_cnn_features), dtype=float)
    for class_index in range(num_classes)
])

# Normalise each row so it sums to 1, like the softmax output. If no rule set
# fired for a sample, use a uniform row so only the CNN decides for that sample.
vote_totals = ripper_votes.sum(axis=1, keepdims=True)
ripper_predictions = np.where(
    vote_totals > 0,
    ripper_votes / np.maximum(vote_totals, 1.0),
    1.0 / num_classes,
)

# Weighted average of the two (samples, classes) score matrices.
combined_predictions = CNN_WEIGHT * cnn_predictions + RIPPER_WEIGHT * ripper_predictions

# 5. Score the blended predictions against the true validation labels.
#    model.evaluate() is not used here: it would score the CNN alone, not the blend.
true_class_scores = combined_predictions[np.arange(len(y_val)), y_val]
combined_loss = float(-np.mean(np.log(np.clip(true_class_scores, 1e-7, 1.0))))
combined_accuracy = float(np.mean(np.argmax(combined_predictions, axis=1) == y_val))
print(f"Combined Validation loss: {combined_loss}")
print(f"Combined Validation accuracy: {combined_accuracy}")



In [None]:
# Class scores for the validation split, one row per sample.
y_pred = model.predict(X_val_seq)

# Take the highest-scoring class id per row and map it back to its original label.
y_pred_labels = label_encoder.inverse_transform(np.argmax(y_pred, axis=1))

# Map the true integer ids back to their original labels as well.
y_val_labels = label_encoder.inverse_transform(y_val)

# Build and show the classification report for the validation split.
report = classification_report(y_true=y_val_labels, y_pred=y_pred_labels)
print(report)