In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding, GRU, Dense, Dropout, Input


In [14]:
# Load the data
df = pd.read_excel('all_facebook_and_twitter_dataset .xlsx')

# Display the first few rows
print(df.head())

                                            Comments   M-Class
0     እባካችሁ እለታዊ ፖኬጅ ላይም ማሻሻያ አድርጉ ለእኛ ለድሆቹም አስቡ እንጂ  positive
1  መቼነው ቅናሹ የሚጀምረው wifi የመኖሪያባለ mg ባለ mg የነበረው ስን...  positive
2  የቴሌን የተወሰነ ለግሉ ዘርፍ ይሸጣል የተባለው የት ደረሰ ሀገሪቱ ተፎካካ...  negative
3                                     ምነው መልሱ ምላሽ ቆየ  negative
4                        አዎ እናንቴ ሽልማት እያላችሁ በኛ ሙድ ያዙ  negative


In [3]:
# Map the labels to numeric values
label_mapping = {'positive': 2, 'neutral': 1, 'negative': 0}
df['M-Class'] = df['M-Class'].map(label_mapping)

In [5]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(df['Comments'], df['M-Class'], test_size=0.2, random_state=42)

In [6]:
# Define tokenizer parameters
max_features = 10000  # Vocabulary size
max_length = 100      # Maximum length of sequences


In [7]:
# Create a tokenizer and fit it on the training data
tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(X_train)

In [8]:
# Convert text to sequences
X_train_seq = tokenizer.texts_to_sequences(X_train)
X_test_seq = tokenizer.texts_to_sequences(X_test)


In [9]:
# Pad the sequences to the same length
X_train_padded = pad_sequences(X_train_seq, maxlen=max_length)
X_test_padded = pad_sequences(X_test_seq, maxlen=max_length)


In [20]:
# Define input shape
input_shape = (100,)  # Specify the input shape as (sequence length,)

# Define the model using the Functional API
input_layer = Input(shape=input_shape)

# Define the layers
embedding_layer = Embedding(input_dim=10000, output_dim=128)(input_layer)
gru_layer1 = GRU(128, return_sequences=True)(embedding_layer)
gru_layer2 = GRU(64)(gru_layer1)
dropout_layer = Dropout(0.5)(gru_layer2)
output_layer = Dense(3, activation='softmax')(dropout_layer)

# Create the model
model = Model(inputs=input_layer, outputs=output_layer)


In [21]:
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [22]:
# Print model summary
print(model.summary())

None


In [23]:
# Train the model
history = model.fit(X_train_padded, y_train, epochs=10, batch_size=32, validation_split=0.2)


Epoch 1/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 134ms/step - accuracy: 0.5883 - loss: 0.8741 - val_accuracy: 0.7367 - val_loss: 0.6601
Epoch 2/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 133ms/step - accuracy: 0.8251 - loss: 0.4589 - val_accuracy: 0.7460 - val_loss: 0.6582
Epoch 3/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 132ms/step - accuracy: 0.8951 - loss: 0.2868 - val_accuracy: 0.7362 - val_loss: 0.7511
Epoch 4/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 128ms/step - accuracy: 0.9251 - loss: 0.2103 - val_accuracy: 0.7236 - val_loss: 0.8424
Epoch 5/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 132ms/step - accuracy: 0.9370 - loss: 0.1765 - val_accuracy: 0.7152 - val_loss: 0.9017
Epoch 6/10
[1m268/268[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 129ms/step - accuracy: 0.9387 - loss: 0.1610 - val_accuracy: 0.7110 - val_loss: 1.0383
Epoch 7/10

In [24]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test_padded, y_test)

# Print the test accuracy
print(f'Test Accuracy: {test_accuracy:.4f}')

# Predict the labels on the test set
y_pred = model.predict(X_test_padded)
y_pred_classes = np.argmax(y_pred, axis=1)

# Calculate precision, recall, and F1-score
precision, recall, f1, _ = precision_recall_fscore_support(y_test, y_pred_classes, average='weighted')

# Print the evaluation metrics
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')


[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 33ms/step - accuracy: 0.7144 - loss: 1.0555
Test Accuracy: 0.7024
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 41ms/step
Precision: 0.7144
Recall: 0.7024
F1-Score: 0.7079
