In [1]:
import pandas as pd
import numpy as np
from keras.layers import Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
df = pd.read_excel('all_facebook_and_twitter_dataset.xlsx')

# Preprocessing
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['Comments'])
vocab_size = len(tokenizer.word_index) + 1

# Convert texts to sequences
sequences = tokenizer.texts_to_sequences(df['Comments'])

# Define max length for padding
max_length = max(len(seq) for seq in sequences)

# Pad sequences to the same length
padded_sequences = pad_sequences(sequences, maxlen=max_length, padding='post')

# Encode labels
label_encoder = LabelEncoder()
df['M-Class'] = label_encoder.fit_transform(df['M-Class'])

# Function to create and compile the RNN model
def create_model():
    model = Sequential([
        Embedding(input_dim=vocab_size, output_dim=128),
        SimpleRNN(64, return_sequences=False),
        Dropout(0.5),
        Dense(3, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Function to perform k-fold cross-validation
def evaluate_model_kfold(k=10):
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    
    accuracies = []
    precisions = []
    recalls = []
    f1_scores = []
    
    for train_index, test_index in kf.split(padded_sequences):
        X_train, X_test = padded_sequences[train_index], padded_sequences[test_index]
        y_train, y_test = df['M-Class'].iloc[train_index], df['M-Class'].iloc[test_index]
        
        model = create_model()
        model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=0)
        
        y_pred_probs = model.predict(X_test)
        y_pred_classes = np.argmax(y_pred_probs, axis=1)
        
        report = classification_report(y_test, y_pred_classes, target_names=label_encoder.classes_, output_dict=True, zero_division=1)
        
        accuracies.append(report['accuracy'])
        precisions.append(report['macro avg']['precision'])
        recalls.append(report['macro avg']['recall'])
        f1_scores.append(report['macro avg']['f1-score'])
    
    return {
        'accuracy': np.mean(accuracies),
        'precision': np.mean(precisions),
        'recall': np.mean(recalls),
        'f1_score': np.mean(f1_scores)
    }

# Evaluate the model using k-fold cross-validation for k=3, k=5, and k=10
for k in [3, 5, 10]:
    results = evaluate_model_kfold(k=k)
    print(f"Results from {k}-fold cross-validation:")
    print(f"Accuracy: {results['accuracy']:.4f}")
    print(f"Precision: {results['precision']:.4f}")
    print(f"Recall: {results['recall']:.4f}")
    print(f"F1-score: {results['f1_score']:.4f}")
    print("\n")


[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 19ms/step
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms/step
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 18ms/step
Results from 3-fold cross-validation:
Accuracy: 0.4547
Precision: 0.5884
Recall: 0.3389
F1-score: 0.3842


[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step
[1m84/84[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step
Results from 5-fold cross-validation:
Accuracy: 0.4846
Precision: 0.7646
Recall: 0.3520
F1-score: 0.2689


[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━