# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix

# 1. Data Preprocessing
def load_and_preprocess_data(file_path, text_column="text", label_column="label"):
    """Load a labelled text dataset from a CSV file and normalise the text.

    Rows with a missing text or label value are dropped. Each remaining text
    is lower-cased, punctuation is replaced by spaces, runs of whitespace are
    collapsed to a single space, and leading/trailing whitespace is removed.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) of the CSV.
        text_column: Name of the column holding the raw text.
            NOTE(review): the default "text" is an assumption about the
            dataset schema -- confirm against the real file.
        label_column: Name of the column holding the class label.
            NOTE(review): the default "label" is likewise an assumption.

    Returns:
        A ``(texts, labels)`` tuple of two equal-length lists: the cleaned
        text strings and the corresponding label values.

    Raises:
        KeyError: If either requested column is absent from the CSV.
    """
    # Load data
    frame = pd.read_csv(file_path)
    missing = [
        column for column in (text_column, label_column)
        if column not in frame.columns
    ]
    if missing:
        raise KeyError(
            f"Column(s) {missing} not found in {file_path!r}; "
            f"available columns: {list(frame.columns)}"
        )
    # A row without text or without a label cannot be used for training.
    frame = frame.dropna(subset=[text_column, label_column])

    # Clean text
    cleaned = (
        frame[text_column]
        .astype(str)
        .str.lower()
        # \w is Unicode-aware, so non-ASCII letters survive the cleaning.
        .str.replace(r"[^\w\s]", " ", regex=True)
        .str.replace(r"\s+", " ", regex=True)
        .str.strip()
    )

    # Return processed data
    return cleaned.tolist(), frame[label_column].tolist()

def prepare_sequences(texts, max_words, max_len):
    """Turn raw texts into fixed-length integer sequences.

    A new ``Tokenizer`` limited to ``max_words`` is fitted on ``texts``; the
    same texts are then converted to integer sequences and passed through
    ``pad_sequences`` with ``maxlen=max_len`` (all other options left at
    their defaults).

    Args:
        texts: Texts to fit the tokenizer on and to encode. It is traversed
            twice (fit, then encode).
        max_words: Forwarded to ``Tokenizer`` as ``num_words``.
        max_len: Forwarded to ``pad_sequences`` as ``maxlen``.

    Returns:
        A ``(padded_sequences, tokenizer)`` tuple: the padded result and the
        fitted tokenizer.
    """
    vocab = Tokenizer(num_words=max_words)
    vocab.fit_on_texts(texts)
    encoded = vocab.texts_to_sequences(texts)
    return pad_sequences(encoded, maxlen=max_len), vocab

# 2. Model Building
def build_gru_model(max_words, max_len, embedding_dim, gru_units, num_classes):
    """Create and compile a stacked-GRU classifier.

    The network is Embedding -> GRU (``return_sequences=True``) -> GRU ->
    Dense with softmax activation. It is compiled with the Adam optimizer,
    the ``categorical_crossentropy`` loss and an accuracy metric.

    Args:
        max_words: First positional argument of the ``Embedding`` layer.
        max_len: Passed to ``Embedding`` as ``input_length``.
        embedding_dim: Second positional argument of the ``Embedding`` layer.
        gru_units: Unit count used for both GRU layers.
        num_classes: Number of units in the final softmax layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    embedding = Embedding(max_words, embedding_dim, input_length=max_len)
    # The first GRU emits its full output sequence so the second GRU can
    # consume it step by step.
    lower_gru = GRU(gru_units, return_sequences=True)
    upper_gru = GRU(gru_units)
    classifier = Dense(num_classes, activation='softmax')

    network = Sequential([embedding, lower_gru, upper_gru, classifier])
    network.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network

# 3. Training
def train_model(model, X_train, y_train, epochs, batch_size):
    """Fit ``model`` with early stopping and best-model checkpointing.

    Training uses ``validation_split=0.2`` and two callbacks:
    ``EarlyStopping`` (patience 3, restoring the best weights) and
    ``ModelCheckpoint`` writing only the best model to
    ``best_gru_model.h5``.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        X_train: Training inputs, forwarded to ``model.fit``.
        y_train: Training targets, forwarded to ``model.fit``.
        epochs: Forwarded to ``model.fit`` as ``epochs``.
        batch_size: Forwarded to ``model.fit`` as ``batch_size``.

    Returns:
        Whatever ``model.fit`` returns (the training history).
    """
    stop_early = EarlyStopping(patience=3, restore_best_weights=True)
    keep_best = ModelCheckpoint('best_gru_model.h5', save_best_only=True)
    return model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=[stop_early, keep_best],
    )

# 4. Evaluation
def evaluate_model(model, X_test, y_test):
    """Evaluate ``model`` on held-out data and print per-class diagnostics.

    ``model.evaluate`` is expected to return exactly two values, the second
    of which is returned as the accuracy. Predicted and true classes are
    both obtained with ``argmax`` along axis 1, so ``y_test`` must be
    two-dimensional (e.g. one-hot encoded). The classification report and
    the confusion matrix are printed, in that order.

    Args:
        model: Object exposing Keras-style ``evaluate`` and ``predict``.
        X_test: Test inputs.
        y_test: Test targets with one row per sample and one column per class.

    Returns:
        The second value returned by ``model.evaluate``.
    """
    _, test_accuracy = model.evaluate(X_test, y_test)
    class_scores = model.predict(X_test)
    predicted = np.argmax(class_scores, axis=1)
    expected = np.argmax(y_test, axis=1)
    for summarize in (classification_report, confusion_matrix):
        print(summarize(expected, predicted))
    return test_accuracy

# Main execution
if __name__ == "__main__":
    # Vocabulary size and sequence length are needed by both the tokenizer
    # and the embedding layer, so define them once to keep the two in sync.
    max_words = 10000
    max_len = 200

    # Load and preprocess data
    X, y = load_and_preprocess_data('path_to_dataset.csv')

    # Prepare sequences
    X_padded, tokenizer = prepare_sequences(X, max_words=max_words, max_len=max_len)

    # Encode labels
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    num_classes = len(label_encoder.classes_)
    # The model is compiled with 'categorical_crossentropy' and
    # evaluate_model takes argmax over axis 1 of the targets; both require
    # one-hot rows rather than the integer ids LabelEncoder produces.
    y_onehot = np.eye(num_classes, dtype="float32")[y_encoded]

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X_padded, y_onehot, test_size=0.2, random_state=42
    )

    # Build model
    model = build_gru_model(
        max_words=max_words,
        max_len=max_len,
        embedding_dim=128,
        gru_units=64,
        num_classes=num_classes,
    )

    # Train model
    history = train_model(model, X_train, y_train, epochs=10, batch_size=32)

    # Evaluate model
    accuracy = evaluate_model(model, X_test, y_test)

    print(f"Final accuracy: {accuracy}")

    # 5. Analysis and Comparison
    # Document your findings and prepare for comparison with other models
