In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences


# --- Load the corpus -------------------------------------------------------
# Tab-separated file; the code below reads its 'Tweet' and 'Class' columns.
dataset = "/content/urdu-sentiment-corpus-v1.tsv"
df = pd.read_csv(dataset, sep='\t')

# --- Inputs and targets ----------------------------------------------------
# Tweets are forced to str so that missing values do not break tokenization.
texts = df['Tweet'].astype(str)
# Binary target: 1 where Class is exactly 'P', 0 for every other value.
labels = df['Class'].eq('P').astype('int64')

# --- Tokenization ----------------------------------------------------------
# max_words is also the Embedding vocabulary size used by build_model.
max_words = 10000
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# --- Fixed-length sequences ------------------------------------------------
# maxlen is also the Embedding input length used by build_model; adjust as
# needed.
maxlen = 100
data = pad_sequences(sequences, maxlen=maxlen)

# --- Train/test split ------------------------------------------------------
# 75% train / 25% test, with a fixed random_state for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.25,
    random_state=42,
)


In [None]:
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, GRU, LSTM, Bidirectional, Dense, Dropout

# Function
def build_model(num_layers, dropout_rate, cell_type):
    """Build and compile a stacked recurrent binary classifier.

    The network is an Embedding layer (vocabulary size ``max_words``,
    32 dimensions, input length ``maxlen`` -- both read from module scope),
    followed by ``num_layers`` recurrent layers of 32 units each, followed by
    a single sigmoid unit.

    Args:
        num_layers: Number of stacked recurrent layers; must be at least 1.
        dropout_rate: Used as both ``dropout`` and ``recurrent_dropout`` for
            every recurrent layer.
        cell_type: One of ``'RNN'``, ``'GRU'``, ``'LSTM'`` or ``'BiLSTM'``.

    Returns:
        A ``Sequential`` model compiled with the adam optimizer, binary
        cross-entropy loss and the accuracy metric.

    Raises:
        ValueError: If ``cell_type`` is not one of the supported names or
            ``num_layers`` is smaller than 1.
    """
    # 'BiLSTM' uses the same LSTM cell, wrapped in Bidirectional below.
    recurrent_layers = {
        'RNN': SimpleRNN,
        'GRU': GRU,
        'LSTM': LSTM,
        'BiLSTM': LSTM,
    }
    if cell_type not in recurrent_layers:
        raise ValueError(
            f"Unknown cell_type {cell_type!r}; expected one of {sorted(recurrent_layers)}"
        )
    if num_layers < 1:
        raise ValueError(f"num_layers must be >= 1, got {num_layers!r}")

    model = Sequential()
    model.add(Embedding(max_words, 32, input_length=maxlen))
    for layer_index in range(num_layers):
        # Intermediate layers must emit the full sequence so the next
        # recurrent layer has a sequence to consume. The final layer returns
        # only its last output, so the Dense head produces one prediction per
        # sample instead of one per timestep.
        is_last_layer = layer_index == num_layers - 1
        layer = recurrent_layers[cell_type](
            32,
            dropout=dropout_rate,
            recurrent_dropout=dropout_rate,
            return_sequences=not is_last_layer,
        )
        if cell_type == 'BiLSTM':
            layer = Bidirectional(layer)
        model.add(layer)
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Hyperparameter grid: every combination of the three lists below is trained.
num_layers_list = [2, 3]
dropout_rates = [0.3, 0.7]
cell_types = ['RNN', 'GRU', 'LSTM', 'BiLSTM']

# One record per trained configuration, collected into a DataFrame at the end.
results = []

# Walk the grid with layers outermost and cell type innermost, fit a fresh
# model for each point, and record its accuracy on the held-out test split.
for num_layers, dropout_rate, cell_type in (
    (layer_count, rate, cell)
    for layer_count in num_layers_list
    for rate in dropout_rates
    for cell in cell_types
):
    print(f"Training {cell_type} with {num_layers} layers and dropout rate {dropout_rate}")
    model = build_model(num_layers, dropout_rate, cell_type)
    model.fit(
        X_train,
        y_train,
        epochs=5,
        batch_size=32,
        validation_split=0.1,
        verbose=0,
    )
    loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
    results.append(
        dict(Model=cell_type, Layers=num_layers, Dropout=dropout_rate, Accuracy=accuracy)
    )


# Tabulate the results, one row per configuration.
results_df = pd.DataFrame(results)
print(results_df)


Training RNN with 2 layers and dropout rate 0.3
Training GRU with 2 layers and dropout rate 0.3
Training LSTM with 2 layers and dropout rate 0.3
Training BiLSTM with 2 layers and dropout rate 0.3
Training RNN with 2 layers and dropout rate 0.7
Training GRU with 2 layers and dropout rate 0.7
Training LSTM with 2 layers and dropout rate 0.7
Training BiLSTM with 2 layers and dropout rate 0.7
Training RNN with 3 layers and dropout rate 0.3
Training GRU with 3 layers and dropout rate 0.3
Training LSTM with 3 layers and dropout rate 0.3
Training BiLSTM with 3 layers and dropout rate 0.3
Training RNN with 3 layers and dropout rate 0.7
Training GRU with 3 layers and dropout rate 0.7
Training LSTM with 3 layers and dropout rate 0.7
Training BiLSTM with 3 layers and dropout rate 0.7
     Model  Layers  Dropout  Accuracy
0      RNN       2      0.3   0.49200
1      GRU       2      0.3   0.49532
2     LSTM       2      0.3   0.49592
3   BiLSTM       2      0.3   0.59600
4      RNN       2      0.