In [75]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.layers import Embedding, GlobalAveragePooling1D, Dense
from sklearn.metrics import classification_report, accuracy_score
from sklearn.metrics import f1_score, recall_score, precision_score, confusion_matrix


In [92]:
# Load datasets
train_data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')

# Simple preprocessing: fill NA values
train_data.fillna('', inplace=True)
test_data.fillna('', inplace=True)

# Combine text data for tokenization
combined_text = train_data['text'].tolist() + test_data['text'].tolist()

# Tokenization
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(combined_text)

# Convert text to sequences
train_sequences = tokenizer.texts_to_sequences(train_data['text'])
test_sequences = tokenizer.texts_to_sequences(test_data['text'])

# Padding
max_length = 200
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding='post')
test_padded = pad_sequences(test_sequences, maxlen=max_length, padding='post')


In [93]:
# Load data
train_data = pd.read_csv('/content/train.csv')
test_data = pd.read_csv('/content/test.csv')

# Preprocess text data
tokenizer = Tokenizer(num_words=10000, oov_token="<OOV>")
tokenizer.fit_on_texts(train_data['text'])

train_sequences = tokenizer.texts_to_sequences(train_data['text'])
train_padded = pad_sequences(train_sequences, maxlen=50, padding='post', truncating='post')

test_sequences = tokenizer.texts_to_sequences(test_data['text'])
test_padded = pad_sequences(test_sequences, maxlen=50, padding='post', truncating='post')


In [87]:
#RNN Model
model = Sequential([
    Embedding(10000, 32, input_length=50),
    SimpleRNN(64, return_sequences=False),
    Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [88]:
model.fit(train_padded, train_data['target'], epochs=5, validation_split=0.2)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x78878aa263e0>

In [90]:
# Assuming test_padded contains your preprocessed test data and test_data['target'] are the true labels
test_loss, test_accuracy = model.evaluate(test_padded, test_data['target'])
print(f"Test Accuracy: {test_accuracy}")


Test Accuracy: 0.8044134974479675


In [89]:
# Predicting on the test data
test_predictions = model.predict(test_padded)
test_predictions = [1 if prob > 0.5 else 0 for prob in test_predictions]

# Assuming your test data has a 'target' column with true labels
true_labels = test_data['target']

# Calculate Accuracy
accuracy = accuracy_score(true_labels, test_predictions)
print(f"Accuracy: {accuracy}")

# Other metrics
f1 = f1_score(true_labels, test_predictions)
recall = recall_score(true_labels, test_predictions)
precision = precision_score(true_labels, test_predictions)
conf_matrix = confusion_matrix(true_labels, test_predictions)

# Print the other metrics
print(f'F1 Score: {f1}')
print(f'Recall: {recall}')
print(f'Precision: {precision}')
print(f'Confusion Matrix:\n{conf_matrix}')


Accuracy: 0.8044135032181794
F1 Score: 0.7434085817680509
Recall: 0.6594313665545705
Precision: 0.8518957345971564
Confusion Matrix:
[[3967  375]
 [1114 2157]]
