In [7]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense, Dropout
from sklearn.model_selection import train_test_split


In [8]:
df = pd.read_csv("Synthetic_App_Reviews.csv")
texts = df["Review Text"].astype(str).tolist()
labels = df["Recommended"].astype(int).tolist()


In [9]:
vocab_size = 1000
max_len = 50

tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')


In [10]:
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=0.2, random_state=42
)


In [11]:
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=64, input_length=max_len),
    SimpleRNN(64),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()




In [12]:
history = model.fit(np.array(X_train), np.array(y_train), 
                    validation_data=(np.array(X_test), np.array(y_test)), 
                    epochs=10, batch_size=4)


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 861ms/step - accuracy: 0.6111 - loss: 0.6774 - val_accuracy: 0.5000 - val_loss: 0.6989
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 111ms/step - accuracy: 1.0000 - loss: 0.6246 - val_accuracy: 0.5000 - val_loss: 0.6961
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step - accuracy: 0.8056 - loss: 0.6208 - val_accuracy: 0.5000 - val_loss: 0.6966
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step - accuracy: 0.6944 - loss: 0.6015 - val_accuracy: 0.5000 - val_loss: 0.6895
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step - accuracy: 0.6111 - loss: 0.5504 - val_accuracy: 0.5000 - val_loss: 0.7288
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step - accuracy: 0.8056 - loss: 0.4664 - val_accuracy: 0.0000e+00 - val_loss: 0.8161
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━

In [13]:
loss, accuracy = model.evaluate(np.array(X_test), np.array(y_test))
print(f"Test Accuracy: {accuracy:.2f}")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 77ms/step - accuracy: 0.0000e+00 - loss: 0.9264
Test Accuracy: 0.00


In [16]:
def predict_review(text):
    seq = tokenizer.texts_to_sequences([text])
    padded = pad_sequences(seq, maxlen=max_len, padding='post')
    prediction = model.predict(padded)[0][0]
    return f" Recommended" if prediction >= 0.5 else f" Not Recommended"

print(predict_review("The app is really useful and intuitive"))
print(predict_review("Terrible"))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
 Recommended
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
 Not Recommended
