# Bruno's Sentiment Analysis With Neural Networks

In [1]:
pip install Keras-Preprocessing

Collecting Keras-Preprocessing
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.6/42.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Keras-Preprocessing
Successfully installed Keras-Preprocessing-1.1.2
Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [3]:
threads_reviews = pd.read_csv('/kaggle/input/threads-app-reviews-dataset/threads_reviews.csv')
threads_reviews.head()

Unnamed: 0,source,review_description,rating,review_date
0,Google Play,Very good app for Android phone and me,5,27-08-2023 10:31
1,Google Play,Sl👍👍👍👍,5,27-08-2023 10:28
2,Google Play,Best app,5,27-08-2023 9:47
3,Google Play,Gatiya app,1,27-08-2023 9:13
4,Google Play,Lit bruv,5,27-08-2023 9:00


### Categorize Sentiment by Rating

In [4]:
def categorize_sentiment(rating):
    if rating >= 4:
        return 'positive'
    elif rating == 3:
        return 'neutral'
    else:
        return 'negative'

threads_reviews['sentiment'] = threads_reviews['rating'].apply(categorize_sentiment)

threads_reviews.head()

Unnamed: 0,source,review_description,rating,review_date,sentiment
0,Google Play,Very good app for Android phone and me,5,27-08-2023 10:31,positive
1,Google Play,Sl👍👍👍👍,5,27-08-2023 10:28,positive
2,Google Play,Best app,5,27-08-2023 9:47,positive
3,Google Play,Gatiya app,1,27-08-2023 9:13,negative
4,Google Play,Lit bruv,5,27-08-2023 9:00,positive


### Preparing Data for Model Training

In [5]:
X = threads_reviews['review_description']
y = threads_reviews['sentiment']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

X_train.shape, X_test.shape

((32348,), (8087,))

#### Tokenizing Text

In [6]:
from keras.preprocessing.text import Tokenizer
from keras_preprocessing.sequence import pad_sequences

max_words = 5000
max_len = 200

tokenizer = Tokenizer(num_words=max_words, oov_token='<OOV>')
tokenizer.fit_on_texts(X_train)

X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

X_train_padded = pad_sequences(X_train_sequences, maxlen=max_len, padding='post', truncating='post')
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_len, padding='post', truncating='post')

X_train_padded.shape, X_test_padded.shape

((32348, 200), (8087, 200))

#### Neural Network Architecture

In [7]:
from keras.models import Sequential
from keras.layers import Embedding, GlobalAveragePooling1D, Dense

model = Sequential([
    Embedding(max_words, 24, input_length=max_len),
    GlobalAveragePooling1D(),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


In [8]:
from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
y_train_encoded = encoder.fit_transform(y_train)
y_test_encoded = encoder.transform(y_test)

y_train_categorical = to_categorical(y_train_encoded)
y_test_categorical = to_categorical(y_test_encoded)

#### Run model 15 times and check its accuracy

In [9]:
history = model.fit(X_train_padded, y_train_categorical, epochs=15, validation_data=(X_test_padded, y_test_categorical), batch_size=32)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [10]:
loss, accuracy = model.evaluate(X_test_padded, y_test_categorical)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

Test Accuracy: 79.51%


In [11]:
model.save('sentiment_analysis_threads.keras')