In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
import pandas as pd
# Load the dataset and give its columns readable names in one expression.
df = pd.read_csv('data.csv').set_axis(['Sentence', 'Sentiment'], axis=1)

In [None]:
# Preview the loaded frame (the notebook display elides the middle rows).
df

Unnamed: 0,Sentence,Sentiment
0,The GeoSolutions technology will leverage Bene...,positive
1,"$ESI on lows, down $1.50 to $2.50 BK a real po...",negative
2,"For the last quarter of 2010 , Componenta 's n...",positive
3,According to the Finnish-Russian Chamber of Co...,neutral
4,The Swedish buyout firm has sold its remaining...,neutral
...,...,...
5837,RISING costs have forced packaging producer Hu...,negative
5838,Nordic Walking was first used as a summer trai...,neutral
5839,"According shipping company Viking Line , the E...",neutral
5840,"In the building and home improvement trade , s...",neutral


In [None]:
# Count missing values per column.
df.isna().sum()

Sentence     0
Sentiment    0
dtype: int64

In [None]:
# Drop incomplete rows, then renumber the index from 0: later cells look rows
# up by label (e.g. `sentences[0]`), which would fail if a dropped row left a
# gap in the index.
df = df.dropna(axis='rows').reset_index(drop=True)

In [None]:
# (rows, columns) of the frame after dropping incomplete rows.
df.shape

(5842, 2)

In [None]:
# Class balance: number of sentences per sentiment label.
df['Sentiment'].value_counts()

Sentiment
neutral     3130
positive    1852
negative     860
Name: count, dtype: int64

In [None]:
# Target column: the sentiment label of every sentence.
val = df['Sentiment']
val

0       positive
1       negative
2       positive
3        neutral
4        neutral
          ...   
5837    negative
5838     neutral
5839     neutral
5840     neutral
5841    positive
Name: Sentiment, Length: 5842, dtype: object

In [None]:
from sklearn.preprocessing import LabelEncoder
# Learn the string-label -> integer-id mapping, then encode the targets with it.
le = LabelEncoder()
le.fit(val)
labels = le.transform(val)
labels

array([2, 0, 2, ..., 1, 1, 2])

In [None]:
# Input column: the raw sentence text; show the first one as a sample.
sentences = df['Sentence']
sentences.loc[0]

"The GeoSolutions technology will leverage Benefon 's GPS solutions by providing Location Based Search Technology , a Communities Platform , location relevant multimedia content and a new and powerful commercial model ."

In [None]:
# Create the tokenizer and materialise the sentences as a plain Python list
# (no text cleaning is applied despite the variable name).
tokenizer = Tokenizer()
clean_sentences = list(sentences)
len(clean_sentences)

5842

In [None]:
# Build the tokenizer's vocabulary from the full corpus.
tokenizer.fit_on_texts(clean_sentences)
# word -> integer index mapping learned above; its size drives vocab_size later.
word_index = tokenizer.word_index

In [None]:


# Encode every sentence as a list of word indices
sequences = tokenizer.texts_to_sequences(clean_sentences)

# Pad every sequence up to the length of the longest one
maxlen = max(map(len, sequences))
padded_sequences = pad_sequences(sequences, maxlen=maxlen)

padded_sequences.shape


(5842, 71)

In [None]:
# Sanity check: there must be one label per padded sequence.
labels.shape

(5842,)

In [None]:
# Distinct encoded class ids; their count is the number of output classes.
np.unique(labels)

array([0, 1, 2])

In [None]:
# Hyperparameters for the recurrent classifier
embedding_dim = 16
# +1 keeps index 0 free: word_index starts at 1 and 0 is the padding value.
vocab_size = len(word_index) + 1
# One output unit per label known to the encoder. The previous hard-coded 4
# left a unit that no label maps to, so predicting it broke inverse_transform.
num_classes = len(le.classes_)

# Define RNN model for multi-class classification
model = Sequential([
    # Sequence length is inferred from the input; `input_length` is deprecated.
    Embedding(vocab_size, embedding_dim),
    SimpleRNN(32),  # Simple RNN layer with 32 units
    Dense(num_classes, activation='softmax')  # Softmax over the sentiment classes
])

# Labels are integer class ids, hence the sparse variant of cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model (on the full dataset; no hold-out split is made here)
model.fit(padded_sequences, labels, epochs=10, verbose=1)

# Test the model on one hand-written sentence
test_sentence = 'This movie is amazing'
test_sequence = tokenizer.texts_to_sequences([test_sentence])
padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)
prediction = model.predict(padded_test_sequence)
# Single input row, so the flat argmax is the predicted class id.
predicted_class = np.argmax(prediction)
print("Sentiment prediction:", predicted_class)




Epoch 1/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 23ms/step - accuracy: 0.5006 - loss: 1.1156
Epoch 2/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.6594 - loss: 0.8198
Epoch 3/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.7982 - loss: 0.5409
Epoch 4/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.8523 - loss: 0.3654
Epoch 5/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 23ms/step - accuracy: 0.8877 - loss: 0.2696
Epoch 6/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 24ms/step - accuracy: 0.8962 - loss: 0.2067
Epoch 7/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 25ms/step - accuracy: 0.8965 - loss: 0.1906
Epoch 8/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 24ms/step - accuracy: 0.8949 - loss: 0.1761
Epoch 9/10
[1m183/183[0m [32m

In [None]:
import gradio as gr   # library used for


# web interface for project
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Define the prediction function
def predict_sentiment(text):
    """Predict the sentiment label of ``text`` with the simple RNN ``model``.

    Args:
        text: The sentence to classify.

    Returns:
        The sentiment label produced by ``le.inverse_transform`` for the
        most probable class.
    """
    # Encode the text with the same tokenizer and padding used for training.
    test_sequence = tokenizer.texts_to_sequences([text])
    padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)

    # A single sentence was submitted, so take the first (only) output row.
    class_scores = model.predict(padded_test_sequence)[0]

    # Only consider outputs that correspond to a fitted label: an index beyond
    # len(le.classes_) would make inverse_transform raise instead of answering.
    known_scores = class_scores[:len(le.classes_)]
    predicted_class = int(np.argmax(known_scores))

    # Map the class id back to the original sentiment label.
    return le.inverse_transform([predicted_class])[0]

# Create a Gradio interface: free-text input, predicted label as text output.
iface = gr.Interface(fn=predict_sentiment,
                     inputs="text",
                     outputs="text",
                     title="Sentiment Analysis",
                     # The dataset has exactly three labels: negative, neutral, positive.
                     description="Enter a text to predict its sentiment (negative, neutral, or positive).")

# Launch the interface
iface.launch()


Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 554ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 80ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step


In [None]:
def predict_sentiment(text):
    """Predict the sentiment label of ``text`` with the simple RNN ``model``.

    This re-binds ``predict_sentiment`` to the RNN-backed implementation.

    Args:
        text: The sentence to classify.

    Returns:
        The sentiment label produced by ``le.inverse_transform`` for the
        most probable class.
    """
    # Encode the text with the same tokenizer and padding used for training.
    test_sequence = tokenizer.texts_to_sequences([text])
    padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)

    # A single sentence was submitted, so take the first (only) output row.
    class_scores = model.predict(padded_test_sequence)[0]

    # Only consider outputs that correspond to a fitted label: an index beyond
    # len(le.classes_) would make inverse_transform raise instead of answering.
    known_scores = class_scores[:len(le.classes_)]
    predicted_class = int(np.argmax(known_scores))

    # Map the class id back to the original sentiment label.
    return le.inverse_transform([predicted_class])[0]

In [None]:

from tensorflow.keras.layers import LSTM, Dropout

# Define a more complex LSTM model for multi-class classification
model_complex_lstm = Sequential([
    # Sequence length is inferred from the input; `input_length` is deprecated.
    Embedding(vocab_size, embedding_dim),
    LSTM(64, return_sequences=True),  # Emits the full sequence so the next LSTM can consume it
    Dropout(0.5),  # Dropout layer to reduce overfitting
    LSTM(32),  # Second LSTM layer with 32 units
    Dropout(0.5),  # Dropout layer
    Dense(64, activation='relu'),  # Dense layer with ReLU activation
    # One output unit per label known to the encoder. The previous hard-coded 4
    # left a unit that no label maps to, so predicting it broke inverse_transform.
    Dense(len(le.classes_), activation='softmax')
])



In [None]:
# Configure training: Adam optimizer, cross-entropy on integer labels, accuracy metric.
model_complex_lstm.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train the stacked LSTM on the whole padded dataset for 10 epochs.
model_complex_lstm.fit(
    x=padded_sequences,
    y=labels,
    epochs=10,
    verbose=1,
)


Epoch 1/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 76ms/step - accuracy: 0.5021 - loss: 1.1007
Epoch 2/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 82ms/step - accuracy: 0.5310 - loss: 0.9965
Epoch 3/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 80ms/step - accuracy: 0.6450 - loss: 0.8253
Epoch 4/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 77ms/step - accuracy: 0.7597 - loss: 0.6123
Epoch 5/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 90ms/step - accuracy: 0.8204 - loss: 0.4419
Epoch 6/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 77ms/step - accuracy: 0.8728 - loss: 0.3024
Epoch 7/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 77ms/step - accuracy: 0.8924 - loss: 0.2324
Epoch 8/10
[1m183/183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 88ms/step - accuracy: 0.8996 - loss: 0.2084
Epoch 9/10
[1m183/183[

<keras.src.callbacks.history.History at 0x1eac87e9550>

In [None]:
# Smoke-test the complex LSTM model on one hand-written sentence
test_sentence = 'This movie is amazing'
test_sequence = tokenizer.texts_to_sequences([test_sentence])
padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)
prediction_complex_lstm = model_complex_lstm.predict(padded_test_sequence)
# Flat argmax over the single output row gives the predicted class id.
predicted_class_complex_lstm = prediction_complex_lstm.argmax()
print("Sentiment prediction using complex LSTM model:", predicted_class_complex_lstm)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
Sentiment prediction using complex LSTM model: 1


In [None]:
import gradio as gr
import numpy as np
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import LabelEncoder

# Define the prediction function
def predict_sentiment(text):
    """Predict the sentiment label of ``text`` with ``model_complex_lstm``.

    Args:
        text: The sentence to classify.

    Returns:
        The sentiment label produced by ``le.inverse_transform`` for the
        most probable class.
    """
    # Encode the text with the same tokenizer and padding used for training.
    test_sequence = tokenizer.texts_to_sequences([text])
    padded_test_sequence = pad_sequences(test_sequence, maxlen=maxlen)

    # A single sentence was submitted, so take the first (only) output row.
    class_scores = model_complex_lstm.predict(padded_test_sequence)[0]

    # Only consider outputs that correspond to a fitted label: an index beyond
    # len(le.classes_) would make inverse_transform raise instead of answering.
    known_scores = class_scores[:len(le.classes_)]
    predicted_class = int(np.argmax(known_scores))

    # Map the class id back to the original sentiment label.
    return le.inverse_transform([predicted_class])[0]

# Create a Gradio interface: free-text input, predicted label as text output.
iface = gr.Interface(fn=predict_sentiment,
                     inputs="text",
                     outputs="text",
                     title="Sentiment Analysis",
                     # The dataset has exactly three labels: negative, neutral, positive.
                     description="Enter a text to predict its sentiment (negative, neutral, or positive).")

# Launch the interface
iface.launch()

Running on local URL:  http://127.0.0.1:7861

To create a public link, set `share=True` in `launch()`.




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
