In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import pandas as pd
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.utils import to_categorical
import numpy as np

In [4]:
def preprocess_text(text):
    text = re.sub(r'[^\w\s]', '', str(text).lower().strip())
    return text

In [5]:
df = pd.read_csv('Twitter_data.csv')
df['clean_text'] = df['clean_text'].apply(preprocess_text)
valid_categories = [-1, 0, 1]
df = df[df['category'].isin(valid_categories)]
category_mapping = {-1: 0, 0: 1, 1: 2}
df['category'] = df['category'].map(category_mapping)

In [6]:
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['clean_text'])
sequences = tokenizer.texts_to_sequences(df['clean_text'])
max_len = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')
labels = to_categorical(df['category'], num_classes=3)

In [7]:
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 100
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len))
model.add(LSTM(units=128))
model.add(Dense(units=3, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [8]:
model.fit(padded_sequences, labels, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m4075/4075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 74ms/step - accuracy: 0.7677 - loss: 0.5554 - val_accuracy: 0.9525 - val_loss: 0.1594
Epoch 2/10
[1m4075/4075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m297s[0m 73ms/step - accuracy: 0.9654 - loss: 0.1124 - val_accuracy: 0.9694 - val_loss: 0.1020
Epoch 3/10
[1m4075/4075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m300s[0m 74ms/step - accuracy: 0.9816 - loss: 0.0607 - val_accuracy: 0.9640 - val_loss: 0.1181
Epoch 4/10
[1m4075/4075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m300s[0m 74ms/step - accuracy: 0.9886 - loss: 0.0382 - val_accuracy: 0.9646 - val_loss: 0.1287
Epoch 5/10
[1m4075/4075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 74ms/step - accuracy: 0.9928 - loss: 0.0234 - val_accuracy: 0.9633 - val_loss: 0.1452
Epoch 6/10
[1m4075/4075[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 74ms/step - accuracy: 0.9956 - loss: 0.0142 - val_accuracy: 0.9605 - val_loss: 0.146

<keras.src.callbacks.history.History at 0x1ccf42c0460>

In [9]:
model.save_weights('sentiment_model1.weights.h5')
print("Model weights saved successfully.")

Model weights saved successfully.


In [1]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import re
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import numpy as np

df = pd.read_csv('Twitter_data.csv')

def preprocess_text(text):
    text = re.sub(r'[^\w\s]', '', str(text).lower().strip())
    return text

df['clean_text'] = df['clean_text'].apply(preprocess_text)

valid_categories = [-1, 0, 1]
df = df[df['category'].isin(valid_categories)]
category_mapping = {-1: 0, 0: 1, 1: 2}
df['category'] = df['category'].map(category_mapping)

tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['clean_text'])
sequences = tokenizer.texts_to_sequences(df['clean_text'])
max_len = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_len, padding='post')

vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 100

model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len))
model.add(LSTM(units=128))
model.add(Dense(units=3, activation='softmax'))

model.build((None, max_len))

model.load_weights('sentiment_model1.weights.h5')
print("Model weights loaded successfully.")

def predict_sentiment(input_text):
    input_text = preprocess_text(input_text)
    input_sequence = tokenizer.texts_to_sequences([input_text])
    padded_sequence = pad_sequences(input_sequence, maxlen=max_len, padding='post')
    
    prediction = model.predict(padded_sequence)
    predicted_class = np.argmax(prediction, axis=1)[0]
    
    sentiment_labels = {-1: 'Neutral', 0: 'Negative', 1: 'Positive'}
    predicted_sentiment = sentiment_labels[predicted_class]
    
    return predicted_sentiment

while True:
    input_text = input("Enter text (Press Enter with blank textbox to exit): ")
    
    if input_text == '':
        print("Exited")
        break
    
    predicted_sentiment = predict_sentiment(input_text)
    print(f"Predicted sentiment: {predicted_sentiment}")

Model weights loaded successfully.
Enter text (Press Enter with blank textbox to exit): I like pizza
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 248ms/step
Predicted sentiment: Positive
Enter text (Press Enter with blank textbox to exit): I hate pizza
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step
Predicted sentiment: Negative
Enter text (Press Enter with blank textbox to exit): Hello
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step
Predicted sentiment: Positive
Enter text (Press Enter with blank textbox to exit): 
Exited
