In [2]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pickle
import pandas as pd
VOCAB_SIZE = 10000
MAX_LEN = 250
EMBEDDING_DIM = 16
MODEL_PATH = 'sentiment_analysis_model.h5'
data = pd.read_csv('/kaggle/input/sentiment140/training.1600000.processed.noemoticon.csv', encoding='latin-1')
df_shuffled = data.sample(frac=1).reset_index(drop=True)
texts = []
labels = []
for _, row in df_shuffled.iterrows():
    texts.append(row[-1])
    label = row[0]
    labels.append(0 if label == 0 else 1 if label == 2 else 2)
texts = np.array(texts)
labels = np.array(labels)
tokenizer = tf.keras.preprocessing.text.Tokenizer(num_words=VOCAB_SIZE)
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=MAX_LEN, value=VOCAB_SIZE-1, padding='post')
with open('tokenizer.pickle', 'wb') as handle:
    pickle.dump(tokenizer, handle, protocol=pickle.HIGHEST_PROTOCOL)
train_data = padded_sequences[:-1500]
test_data = padded_sequences[-1500:]
train_labels = labels[:-1500]
test_labels = labels[-1500:]
if os.path.exists(MODEL_PATH):
    print("Loading saved model...")
    model = load_model(MODEL_PATH)
else:
    print("Training a new model...")
    model = Sequential([
        Embedding(VOCAB_SIZE, EMBEDDING_DIM, input_length=MAX_LEN),
        GlobalAveragePooling1D(),
        Dense(16, activation='relu'),
        Dense(3, activation='softmax')  
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    model.fit(train_data, train_labels, epochs=15, batch_size=32, validation_split=0.2)
    model.save(MODEL_PATH)
loss, accuracy = model.evaluate(test_data, test_labels)
print(f"Test accuracy: {accuracy * 100:.2f}%")
def encode_text(text):
    tokens = tf.keras.preprocessing.text.text_to_word_sequence(text)
    tokens = [tokenizer.word_index[word] if word in tokenizer.word_index else 0 for word in tokens]
    return pad_sequences([tokens], maxlen=MAX_LEN, padding='post', value=VOCAB_SIZE-1)
while True:
    user_input = input("Enter a sentence for sentiment analysis (or 'exit' to quit): ")
    if user_input.lower() == 'exit':
        break
    encoded_input = encode_text(user_input)
    prediction = np.argmax(model.predict(encoded_input))
    if prediction == 0:
        print("Sentiment: Negative")
    elif prediction == 1:
        print("Sentiment: Neutral")
    else:
        print("Sentiment: Positive")

  texts.append(row[-1])
  label = row[0]


Training a new model...




Epoch 1/15
[1m39963/39963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 4ms/step - accuracy: 0.6744 - loss: 0.5831 - val_accuracy: 0.7434 - val_loss: 0.5156
Epoch 2/15
[1m39963/39963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 4ms/step - accuracy: 0.7743 - loss: 0.4761 - val_accuracy: 0.7906 - val_loss: 0.4468
Epoch 3/15
[1m39963/39963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 4ms/step - accuracy: 0.7834 - loss: 0.4591 - val_accuracy: 0.7819 - val_loss: 0.4580
Epoch 4/15
[1m39963/39963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m146s[0m 4ms/step - accuracy: 0.7874 - loss: 0.4523 - val_accuracy: 0.7985 - val_loss: 0.4424
Epoch 5/15
[1m39963/39963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m147s[0m 4ms/step - accuracy: 0.7899 - loss: 0.4477 - val_accuracy: 0.7982 - val_loss: 0.4363
Epoch 6/15
[1m39963/39963[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m144s[0m 4ms/step - accuracy: 0.7929 - loss: 0.4415 - val_accuracy: 0.7876 - val_loss:

Enter a sentence for sentiment analysis (or 'exit' to quit):  test


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 86ms/step
Sentiment: Negative


Enter a sentence for sentiment analysis (or 'exit' to quit):  bye


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Sentiment: Negative


Enter a sentence for sentiment analysis (or 'exit' to quit):  hello


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
Sentiment: Neutral


Enter a sentence for sentiment analysis (or 'exit' to quit):  im good


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step
Sentiment: Neutral


Enter a sentence for sentiment analysis (or 'exit' to quit):  im happy


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Sentiment: Neutral


Enter a sentence for sentiment analysis (or 'exit' to quit):  im so happy today is a good day


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
Sentiment: Neutral


Enter a sentence for sentiment analysis (or 'exit' to quit):  exit
