In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding,LSTM,Dense,SimpleRNN
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping

In [2]:
data = pd.read_csv('/content/judge-1377884607_tweet_product_company.csv',encoding = 'latin-1')

In [3]:
data.shape

(9093, 3)

In [4]:

data.head()

Unnamed: 0,tweet_text,emotion_in_tweet_is_directed_at,is_there_an_emotion_directed_at_a_brand_or_product
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,iPhone,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,iPad or iPhone App,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,iPad,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,iPad or iPhone App,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Google,Positive emotion


In [5]:
data = data.drop('emotion_in_tweet_is_directed_at',axis = 1)

In [6]:
data = data.rename(columns={'is_there_an_emotion_directed_at_a_brand_or_product': 'sentiment'})

In [7]:
data.head()

Unnamed: 0,tweet_text,sentiment
0,.@wesley83 I have a 3G iPhone. After 3 hrs twe...,Negative emotion
1,@jessedee Know about @fludapp ? Awesome iPad/i...,Positive emotion
2,@swonderlin Can not wait for #iPad 2 also. The...,Positive emotion
3,@sxsw I hope this year's festival isn't as cra...,Negative emotion
4,@sxtxstate great stuff on Fri #SXSW: Marissa M...,Positive emotion


In [8]:
import re

In [9]:
def remove_tags(raw_text):
    cleaned_text = re.sub(r'[^a-zA-Z\s]', '', str(raw_text))
    return cleaned_text

data['tweet_text'] = data['tweet_text'].apply(remove_tags)

In [10]:
X = data['tweet_text'].values  # 'text' column contains the tweet text
y = data['sentiment'].values  # 'sentiment' column contains the sentiment labels

In [11]:
from sklearn.preprocessing import LabelEncoder

In [12]:
# Map sentiment labels to numerical values using label encoding
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [13]:
# Convert numerical labels to one-hot encoded format
y_onehot = to_categorical(y_encoded, num_classes=4)

In [14]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

In [15]:
# Tokenize text and convert to sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train)
X_train_sequences = tokenizer.texts_to_sequences(X_train)
X_test_sequences = tokenizer.texts_to_sequences(X_test)

In [16]:
# Pad sequences to have the same length
max_sequence_length = max(len(seq) for seq in X_train_sequences)
X_train_padded = pad_sequences(X_train_sequences, maxlen=max_sequence_length)
X_test_padded = pad_sequences(X_test_sequences, maxlen=max_sequence_length)

In [17]:
# Define the model architecture (LSTM)
model = Sequential()
model.add(Embedding(len(tokenizer.word_index) + 1, 100, input_length=max_sequence_length))
model.add(LSTM(128))
model.add(Dense(4, activation='softmax'))

In [18]:
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [19]:
early_stopping = EarlyStopping(patience=3, monitor='val_loss')

In [20]:
# Train the model
model.fit(X_train_padded, y_train, validation_data=(X_test_padded, y_test), epochs=10, batch_size=32)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7c868d572b00>

In [21]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test_padded, y_test)
print("Test Loss:", loss)
print("Test Accuracy:", accuracy)

Test Loss: 1.5277096033096313
Test Accuracy: 0.6558548808097839


In [22]:
def predict_sentiment(tweet_text):
    sequence = tokenizer.texts_to_sequences([tweet_text])
    input_data = pad_sequences(sequence, maxlen=max_sequence_length)
    prediction = model.predict(input_data)[0]
    class_labels = ['positive', 'negative', 'neutral', 'no idea']
    predicted_class_index = np.argmax(prediction)
    predicted_class = class_labels[predicted_class_index]
    return predicted_class

In [23]:
input_text = "I have a 3G iPhone. After 3 hrs tweeting, it was dead! I need to upgrade."
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'I have a 3G iPhone. After 3 hrs tweeting, it was dead! I need to upgrade.' is: negative


In [24]:
input_text = "Awesome iPad/iPhone app that you'll likely appreciate for its design."
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'Awesome iPad/iPhone app that you'll likely appreciate for its design.' is: no idea


In [25]:
input_text = "I just noticed DST is coming this weekend. How many iPhone users will be an hour late at SXSW come Sunday morning?"
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'I just noticed DST is coming this weekend. How many iPhone users will be an hour late at SXSW come Sunday morning?' is: negative


In [26]:
input_text = "SXSW is just starting, CTIA is around the corner and googleio is only a hop skip and a jump from there, good time to be an android fan"
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'SXSW is just starting, CTIA is around the corner and googleio is only a hop skip and a jump from there, good time to be an android fan' is: no idea


In [27]:
input_text = "Apple has opened a pop-up store in Austin so the nerds in town for SXSW can get their new iPads."
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'Apple has opened a pop-up store in Austin so the nerds in town for SXSW can get their new iPads.' is: positive


In [28]:
input_text = "iPhone app makes it easy to connect on all social networks with people you meet."
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'iPhone app makes it easy to connect on all social networks with people you meet.' is: negative


In [29]:
# Example usage
input_text = "it was a fine day"
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'it was a fine day' is: positive


In [30]:
input_text = "This movie is amazing!"
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'This movie is amazing!' is: no idea


In [31]:
input_text = "it was a horrible day"
prediction = predict_sentiment(input_text)
print(f"The sentiment prediction for '{input_text}' is: {prediction}")

The sentiment prediction for 'it was a horrible day' is: negative
