# 1. Data Preparation
Assuming we have the data in a CSV file with columns text (tweet text) and sentiment (sentiment labels).

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Load the dataset
df = pd.read_csv('/content/judge-1377884607_tweet_product_company.csv', encoding='unicode_escape')
# Rename columns for easier reference
df.rename(columns={'tweet_text': 'text', 'is_there_an_emotion_directed_at_a_brand_or_product': 'sentiment'}, inplace=True)

# Ensure all entries in the text column are strings
df['text'] = df['text'].astype(str).fillna('')

# Preprocess the sentiment labels
label_encoder = LabelEncoder()
df['sentiment'] = label_encoder.fit_transform(df['sentiment'])

# Tokenize the tweet texts
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['text'])
sequences = tokenizer.texts_to_sequences(df['text'])

# Pad the sequences
max_length = max(len(seq) for seq in sequences)
X = pad_sequences(sequences, maxlen=max_length, padding='post')
y = df['sentiment']

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# 2: Model Creation
We will create an LSTM model. You can switch to a SimpleRNN by replacing the LSTM layer.

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, SimpleRNN

# Define the model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=64, input_length=max_length))
model.add(LSTM(64, return_sequences=False))
model.add(Dense(4, activation='softmax'))  # 4 classes: positive, negative, neutral, no_idea

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 33, 64)            649536    
                                                                 
 lstm (LSTM)                 (None, 64)                33024     
                                                                 
 dense (Dense)               (None, 4)                 260       
                                                                 
Total params: 682820 (2.60 MB)
Trainable params: 682820 (2.60 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


# 3: Model Training

In [12]:
# Train the model
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

# 4: Model Evaluation

In [13]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy}')


Test Accuracy: 0.6146234273910522


# 5: Prediction

In [17]:
# Predict sentiments for new tweets
new_tweets = ["I love my new Apple iPhone!", "Google's new update is great."]
new_sequences = tokenizer.texts_to_sequences(new_tweets)
new_padded_sequences = pad_sequences(new_sequences, maxlen=max_length, padding='post')

predictions = model.predict(new_padded_sequences)
predicted_classes = label_encoder.inverse_transform(predictions.argmax(axis=1))

for tweet, sentiment in zip(new_tweets, predicted_classes):
    print(f'Tweet: {tweet}\nPredicted Sentiment: {sentiment}\n')


Tweet: I love my new Apple iPhone!
Predicted Sentiment: Positive emotion

Tweet: Google's new update is great.
Predicted Sentiment: Negative emotion

