In [1]:
import pandas as pd
import numpy as np

In [2]:
pip install gensim




In [3]:
from gensim.models import Word2Vec

In [4]:
df=pd.read_csv('tweet_sentiment.csv')

In [5]:
df.head()

Unnamed: 0,tweet,sentiment
0,The event starts at 5 PM.,neutral
1,I hate how this turned out.,negative
2,Fantastic experience!,positive
3,Fantastic experience!,positive
4,This is the worst thing ever!,negative


In [6]:
df.tail()

Unnamed: 0,tweet,sentiment
995,It’s cloudy outside.,neutral
996,Great job by the team!,positive
997,I'm so happy about the news!,positive
998,It ruined my whole day.,negative
999,I'm sick of this happening.,negative


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   tweet      1000 non-null   object
 1   sentiment  1000 non-null   object
dtypes: object(2)
memory usage: 15.8+ KB


In [8]:
df.describe()

Unnamed: 0,tweet,sentiment
count,1000,1000
unique,17,3
top,Fantastic experience!,positive
freq,71,396


In [9]:
import spacy

In [10]:
nlp=spacy.load('en_core_web_sm')

In [11]:
doc=df['tweet'].apply(lambda x: nlp(x))

In [12]:
def preprocess(texts):
  tokens=[token.text for token in texts if not token.is_stop and not token.is_punct]
  return tokens

In [13]:
final=doc.apply(lambda x: preprocess(x))

In [14]:


final_text=final.apply(lambda x: ' '.join(x))

In [15]:
model = Word2Vec(final_text, vector_size=100, window=5, min_count=1, workers=4)




In [16]:
max_len = 100
X = []

for sentence in final_text:
    sentence_vecs = []
    for word in sentence:
        if word in model.wv:
            sentence_vecs.append(model.wv[word])
    # Pad to fixed length
    while len(sentence_vecs) < max_len:
        sentence_vecs.append(np.zeros(model.vector_size))
    sentence_vecs = sentence_vecs[:max_len]  # truncate if longer
    X.append(sentence_vecs)

X = np.array(X)  # Shape: (num_sentences, 100, 100)


In [17]:
from sklearn.preprocessing import LabelEncoder

In [18]:
le=LabelEncoder()

In [19]:
y=le.fit_transform(df['sentiment'])

In [30]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [47]:
vocab_size = 1000
max_length = 10
embedding_dim = 64

# Tokenization
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(final_text)
sequences = tokenizer.texts_to_sequences(final_text)
padded = pad_sequences(sequences, maxlen=max_length, padding='post')
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length),
    LSTM(64),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Use 'softmax' for multi-class
])



In [48]:
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

In [49]:
from sklearn.model_selection import train_test_split

In [50]:
x_train,x_test,y_train,y_test=train_test_split(sequences,y,test_size=0.2,random_state=42)

In [51]:
x_train = pad_sequences(x_train, padding='post', maxlen=10)

In [52]:
model.fit(x_train,y_train,epochs=25)

Epoch 1/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.3199 - loss: 0.3422
Epoch 2/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.3325 - loss: -0.9111
Epoch 3/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.4737 - loss: -3.2478
Epoch 4/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6168 - loss: -5.1585
Epoch 5/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6311 - loss: -5.8227
Epoch 6/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5958 - loss: -7.2038
Epoch 7/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6253 - loss: -7.4820
Epoch 8/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6146 - loss: -8.3143
Epoch 9/25
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x78d3d9ac9b10>

In [53]:
x_test=pad_sequences(x_test, padding='post', maxlen=10)

In [54]:
y_pred = model.predict(x_test)
print("Predicted Output:", y_pred)




[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
Predicted Output: [[1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [8.8459172e-05]
 [1.0000000e+00]
 [1.0000000e+00]
 [8.3381135e-05]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [8.8459172e-05]
 [9.6328586e-05]
 [1.0000000e+00]
 [8.3381135e-05]
 [8.3381135e-05]
 [9.2849201e-05]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [9.2849201e-05]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [9.2849201e-05]
 [9.6328586e-05]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [1.0000000e+00]
 [8.3381135e-05]
 [1.00000