In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [13]:
# Load dataset
dataset = pd.read_csv("/content/Sentiment_Analysis_Dataset.csv", encoding='latin-1', usecols=['ItemID', 'Sentiment', 'SentimentText'])

# Display the first few rows to verify
dataset.head()


Unnamed: 0,ItemID,Sentiment,SentimentText
0,1,0,is so sad for my APL frie...
1,2,0,I missed the New Moon trail...
2,3,1,omg its already 7:30 :O
3,4,0,.. Omgaga. Im sooo im gunna CRy. I'...
4,5,0,i think mi bf is cheating on me!!! ...


In [10]:
# Preprocess the data
# Separate features and target variable
X = dataset['SentimentText']
y = dataset['Sentiment']

# Convert text data to numeric using CountVectorizer
vectorizer = CountVectorizer(stop_words='english', max_features=5000)
X_vectorized = vectorizer.fit_transform(X)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_vectorized, y, test_size=0.2, random_state=42)


In [12]:
# Cell 4: Logistic Regression
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)
logistic_preds = logistic_model.predict(X_test)
logistic_accuracy = accuracy_score(y_test, logistic_preds)
print(f"Logistic Regression Accuracy: {logistic_accuracy:.2f}")


Logistic Regression Accuracy: 0.74


In [5]:
# Cell 5: Support Vector Machine
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)
svm_preds = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_preds)
print(f"Support Vector Machine Accuracy: {svm_accuracy:.2f}")


Support Vector Machine Accuracy: 0.74


In [6]:
# Cell 6: Random Forest
# Import the RandomForestClassifier class from sklearn.ensemble
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_preds)
print(f"Random Forest Accuracy: {rf_accuracy:.2f}")


Random Forest Accuracy: 0.72


In [7]:
# Neural Network
X_train_dense = X_train.toarray()
X_test_dense = X_test.toarray()

nn_model = Sequential([
    Dense(128, activation='relu', input_shape=(X_train_dense.shape[1],)),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
nn_model.fit(X_train_dense, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Evaluate the neural network
nn_loss, nn_accuracy = nn_model.evaluate(X_test_dense, y_test)
print(f"Neural Network Accuracy: {nn_accuracy:.2f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 13ms/step - accuracy: 0.6395 - loss: 0.6210 - val_accuracy: 0.7364 - val_loss: 0.5243
Epoch 2/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 11ms/step - accuracy: 0.7831 - loss: 0.4673 - val_accuracy: 0.7370 - val_loss: 0.5291
Epoch 3/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 10ms/step - accuracy: 0.8224 - loss: 0.4034 - val_accuracy: 0.7362 - val_loss: 0.5505
Epoch 4/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11ms/step - accuracy: 0.8518 - loss: 0.3482 - val_accuracy: 0.7294 - val_loss: 0.5942
Epoch 5/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 14ms/step - accuracy: 0.8783 - loss: 0.2879 - val_accuracy: 0.7279 - val_loss: 0.6662
Epoch 6/10
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 12ms/step - accuracy: 0.8957 - loss: 0.2451 - val_accuracy: 0.7306 - val_loss: 0.7653
Epoc

In [17]:
# Function to predict the sentiment of a single tweet
def predict_sentiment(tweet):
    # Preprocess the input tweet
    tweet_vectorized = vectorizer.transform([tweet])

    # Logistic Regression Prediction
    logistic_pred = logistic_model.predict(tweet_vectorized)[0]
    return "Happy" if logistic_pred == 1 else "Sad"

# Take input from the user
tweet = input("Enter a tweet to analyze its sentiment: ")
emotion = predict_sentiment(tweet)
print(f"The sentiment of the tweet is: {emotion}")


Enter a tweet to analyze its sentiment: I love Python
The sentiment of the tweet is: Happy
