In [1]:
import pandas as pd
from textblob import TextBlob
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVR
from sklearn.metrics import classification_report, accuracy_score

# Make sure the NLTK stop-word corpus is available locally, then keep the
# English list as a set so the membership test in preprocess_tweet is cheap.
nltk.download("stopwords")
english_stopword_list = stopwords.words("english")
stop_words = set(english_stopword_list)

def preprocess_tweet(tweet):
    """Normalize a raw tweet into a space-separated string of content words.

    The tweet is lower-cased; URLs and every run of characters that is not an
    ASCII letter, digit or whitespace are deleted (removed outright, not
    replaced by a space); the remainder is split on whitespace and tokens found
    in the module-level ``stop_words`` set are dropped.

    Args:
        tweet: The raw tweet text (must support ``.lower()``).

    Returns:
        The surviving tokens joined by single spaces.
    """
    stripped = re.sub(r"(https?:\/\/\S+|[^A-Za-z0-9\s]+)", "", tweet.lower())
    return " ".join(token for token in stripped.split() if token not in stop_words)

def polarity_to_class(polarity, threshold=0.34):
    """Map a polarity score to a three-way sentiment label.

    Scores strictly below ``-threshold`` are "negative", scores inside the
    closed interval ``[-threshold, threshold]`` are "neutral", and everything
    above ``threshold`` is "positive".

    The bands are open-ended on purpose: a regression model's output is not
    guaranteed to stay inside [-1, 1], and a score below -1 must still be
    labelled "negative" instead of falling through to "positive".

    Args:
        polarity: Numeric polarity score.
        threshold: Half-width of the neutral band (defaults to 0.34).

    Returns:
        One of "negative", "neutral" or "positive".
    """
    if polarity < -threshold:
        return "negative"
    if polarity <= threshold:
        return "neutral"
    return "positive"

# Load each leader's top-100 word/polarity table and discard any row that has
# a missing value in any column.
biden = pd.read_csv("Biden_Word_Polarity.csv").dropna()
putin = pd.read_csv("Putin_Word_Polarity.csv").dropna()
zelenskyy = pd.read_csv("Zelenskyy_Word_Polarity.csv").dropna()
xijinping = pd.read_csv("Xijinping_Word_Polarity.csv").dropna()

# Stack the four tables into one frame. When the same "Word" appears for more
# than one leader only its first occurrence (in the order listed) is kept.
leader_word_tables = [biden, putin, zelenskyy, xijinping]
all_top_words = pd.concat(leader_word_tables, ignore_index=True).drop_duplicates(subset=["Word"])

# Pin the TF-IDF feature space to the known polarity words (unigrams only), so
# every feature column corresponds to one entry of the "Word" column.
top_word_vocabulary = all_top_words["Word"].tolist()
vectorizer = TfidfVectorizer(ngram_range=(1, 1), vocabulary=top_word_vocabulary)

# Training set: each document is one entry of "Word", the target is the
# matching value of "Polarity".
X_train = vectorizer.fit_transform(all_top_words["Word"]).toarray()
y_train = all_top_words["Polarity"]

# Fit a support-vector regressor with scikit-learn's default settings.
model = SVR().fit(X_train, y_train)

# For each leader: read the "Text" column of the tweet file, discard missing
# entries, then run every remaining tweet through preprocess_tweet.
modi_tweets = pd.read_csv("Filtered_Modi_CSV.csv")["Text"].dropna().apply(preprocess_tweet)
trudeau_tweets = pd.read_csv("Filtered_Trudeau_CSV.csv")["Text"].dropna().apply(preprocess_tweet)

# Stack both leaders' cleaned tweets into one series: Modi first, then Trudeau.
all_tweets = pd.concat([modi_tweets, trudeau_tweets], ignore_index=True)

# TF-IDF features for the tweets over the fixed training vocabulary.
X_test = vectorizer.transform(all_tweets).toarray()

# Regress a polarity score for every tweet, then bucket it into a class label.
y_pred = model.predict(X_test)
y_pred_classes = [polarity_to_class(score) for score in y_pred]

# Report each tweet alongside its predicted score and class.
for tweet, score, label in zip(all_tweets, y_pred, y_pred_classes):
    print(f"Tweet: {tweet}")
    print(f"Predicted Polarity: {score}")
    print(f"Predicted Sentiment Class: {label}\n")


[nltk_data] Downloading package stopwords to C:\Users\Diljit
[nltk_data]     Singh\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Tweet: vivekgramaswamy mini modi one trick pony actual policy positions lifted team trump
Predicted Polarity: 0.021356337416514895
Predicted Sentiment Class: neutral

Tweet: 11 pm modis roadshow span 11 1 pm 101 km 4 pm 10 pm 265 km
Predicted Polarity: 0.021356337416514895
Predicted Sentiment Class: neutral

Tweet: 9 meanwhile prime minister narendra modi conduct 366 km roadshow bengaluru may 6
Predicted Polarity: 0.021356337416514895
Predicted Sentiment Class: neutral

Tweet: bjp4tamilnadu go back modi modi gedi
Predicted Polarity: 0.021356337416514895
Predicted Sentiment Class: neutral

Tweet: upadhyayabhii ab yeh patrkarita zyada din nhi chalne wali abki baar 2024 ke baad tumhari baari sootr batate hen modi sarkar media trail hindu muslim karne per jail men dalne ki tayyari
Predicted Polarity: 3.937309353297877e-05
Predicted Sentiment Class: neutral

Tweet: manipur burning help mary kom sought help pm modi late night
Predicted Polarity: 0.021356337416514895
Predicted Sentiment Class

In [2]:
def _textblob_label(text):
    """Return the sentiment class of ``text`` derived from TextBlob's polarity."""
    return polarity_to_class(TextBlob(text).sentiment.polarity)


# Reference labels for evaluation. These are TextBlob's polarity on the
# already-preprocessed tweets, bucketed with the same thresholds as the model
# output; they are a lexicon-based reference, not human annotations.
modi_true_classes = [_textblob_label(tweet) for tweet in modi_tweets]
trudeau_true_classes = [_textblob_label(tweet) for tweet in trudeau_tweets]
true_classes = modi_true_classes + trudeau_true_classes

# Rebuild the TF-IDF features (Modi first, then Trudeau) and re-predict so the
# labels above line up with the predictions below.
all_tweets = pd.concat([modi_tweets, trudeau_tweets], ignore_index=True)
X_test = vectorizer.transform(all_tweets).toarray()
y_pred = model.predict(X_test)
y_pred_classes = [polarity_to_class(score) for score in y_pred]

# Agreement between the model's classes and the TextBlob reference classes.
accuracy = accuracy_score(true_classes, y_pred_classes)
print("Accuracy:", accuracy)
report = classification_report(true_classes, y_pred_classes)
print("Classification Report:\n", report)

Accuracy: 0.8942992874109263
Classification Report:
               precision    recall  f1-score   support

    negative       0.65      0.60      0.62        47
     neutral       0.92      0.95      0.94       707
    positive       0.73      0.59      0.65        88

    accuracy                           0.89       842
   macro avg       0.77      0.71      0.74       842
weighted avg       0.89      0.89      0.89       842



In [3]:
# For comparison to actual polarity

# Number of cleaned tweets per leader.
num_modi_tweets = len(modi_tweets)
num_trudeau_tweets = len(trudeau_tweets)

# y_pred was produced from Modi's tweets followed by Trudeau's, so the first
# num_modi_tweets predictions belong to Modi and the remainder to Trudeau.
modi_pred_polarities, trudeau_pred_polarities = (
    y_pred[:num_modi_tweets],
    y_pred[num_modi_tweets:],
)

# Mean predicted polarity per leader.
modi_avg_polarity = modi_pred_polarities.mean()
trudeau_avg_polarity = trudeau_pred_polarities.mean()

print(f"Modi's Average Polarity Score: {modi_avg_polarity}")
print(f"Trudeau's Average Polarity Score: {trudeau_avg_polarity}")

Modi's Average Polarity Score: 0.06323040590950363
Trudeau's Average Polarity Score: 0.02153289336897309
