In [2]:
import torch
import transformers

In [4]:
import pandas as pd

In [5]:

from textblob import TextBlob

In [6]:

from nltk.sentiment.vader import SentimentIntensityAnalyzer
import nltk


In [7]:
import tensorflow as tf
print("TensorFlow version:", tf.__version__)


TensorFlow version: 2.9.0


#### Twitter Api

In [8]:

# # Set up Twitter API credentials
# api_key = 'YOUR_API_KEY'
# api_secret_key = 'YOUR_SECRET_KEY'
# access_token = 'YOUR_ACCESS_TOKEN'
# access_token_secret = 'YOUR_ACCESS_SECRET'

# # Authenticate with Twitter
# auth = tweepy.OAuthHandler(api_key, api_secret_key)
# auth.set_access_token(access_token, access_token_secret)
# api = tweepy.API(auth)

# # Collect tweets
# tweets = []
# for tweet in tweepy.Cursor(api.search_tweets, q="your_topic", lang="en").items(100):
#     tweets.append(tweet.text)

In [9]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

nltk.download('stopwords')
nltk.download('wordnet')
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    # Remove URLs, mentions, and hashtags
    text = re.sub(r'http\S+|www\S+|@\S+|#\S+', '', text)
    # Lowercase
    text = text.lower()
    # Remove punctuation and numbers
    text = re.sub(r'\d+|[^\w\s]', '', text)
    # Tokenize and remove stopwords
    words = [word for word in text.split() if word not in stop_words]
    # Lemmatize
    words = [lemmatizer.lemmatize(word) for word in words]
    return ' '.join(words)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Home\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\Home\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [10]:
# # Apply preprocessing to all tweets
# tweets = [preprocess(tweet) for tweet in tweets]

## Sentiment Analysis

In [11]:
data = pd.read_csv('twitter_training_mod.csv')

In [12]:
data.shape

(74682, 4)

In [13]:
data.head()

Unnamed: 0,Id,entity,sentiment,content
0,2401,Borderlands,Positive,im getting on borderlands and i will murder yo...
1,2401,Borderlands,Positive,I am coming to the borders and I will kill you...
2,2401,Borderlands,Positive,im getting on borderlands and i will kill you ...
3,2401,Borderlands,Positive,im coming on borderlands and i will murder you...
4,2401,Borderlands,Positive,im getting on borderlands 2 and i will murder ...


In [14]:
data.dtypes

Id            int64
entity       object
sentiment    object
content      object
dtype: object

In [15]:
data['sentiment'] = data['sentiment'].str.lower()

In [13]:
import re

def preprocess_text(text):
  """Preprocesses text data (handles potential non-string data).

  Args:
      text: The text data to be preprocessed.

  Returns:
      The preprocessed text (empty string if not a string).
  """
  if not isinstance(text, str):
    return ""  # Handle non-string data (e.g., return empty string)
  # ... rest of your preprocessing steps for strings ...
  text = re.sub(r'http\S+|www\S+|@\S+|#\S+', '', text)
  text = re.sub(r'\W+', ' ', text)
  text = text.lower()
  return text

# Apply preprocessing to the content column
data['cleaned_content'] = data['content'].apply(preprocess_text)
# Delete rows with empty strings in 'cleaned_content'
data = data[data['cleaned_content'] != '']


In [14]:
data.shape

(73972, 5)

In [15]:
def textblob_sentiment(text):
    analysis = TextBlob(text)
    # Get polarity score
    polarity = analysis.sentiment.polarity
    # Classify polarity
    if polarity > 0:
        return 'positive'
    elif polarity < 0:
        return 'negative'
    else:
        return 'neutral'



In [16]:
# Apply TextBlob analysis
data['textblob_sentiment'] = data['cleaned_content'].apply(textblob_sentiment)

In [17]:
data['sentiment'].value_counts()

negative      22352
positive      20650
neutral       18102
irrelevant    12868
Name: sentiment, dtype: int64

In [18]:
data['textblob_sentiment'].value_counts()

positive    34209
negative    21137
neutral     18626
Name: textblob_sentiment, dtype: int64

In [19]:
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

def vader_sentiment(text):
    # Get sentiment scores
    scores = sid.polarity_scores(text)
    # Classify based on compound score
    if scores['compound'] >= 0.05:
        return 'positive'
    elif scores['compound'] <= -0.05:
        return 'negative'
    else:
        return 'neutral'


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Home\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [20]:
# Apply VADER analysis
data['vader_sentiment'] = data['cleaned_content'].apply(vader_sentiment)

In [21]:
data['vader_sentiment'].value_counts()

positive    34277
negative    27160
neutral     12535
Name: vader_sentiment, dtype: int64

In [22]:
# from transformers import pipeline

# # Load a pre-trained sentiment-analysis model
# sentiment_pipeline = pipeline("sentiment-analysis")

# def huggingface_sentiment(text):
#     # Get model predictions
#     result = sentiment_pipeline(text)[0]
#     return result['label'].lower()  # Convert to lowercase for consistency

In [23]:
# # Apply Hugging Face model analysis
# data['huggingface_sentiment'] = data['cleaned_content'].apply(huggingface_sentiment)

In [24]:
# Display a sample of the results
data[['content','sentiment', 'textblob_sentiment', 'vader_sentiment']].head(10)


Unnamed: 0,content,sentiment,textblob_sentiment,vader_sentiment
0,im getting on borderlands and i will murder yo...,positive,neutral,negative
1,I am coming to the borders and I will kill you...,positive,neutral,negative
2,im getting on borderlands and i will kill you ...,positive,neutral,negative
3,im coming on borderlands and i will murder you...,positive,neutral,negative
4,im getting on borderlands 2 and i will murder ...,positive,neutral,negative
5,im getting into borderlands and i can murder y...,positive,neutral,negative
6,So I spent a few hours making something for fu...,positive,positive,positive
7,So I spent a couple of hours doing something f...,positive,positive,positive
8,So I spent a few hours doing something for fun...,positive,positive,positive
9,So I spent a few hours making something for fu...,positive,positive,positive


In [25]:
from sklearn.metrics import accuracy_score, classification_report

# TextBlob evaluation
print("TextBlob Accuracy:", accuracy_score(data['sentiment'], data['textblob_sentiment']))
print(classification_report(data['sentiment'], data['textblob_sentiment']))

# VADER evaluation
print("VADER Accuracy:", accuracy_score(data['sentiment'], data['vader_sentiment']))
print(classification_report(data['sentiment'], data['vader_sentiment']))


TextBlob Accuracy: 0.3988536202887579


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

  irrelevant       0.00      0.00      0.00     12868
    negative       0.50      0.47      0.49     22352
     neutral       0.29      0.30      0.29     18102
    positive       0.39      0.65      0.49     20650

    accuracy                           0.40     73972
   macro avg       0.30      0.36      0.32     73972
weighted avg       0.33      0.40      0.36     73972

VADER Accuracy: 0.4095468555669713


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


              precision    recall  f1-score   support

  irrelevant       0.00      0.00      0.00     12868
    negative       0.47      0.57      0.51     22352
     neutral       0.29      0.20      0.24     18102
    positive       0.41      0.68      0.51     20650

    accuracy                           0.41     73972
   macro avg       0.29      0.36      0.31     73972
weighted avg       0.33      0.41      0.36     73972



  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
from transformers import pipeline

# Load a pre-trained sentiment-analysis model
sentiment_pipeline = pipeline("sentiment-analysis")

In [None]:
def huggingface_sentiment(text):
    # Get model predictions
    result = sentiment_pipeline(text)[0]
    return result['label'].lower()  # Convert to lowercase for consistency

# Apply Hugging Face model analysis
data['huggingface_sentiment'] = data['cleaned_content'].apply(huggingface_sentiment)
