In [1]:
import GetOldTweets3 as got

import string
import re
import pandas as pd

from collections import Counter

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_tweets(query, start, end, max_tweets=200):
    """Fetch tweets matching a search query and return their text.

    Parameters
    ----------
    query : str
        Search string handed to ``setQuerySearch``.
    start : str
        Lower date bound handed to ``setSince`` (the calls in this notebook
        use the ``'YYYY-MM-DD'`` form).
    end : str
        Upper date bound handed to ``setUntil``, same form as ``start``.
    max_tweets : int, optional
        Cap on the number of tweets requested via ``setMaxTweets``.
        Defaults to 200, the value that used to be hard-coded here.

    Returns
    -------
    list of list of str
        One single-element list ``[text]`` per tweet returned by
        ``TweetManager.getTweets``.
    """
    criteria = (
        got.manager.TweetCriteria()
        .setQuerySearch(query)
        .setSince(start)
        .setUntil(end)
        .setMaxTweets(max_tweets)
    )
    tweets = got.manager.TweetManager.getTweets(criteria)
    # Each text is wrapped in its own list so the result is a list of rows
    # whose first element is the tweet body.
    return [[tweet.text] for tweet in tweets]

In [3]:
def process_text(text):
    """Normalise a batch of tweets into one space-separated string of lemmas.

    Steps, in order: join every row's first element into one string,
    lower-case it, strip ``string.punctuation``, tokenise, drop English
    stop words, remove every non-ASCII-letter character from each token,
    and lemmatise. A count is printed after each stage.

    Parameters
    ----------
    text : iterable of sequences
        Rows whose first element is the tweet text (it is passed through
        ``str``), e.g. ``[["first tweet"], ["second tweet"]]``.

    Returns
    -------
    str
        The lemmatised tokens joined by single spaces.
    """
    joined = " ".join(str(row[0]) for row in text)
    cleaned_text = joined.lower().translate(
        str.maketrans("", "", string.punctuation)
    )
    tokenized_words = word_tokenize(cleaned_text, "english")
    print("Total Number of Words ::", len(tokenized_words))

    # Build the stop-word set once: calling stopwords.words() for every
    # token repeats the lookup and then does a linear list-membership scan.
    stop_words = set(stopwords.words("english"))
    final_words = [word for word in tokenized_words if word not in stop_words]
    print("After filtering stopwords ::", len(final_words))

    # Tokens made up only of digits/symbols collapse to "" here and are
    # deliberately kept, so this count always equals the previous one.
    strings = [re.sub("[^a-zA-Z]", "", word) for word in final_words]
    print("Without characters/symbols ::", len(strings))

    # One lemmatizer instance is enough for the whole batch.
    lemmatizer = WordNetLemmatizer()
    lemma_words = [lemmatizer.lemmatize(word) for word in strings]
    print("Lemmatized Words ::", len(lemma_words))

    lemma_final = " ".join(lemma_words)
    print(">>>> Processing Text Complete")
    return lemma_final

In [4]:
def sentiment(string):
    """Print VADER polarity scores for ``string`` and a one-line verdict.

    The verdict compares the ``neg``, ``neu`` and ``pos`` scores:

    * ``neg > pos``: "Mostly Neutral with Negative Sentiment" when ``neu``
      is strictly greater than ``neg``, otherwise "Negative Sentiment".
    * ``pos > neg``: "Mostly Neutral with Positive Sentiment" when ``neu``
      is strictly greater than ``pos``, otherwise "Positive Sentiment".
    * ``neg == pos``: "Neutral Sentiment". These ties used to fall through
      to the positive label, as did ``neg > pos`` with ``neu == neg``.

    Parameters
    ----------
    string : str
        Text to score. The parameter name shadows the ``string`` module
        inside this function only; the module is not used here.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    sia = SentimentIntensityAnalyzer()
    sent = sia.polarity_scores(string)
    print("\n", sent)

    neg, neu, pos = sent['neg'], sent['neu'], sent['pos']
    if neg > pos:
        if neu > neg:
            print(">> Mostly Neutral with Negative Sentiment")
        else:
            print(">> Negative Sentiment")
    elif pos > neg:
        if neu > pos:
            print(">> Mostly Neutral with Positive Sentiment")
        else:
            print(">> Positive Sentiment")
    else:
        # Neither polarity dominates (includes all-zero scores).
        print(">> Neutral Sentiment")

In [5]:
def main(search, start_date, end_date):
    """Run the whole pipeline for one query and date window.

    Fetches tweets for ``search`` between ``start_date`` and ``end_date``,
    cleans their text, and prints the sentiment summary. Returns ``None``.
    """
    # fetch -> clean -> score, innermost call first
    sentiment(process_text(get_tweets(search, start_date, end_date)))

In [6]:
# Run the pipeline on 'coronavirus' tweets for the December 2019 window.
main(search='coronavirus', start_date='2019-12-01', end_date='2019-12-30')

Total Number of Words :: 3457
After filtering stopwords :: 2961
Without characters/symbols :: 2961
Lemmatized Words :: 2961
>>>> Processing Text Complete

 {'neg': 0.044, 'neu': 0.904, 'pos': 0.052, 'compound': 0.9257}
>> Mostly Neutral with Positive Sentiment


In [7]:
# Run the pipeline on 'coronavirus' tweets for the April 2020 window.
main(search='coronavirus', start_date='2020-04-01', end_date='2020-04-30')

Total Number of Words :: 4413
After filtering stopwords :: 3510
Without characters/symbols :: 3510
Lemmatized Words :: 3510
>>>> Processing Text Complete

 {'neg': 0.085, 'neu': 0.851, 'pos': 0.064, 'compound': -0.9978}
>> Mostly Neutral with Negative Sentiment
