In [None]:
# One-time setup: uncomment and run the commands below if TextBlob or its NLTK corpora are not installed yet
# ! pip install textblob
# import nltk
# nltk.download('movie_reviews')
# nltk.download('punkt')

In [2]:
# Imports

import pandas as pd
from textblob import TextBlob

In [72]:
# Load sample tweet data

file_name = './Resources/sample_tweets.txt'

# Limit number of entries for testing purposes
n_testing = 10

with open(file_name, encoding="ISO-8859-1") as f:
    # Keep the first n_testing lines and strip surrounding whitespace
    # (including the trailing `\n`) from each of them
    raw_tweets = [line.strip() for line in f.readlines()[:n_testing]]

print(f'Loaded {len(raw_tweets)} tweets into raw_tweets[]')



Loaded 10 tweets into raw_tweets[]


In [74]:
# Clean data

def clean_tweet(raw_tweet):
    """Return `raw_tweet` without its sample-data label and Twitter noise.

    Steps:
      1. Drop a leading 'POLIT' / 'NOT' label plus the one separator
         character that follows it (a feature of the sample data, not of
         Twitter in general).
      2. Drop every space-separated word that starts with 'http' (URLs),
         'RT' (retweet marker) or '@' (Twitter handles).

    Args:
        raw_tweet: one line of the sample file, already stripped.

    Returns:
        The cleaned tweet text, with the remaining words re-joined by
        single spaces.
    """
    if raw_tweet[0:5] == 'POLIT':
        text = raw_tweet[6:]
    elif raw_tweet[0:3] == 'NOT':
        text = raw_tweet[4:]
    else:
        text = raw_tweet

    # str.startswith accepts a tuple of prefixes, so URLs, 'RT' markers and
    # handles are filtered in a single pass instead of three split/join rounds
    unwanted_prefixes = ('http', 'RT', '@')
    return ' '.join(
        word for word in text.split(' ')
        if not word.startswith(unwanted_prefixes)
    )


tweets = [clean_tweet(raw_tweet) for raw_tweet in raw_tweets]

# Display tweets[]
for tweet in tweets:
    print(tweet)





Global Voices Online ÃÂ» Alex Castro: A liberal, libertarian and libertine Brazilian blogger
Do the Conservatives Have a Death Wish?
I've seen all of your movies and Capitalism is my favorite. Keep up the great work!
* House Dems ask for civility at town halls and invoke George Washington in quest for socialism
Quote of the week: My political opinions lean more and more towards Anarchy &lt;Tolkien the anarchist?!
LOL, I like quotes. Feminist, anti-men quotes.
I also think that most liberals don't spend a lot of time thinking about tolerating. Tolerance connotes condescension.
check explains brownshirts. Conservative action plan for august recess All mtgs disrupted
Finally US asks for extradition of Polanski - what were they waiting for? #feminism
FUNNY! Teabagger quoting (but not understanding) Liberal Justice. William O Douglas. WHAT A HOOT ...


In [77]:
# Assess sentiment
from textblob.sentiments import NaiveBayesAnalyzer

# Build the analyzer once and share it between all blobs. Creating a new
# NaiveBayesAnalyzer() inside the loop is loop-invariant work, and each fresh
# instance has to be trained again before it can score its first text.
nb_analyzer = NaiveBayesAnalyzer()

for tweet in tweets:
    # Calculate tweet sentiment
    blob = TextBlob(tweet, analyzer=nb_analyzer)
    result = blob.sentiment

    # Signed confidence of the predicted class: +p_pos for 'pos',
    # -p_neg for 'neg', NaN for any other label
    if result.classification == 'pos':
        sentiment_score = result.p_pos
    elif result.classification == 'neg':
        sentiment_score = -1 * result.p_neg
    else:
        sentiment_score = float('NaN')

    # Print tweet and score
    print(blob)
    print(f'sentiment: {result.classification} ({round(sentiment_score, 3)})')


Global Voices Online ÃÂ» Alex Castro: A liberal, libertarian and libertine Brazilian blogger
sentiment: pos (0.997)


Do the Conservatives Have a Death Wish?
sentiment: pos (0.846)


I've seen all of your movies and Capitalism is my favorite. Keep up the great work!
sentiment: pos (0.673)


* House Dems ask for civility at town halls and invoke George Washington in quest for socialism
sentiment: pos (0.946)


Quote of the week: My political opinions lean more and more towards Anarchy &lt;Tolkien the anarchist?!
sentiment: pos (0.997)


LOL, I like quotes. Feminist, anti-men quotes.
sentiment: neg (-0.898)


I also think that most liberals don't spend a lot of time thinking about tolerating. Tolerance connotes condescension.
sentiment: pos (0.994)


check explains brownshirts. Conservative action plan for august recess All mtgs disrupted
sentiment: pos (0.886)


Finally US asks for extradition of Polanski - what were they waiting for? #feminism
sentiment: pos (0.684)


FUNNY! Teabagger

In [104]:

test_cases = [
    'I love Bernie so much and he is the best',
    'I hate Bernie so much and he is the worst',
    'I do not feel strongly one way or the other about Bernie',
    'Bernie is a socialist',
    'Bernie is a dirty socialist and he hates freedom',
    'Bernie is a great socialist who will restore power to the people'
]

# Build the Naive Bayes analyzer once and share it between all blobs instead
# of constructing (and re-training) a new one for every test case.
nb_analyzer = NaiveBayesAnalyzer()

for tweet in test_cases:
    blob = TextBlob(tweet, analyzer=nb_analyzer)

    # Score the tweet with both methods
    scores = [
        # Method 1: Naive Bayes, rescaling p_pos linearly via 2 * p_pos - 1
        ('Method 1', 2.0 * blob.sentiment.p_pos - 1.0),
        # Method 2: polarity from the default analyzer (PatternAnalyzer)
        ('Method 2', TextBlob(tweet).sentiment.polarity),
    ]

    # Print tweet, then one labelled score per method
    print(blob)
    for method, sentiment_score in scores:
        # Scores strictly between -0.25 and 0.25 are reported as neutral
        if sentiment_score >= 0.25:
            sentiment = 'Positive'
        elif sentiment_score <= -0.25:
            sentiment = 'Negative'
        else:
            sentiment = 'Neutral'
        print(f'{method}: {sentiment} {round(sentiment_score, 2)}')
    print('\n')




I love Bernie so much and he is the best
Method 1: Neutral 0.24
Method 2: Positive 0.57


I hate Bernie so much and he is the worst
Method 1: Negative -0.59
Method 2: Negative -0.53


I do not feel strongly one way or the other about Bernie
Method 1: Positive 0.66
Method 2: Neutral 0.15


Bernie is a socialist
Method 1: Neutral 0.0
Method 2: Neutral 0.0


Bernie is a dirty socialist and he hates freedom
Method 1: Positive 0.45
Method 2: Negative -0.6


Bernie is a great socialist who will restore power to the people
Method 1: Positive 0.68
Method 2: Positive 0.8




In [8]:
# Does order of words matter?

test_cases = [
    'I love Bernie so much and he is the best',
    'he is the best and I love Bernie so much',
    'love the much so he I  and is best Bernie',
]

for text in test_cases:
    # Polarity comes from TextBlob's default analyzer (PatternAnalyzer)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity

    # Label the polarity: anything strictly between -0.25 and 0.25 is neutral
    label = (
        'Positive' if polarity >= 0.25
        else 'Negative' if polarity <= -0.25
        else 'Neutral'
    )

    # Tweet, labelled score, then a blank separator line
    print(blob, f'{label} {round(polarity, 2)}', '', sep='\n')


I love Bernie so much and he is the best
Positive 0.57

he is the best and I love Bernie so much
Positive 0.57

love the much so he I  and is best Bernie
Positive 0.57



In [9]:
# Do capitals matter?

test_cases = [
    'i love bernie so much and he is the best',
    'I LOVE BERNIE SO MUCH AND HE IS THE BEST',
    'I lOvE bErNiE sO mUcH aNd He Is ThE bEsT',
]

for text in test_cases:
    # Polarity comes from TextBlob's default analyzer (PatternAnalyzer)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity

    # Start from neutral and override once a threshold is reached
    label = 'Neutral'
    if polarity >= 0.25:
        label = 'Positive'
    elif polarity <= -0.25:
        label = 'Negative'

    # Tweet, labelled score, then a blank separator line
    print(blob, f'{label} {round(polarity, 2)}', '', sep='\n')

i love bernie so much and he is the best
Positive 0.57

I LOVE BERNIE SO MUCH AND HE IS THE BEST
Positive 0.57

I lOvE bErNiE sO mUcH aNd He Is ThE bEsT
Positive 0.57



In [10]:
# Do stop words matter?

test_cases = [
    'I love Bernie so much and he is the best',
    'I love Bernie so much he best',
    'love Bernie much best',
]

for text in test_cases:
    # Polarity comes from TextBlob's default analyzer (PatternAnalyzer)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity

    # Label the polarity: anything strictly between -0.25 and 0.25 is neutral
    label = (
        'Positive' if polarity >= 0.25
        else 'Negative' if polarity <= -0.25
        else 'Neutral'
    )

    # Tweet, labelled score, then a blank separator line
    print(blob, f'{label} {round(polarity, 2)}', '', sep='\n')

I love Bernie so much and he is the best
Positive 0.57

I love Bernie so much he best
Positive 0.75

love Bernie much best
Positive 0.75



In [12]:
# Does punctuation matter?

test_cases = [
    'I love Bernie so much and he is the best',
    'I love Bernie so much, and he is the best.',
    'I love Bernie so much and he is the best!',
    'I love Bernie so much!!!! and he is the best!!!',
]

for text in test_cases:
    # Polarity comes from TextBlob's default analyzer (PatternAnalyzer)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity

    # Start from neutral and override once a threshold is reached
    label = 'Neutral'
    if polarity >= 0.25:
        label = 'Positive'
    elif polarity <= -0.25:
        label = 'Negative'

    # Tweet, labelled score, then a blank separator line
    print(blob, f'{label} {round(polarity, 2)}', '', sep='\n')

I love Bernie so much and he is the best
Positive 0.57

I love Bernie so much, and he is the best.
Positive 0.57

I love Bernie so much and he is the best!
Positive 0.57

I love Bernie so much!!!! and he is the best!!!
Positive 0.66



In [13]:
# Does repetition matter?

test_cases = [
    'I love Bernie so much and he is the best',
    'I love Bernie so much and he is the best I love Bernie so much and he is the best',
    'I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best',
    'I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best',
]

for text in test_cases:
    # Polarity comes from TextBlob's default analyzer (PatternAnalyzer)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity

    # Label the polarity: anything strictly between -0.25 and 0.25 is neutral
    label = (
        'Positive' if polarity >= 0.25
        else 'Negative' if polarity <= -0.25
        else 'Neutral'
    )

    # Tweet, labelled score, then a blank separator line
    print(blob, f'{label} {round(polarity, 2)}', '', sep='\n')

I love Bernie so much and he is the best
Positive 0.57

I love Bernie so much and he is the best I love Bernie so much and he is the best
Positive 0.57

I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best
Positive 0.57

I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best I love Bernie so much and he is the best
Positive 0.57



In [14]:
# Do emojis matter?

test_cases = [
    'I love Bernie so much and he is the best',
    'I love Bernie so much and he is the best =)',
    'I hate Bernie so much and he is the worst',
    'I hate Bernie so much and he is the worst :(',
]

for text in test_cases:
    # Polarity comes from TextBlob's default analyzer (PatternAnalyzer)
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity

    # Start from neutral and override once a threshold is reached
    label = 'Neutral'
    if polarity >= 0.25:
        label = 'Positive'
    elif polarity <= -0.25:
        label = 'Negative'

    # Tweet, labelled score, then a blank separator line
    print(blob, f'{label} {round(polarity, 2)}', '', sep='\n')

I love Bernie so much and he is the best
Positive 0.57

I love Bernie so much and he is the best =)
Positive 0.55

I hate Bernie so much and he is the worst
Negative -0.53

I hate Bernie so much and he is the worst :(
Negative -0.59

