In [21]:
# Extracting tweets from search api

from tweepy import OAuthHandler
from twitter_credentials import Tokens
import tweepy as twp

auth = OAuthHandler(Tokens.CONSUMER_KEY, Tokens.CONSUMER_SECRET)
auth.set_access_token(Tokens.ACCESS_TOKEN, Tokens.ACCESS_TOKEN_SECRET)

api = twp.API(auth)

tweets_info = twp.Cursor(api.search,
                             q=('Canada OR Canada+import OR Canada+export OR Canada+vehicle+sales OR Canada+Education'),
                             lang="en", 
                             tweet_mode="extended").items(2000)


In [22]:
#clean the tweets by removing URLs, emoticons and, special characters except '-' and '+' because, in few cases 
#they decide the polarity of the word. Example: absent-minded, anti-nationalist  
#Also, extracted the extended text of each tweet

import re

list_tweets = []

for item in tweets_info:    
    # to retrieve the full text of each tweet
    if 'retweeted_status' in item._json:
        tweet = item._json['retweeted_status']['full_text']
    else:
        tweet = item.full_text
        
    tweet_no_url = re.sub(r"http\S+", "", tweet) #remove URLs
    tweet_no_emoji= re.compile('[\U00010000-\U0010ffff]', flags=re.UNICODE).sub(r'', tweet_no_url) #remove emoticons        
    cleaned_tweet = re.sub('[^A-Za-z0-9\+\- ]+', '', tweet_no_emoji) #remove special characters
   
    list_tweets.append(cleaned_tweet) #adding cleaned tweets to a list
    

In [23]:
# creating bag of words for each tweet and store in dictionary that contains tweets and the corresponding list of bag of words

dic={}
counter=1
for tweet in list_tweets:    
    list = []    
    list = tweet.split()    
    bag_of_words = {}
    for word in list:        
        if word in bag_of_words:
             bag_of_words[word] = bag_of_words[word] + 1
        else: 
             bag_of_words[word] = 1
               
    dic[tweet] = bag_of_words       
    counter += 1                   

In [24]:
# using the Lexicon opinion set of positive and negative words to perform sentiment analysis

fileNegative = open('negative-words.txt', 'r')
filePositive = open('positive-words.txt', 'r')

content_fileNegative = fileNegative.read()
content_filePositive = filePositive.read()

negative_file_list = content_fileNegative.split()
positive_file_list = content_filePositive.split()

In [25]:
# looping through the dictionary created above to find the polarity of each tweet in the dictionary
# created dataframe to show the results 

import pandas as pd
df = pd.DataFrame(columns=['Tweet', 'Message', 'Match', 'Polarity'])

count_tweets = 0

tweet_list = dic.keys()    
for tweet in tweet_list:
    count_negative = 0
    count_positive = 0
    count_neutral = 0  
    negative_list = []
    positive_list = []
    
    bag_of_words_dic = dic[tweet]
    bag_of_words_dic_keys = bag_of_words_dic.keys()    
    for word in bag_of_words_dic_keys:
        increment = bag_of_words_dic[word]
        if word in negative_file_list:          
            count_negative += increment
            negative_list.append(word)
        elif word in positive_file_list:
            count_positive += increment
            positive_list.append(word)
        
    if count_negative > count_positive:        
        df.loc[count_tweets] = [count_tweets+1, tweet, negative_list, 'NEGATIVE' ]
    elif count_negative < count_positive:
        df.loc[count_tweets] = [count_tweets+1, tweet, positive_list, 'POSITIVE']
    else:
        df.loc[count_tweets] = [count_tweets+1, tweet, '-', 'NEUTRAL']
        
    count_tweets += 1
        

In [26]:
#printing the resultant data frame
df

Unnamed: 0,Tweet,Message,Match,Polarity
0,1,She said the man was standing in the middle of...,-,NEUTRAL
1,2,Everyday tHe DiAsPoRa NeEd To ClAiM BRiTaiN Ca...,[better],POSITIVE
2,3,NSW police aid victims families in Canada foll...,[murder],NEGATIVE
3,4,Just ordered PapaJohns Use promo code PAPATRAC...,-,NEUTRAL
4,5,David thompson marfred art IN THE SADDLE 2SASK...,-,NEUTRAL
5,6,Canadian police can tolerate abortion parades ...,-,NEUTRAL
6,7,Amazing atmosphere here in DC as Prime Ministe...,-,NEUTRAL
7,8,The UKs handling of trade talks with Canada do...,[well],POSITIVE
8,9,dominicapold ppclondonwest kenpro11 Brianevera...,-,NEUTRAL
9,10,O-Red choked in the 2nd amp 3rd Danny starts t...,-,NEUTRAL


In [27]:
# export output to csv
df.to_csv("sentiment_output.csv", encoding='utf-8', index=False)