# Tweet Sentiment Analysis Project, Part I

## Getting the Tweets

## By Jarrod Mautz

In [1]:
import re
import pandas as pd
import tweepy as tw
from tweepy import OAuthHandler
from textblob import TextBlob
import configparser

# Create a TwitterClient Class

Initialization sets up authentication for the Twitter API. We can then fetch tweets (`get_tweets`), clean a tweet's text (`clean_tweet`), or classify a tweet's sentiment (`get_tweet_sentiment`).

In [None]:
class TwitterClient(object):
    '''
    Generic Twitter Class for sentiment analysis.

    Reads API credentials from the [twitter] section of config.ini, builds a
    tweepy API client, and offers helpers to fetch tweets, clean their text
    and classify their sentiment with TextBlob.
    '''

    # Noise removed by clean_tweet. Alternatives are tried in this order at
    # each position: @mentions, URLs (scheme://...), then any single character
    # that is not an ASCII letter, digit, space or tab.
    # There must be NO spaces around the '|' separators: a space there is a
    # literal character and stops the alternatives from matching.
    _NOISE_PATTERN = re.compile(r"(@[A-Za-z0-9_]+)|(\w+://\S+)|([^0-9A-Za-z \t])")

    def __init__(self):
        '''
        Class constructor or initialization method.

        Loads the credentials from config.ini and creates self.auth and
        self.api. If building the client fails, an error is printed and both
        attributes are left as None.

        Raises:
            KeyError: if config.ini has no [twitter] section or one of the
                four expected keys is missing (config.read silently ignores
                a missing file, so that case surfaces here too).
        '''
        # grab config passwords from config.ini
        config = configparser.ConfigParser()
        config.read('config.ini')

        # keys and tokens from the Twitter Dev Console
        credentials = config['twitter']
        api_key = credentials['api_key']
        api_key_secret = credentials['api_key_secret']
        access_token = credentials['access_token']
        access_token_secret = credentials['access_token_secret']

        # Define the attributes up front so a failed setup leaves a
        # well-defined (None) state instead of missing attributes.
        self.auth = None
        self.api = None

        # attempt authentication
        # NOTE(review): these calls only construct client objects; whether the
        # credentials are actually verified here depends on tweepy - confirm.
        try:
            # create OAuthHandler object and set access token and secret
            auth = OAuthHandler(api_key, api_key_secret)
            auth.set_access_token(access_token, access_token_secret)
            # create tweepy API object to fetch tweets
            self.api = tw.API(auth)
            self.auth = auth
        except Exception as e:
            # Exception (not a bare except) so Ctrl-C / SystemExit still
            # propagate; the cause is reported instead of being discarded.
            print("Error: Authentication Failed")
            print("Error: " + str(e))

    def get_tweets(self, query, count=2, lang='en'):
        '''
        Main function to fetch tweets and parse them.

        Args:
            query: search query passed to the API as `q`.
            count: maximum number of tweets to fetch.
            lang: language code passed to the API.

        Returns:
            A list of dicts with the keys 'Text', 'Time', 'Location', 'User',
            'Verified' and 'Sentiment'. If the API raises a TweepyException
            part-way through, the error is printed and the tweets parsed so
            far are returned; if no API client is available the list is empty.
        '''
        # list to store parsed tweets
        tweets = []

        api = getattr(self, 'api', None)
        if api is None:
            print("Error: Twitter API client is not available")
            return tweets

        try:
            # call twitter api to fetch pagination of tweets
            fetched_tweets = tw.Cursor(
                api.search_tweets,
                q=query,
                lang=lang,
                tweet_mode='extended',
            ).items(count)

            # parsing tweets one by one
            for tweet in fetched_tweets:
                # clean once and reuse for both the stored text and sentiment
                text = self.clean_tweet(tweet.full_text)
                tweets.append({
                    'Text': text,
                    'Time': tweet.created_at,
                    'Location': self.clean_tweet(tweet.user.location),
                    'User': tweet.user.screen_name,
                    'Verified': tweet.user.verified,
                    'Sentiment': self.get_tweet_sentiment(text),
                })
        except tw.errors.TweepyException as e:
            # print error (if any) and fall through to return what we have
            print("Error: " + str(e))

        # return parsed tweets
        return tweets

    def clean_tweet(self, tweet):
        '''
        Utility function to clean tweet text by removing mentions, links and
        special characters using a regex, lower-casing the result and
        collapsing runs of whitespace to single spaces.

        Args:
            tweet: raw text; None or an empty string yields ''.

        Returns:
            The cleaned, lower-case text.
        '''
        if not tweet:
            return ""

        stripped = self._NOISE_PATTERN.sub(" ", tweet)
        return " ".join(stripped.lower().split())

    def get_tweet_sentiment(self, tweet):
        '''
        Utility function to classify sentiment of passed tweet
        using textblob's sentiment method.

        Args:
            tweet: tweet text; it is cleaned with clean_tweet before analysis.

        Returns:
            'positive', 'neutral' or 'negative' depending on whether the
            polarity is above, equal to, or below zero.
        '''
        # create TextBlob object of passed tweet text and read polarity once
        polarity = TextBlob(self.clean_tweet(tweet)).sentiment.polarity

        # set sentiment
        if polarity > 0:
            return 'positive'
        if polarity == 0:
            return 'neutral'
        return 'negative'
        

## Instantiate Twitter API object

In [None]:
# Create the TwitterClient instance used by the cells below; its constructor
# reads the API credentials from config.ini.
api = TwitterClient()

## Create query, then get tweets

In [None]:
# Search for bitcoin tweets (retweets filtered out by the query) and load
# the parsed results into a DataFrame.
query = "bitcoin -filter:retweets"
tweets = pd.DataFrame(data=api.get_tweets(query, 600))

## Limit duplicate tweets further

In [None]:
# keep=False discards every row whose cleaned text appears more than once,
# rather than keeping one copy of it.
tweets = tweets.drop_duplicates(subset=["Text"], keep=False)

## Format our dataset, then export to csv

In [None]:
# Remove commas from the free-text columns before the comma-separated export.
for column in ('Location', 'Text'):
    tweets[column] = tweets[column].str.replace(',', '')

In [None]:
# Write the formatted dataset to disk as a comma-separated file.
tweets.to_csv(path_or_buf="bitcoin_tweets.csv", sep=",")