In [2]:

# This is Main function.
# Extracting streaming data from Twitter, pre-processing, and loading into MySQL
import credentials # Import api/access_token keys from credentials.py
import settings # Import related setting constants from settings.py 

import re
import tweepy
import mysql.connector
import pandas as pd
from textblob import TextBlob
# Streaming With Tweepy 
# http://docs.tweepy.org/en/v3.4.0/streaming_how_to.html#streaming-with-tweepy

In [3]:
# Override tweepy.StreamListener to add logic to on_status
class MyStreamListener(tweepy.StreamListener):
    '''
    Tweets are known as “status updates”. So the Status class in tweepy has properties describing the tweet.
    https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object.html
    '''
    
    def on_status(self, status):
        '''
        Extract info from tweets
        '''
        
        if status.retweeted:
            # Avoid retweeted info, and only original tweets will be received
            return True
        # Extract attributes from each tweet
        id_str = status.id_str
        created_at = status.created_at
        text = deEmojify(status.text)    # Pre-processing the text  
        sentiment = TextBlob(text).sentiment
        polarity = sentiment.polarity
        subjectivity = sentiment.subjectivity
        
        user_created_at = status.user.created_at
        user_location = deEmojify(status.user.location)
        user_description = deEmojify(status.user.description)
        user_followers_count =status.user.followers_count
        longitude = None
        latitude = None
        if status.coordinates:
            longitude = status.coordinates['coordinates'][0]
            latitude = status.coordinates['coordinates'][1]
            
        retweet_count = status.retweet_count
        favorite_count = status.favorite_count
        
        print(status.text)
        print("Long: {}, Lati: {}".format(longitude, latitude))
        
        # Store all data in MySQL
        if mydb.is_connected():
            mycursor = mydb.cursor()
            sql = "INSERT INTO {} (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)".format(settings.TABLE_NAME)
            val = (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, \
                user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count)
            mycursor.execute(sql, val)
            mydb.commit()
            mycursor.close()
    
    
    def on_error(self, status_code):
        '''
        Since Twitter API has rate limits, stop srcraping data as it exceed to the thresold.
        '''
        if status_code == 420:
            # return False to disconnect the stream
            return False

In [4]:
def clean_tweet(self, tweet): 
    ''' 
    Use sumple regex statemnents to clean tweet text by removing links and special characters
    '''
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) \
                                |(\w+:\/\/\S+)", " ", tweet).split()) 
def deEmojify(text):
    '''
    Strip all non-ASCII characters to remove emoji characters
    '''
    if text:
        return text.encode('ascii', 'ignore').decode('ascii')
    else:
        return None

In [5]:
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="jerin",
    database="TwitterDB",
    charset = 'utf8'
)
if mydb.is_connected():
    '''
    Check if this table exits. If not, then create a new one.
    '''
    mycursor = mydb.cursor()
    mycursor.execute("""
        SELECT COUNT(*)
        FROM information_schema.tables
        WHERE table_name = '{0}'
        """.format(settings.TABLE_NAME))
    if mycursor.fetchone()[0] != 1:
        mycursor.execute("CREATE TABLE {} ({})".format(settings.TABLE_NAME, settings.TABLE_ATTRIBUTES))
        mydb.commit()
    mycursor.close()

In [7]:
auth  = tweepy.OAuthHandler(credentials.API_KEY, credentials.API_SECRET_KEY)
auth.set_access_token(credentials.ACCESS_TOKEN, credentials.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

In [8]:
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth = api.auth, listener = myStreamListener)
myStream.filter(languages=["en"], track = settings.TRACK_WORDS)
# Close the MySQL connection as it finished
# However, this won't be reached as the stream listener won't stop automatically
# Press STOP button to finish the process.
mydb.close()

RT @DRuys19: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Trump bal…
Long: None, Lati: None
RT @atensnut: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Trump ba…
Long: None, Lati: None
@Rich_Nardella @BKahn_ rich shut up lmfaooo this ain’t middle aged facebook, gn y’all 😌
Long: None, Lati: None
RT @marklevinshow: 1. Hurry and follow me at Parler. I’m trying to encourage as many of you as possible to immediately join me there as I m…
Long: None, Lati: None
@realDonaldTrump blob:https://t.co/57UDdw5gCH
Long: None, Lati: None
RT @atensnut: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Trump ba…
Long: None, Lati: None
RT @kylie_oneil75: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Tru…
Long: None, Lati: None


@doqholliday @PatriotJ_ Not everyone will have the same thirst for justice as you. 

Some will snap, or become offe… https://t.co/TxR9YVuJ4s
Long: None, Lati: None
RT @atensnut: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Trump ba…
Long: None, Lati: None
RT @gatewaypundit: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Tru…
Long: None, Lati: None
RT @patvinzant: Too little too late. Go to https://t.co/dzhwD6WRl1
Long: None, Lati: None
RT @atensnut: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Trump ba…
Long: None, Lati: None
RT @KeithOlbermann: SAVE BLUE #105732: They’ll kill this affectionate, respectful, proactively friendly 2-year old TUESDAY. He’s especially…
Long: None, Lati: None
RT @VeBo1991: Stay tuned NC! Voter Integrity Project in NC has found evidence of fraudulen

RT @GoJackFlynn: There is no president elect chosen until all votes are counted and certified. We will not give in to the charade until it’…
Long: None, Lati: None
The inconsistent standards by which Facebook appears to be enforcing these bans, during an incredibly tense period… https://t.co/ZpBa9qJ7jW
Long: None, Lati: None
Levius Cops Watcher is now live # # #live | Watch multi stream at https://t.co/1WAdPtndUT  Joining our group to sha… https://t.co/fZQOafNpMM
Long: None, Lati: None
RT @gatewaypundit: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Tru…
Long: None, Lati: None
Right https://t.co/1BCoXnU2HI
Long: None, Lati: None
RT @gatewaypundit: Self-Described Dem Party Worker, Detroit Resident, Brags On Facebook: “I work for Wayne Co, MI and I threw out every Tru…
Long: None, Lati: None
not this on my aunt’s facebook page https://t.co/k0OmN8an9I
Long: None, Lati: None
HAPPENING NOW: 5th National Research and Deve

KeyboardInterrupt: 

In [9]:
mydb.close()