In [1]:
# This is Main function.
# Extracting streaming data from Twitter, pre-processing, and loading into MySQL
import credentials # Import api/access_token keys from credentials.py
import setting # Import related setting constants from settings.py 

import re
import tweepy
import mysql.connector
import pandas as pd
from textblob import TextBlob
# Streaming With Tweepy 
# http://docs.tweepy.org/en/v3.4.0/streaming_how_to.html#streaming-with-tweepy


# Override tweepy.StreamListener to add logic to on_status
class MyStreamListener(tweepy.StreamListener):
    '''
    Tweets are known as ‚Äústatus updates‚Äù. So the Status class in tweepy has properties describing the tweet.
    https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object.html
    '''
    
    def on_status(self, status):
        '''
        Extract info from tweets
        '''
        
        if status.retweeted:
            # Avoid retweeted info, and only original tweets will be received
            return True
        # Extract attributes from each tweet
        id_str = status.id_str
        created_at = status.created_at
        text = deEmojify(status.text)    # Pre-processing the text  
        sentiment = TextBlob(text).sentiment
        polarity = sentiment.polarity
        subjectivity = sentiment.subjectivity
        
        user_created_at = status.user.created_at
        user_location = deEmojify(status.user.location)
        user_description = deEmojify(status.user.description)
        user_followers_count =status.user.followers_count
        longitude = None
        latitude = None
        if status.coordinates:
            longitude = status.coordinates['coordinates'][0]
            latitude = status.coordinates['coordinates'][1]
            
        retweet_count = status.retweet_count
        favorite_count = status.favorite_count
        
        print(status.text)
        print("Long: {}, Lati: {}".format(longitude, latitude))
        
        # Store all data in MySQL
        if mydb.is_connected():
            mycursor = mydb.cursor()
            sql = "INSERT INTO {} (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)".format(setting.TABLE_NAME)
            val = (id_str, created_at, text, polarity, subjectivity, user_created_at, user_location, \
                user_description, user_followers_count, longitude, latitude, retweet_count, favorite_count)
            mycursor.execute(sql, val)
            mydb.commit()
            mycursor.close()
    
    
    def on_error(self, status_code):
        '''
        Since Twitter API has rate limits, stop srcraping data as it exceed to the thresold.
        '''
        if status_code == 420:
            # return False to disconnect the stream
            return 

In [2]:
def clean_tweet(self, tweet): 
    ''' 
    Use sumple regex statemnents to clean tweet text by removing links and special characters
    '''
    return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t]) \
                                |(\w+:\/\/\S+)", " ", tweet).split()) 
def deEmojify(text):
    '''
    Strip all non-ASCII characters to remove emoji characters
    '''
    if text:
        return text.encode('ascii', 'ignore').decode('ascii')
    else:
        return None

In [3]:

mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="",
    database="Twitterdb",
    charset = 'utf8'
)
if mydb.is_connected():
    '''
    Check if this table exits. If not, then create a new one.
    '''
    mycursor = mydb.cursor()
    mycursor.execute("""
        SELECT COUNT(*)
        FROM information_schema.tables
        WHERE table_name = '{0}'
        """.format(setting.TABLE_NAME))
    if mycursor.fetchone()[0] != 1:
        mycursor.execute("CREATE TABLE {} ({})".format(setting.TABLE_NAME, setting.TABLE_ATTRIBUTES))
        mydb.commit()
    mycursor.close()

In [4]:
auth  = tweepy.OAuthHandler(credentials.API_KEYS, credentials.API_SECRET_KEYS)
auth.set_access_token(credentials.ACCESS_TOKEN, credentials.ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)

In [5]:
myStreamListener = MyStreamListener()
myStream = tweepy.Stream(auth = api.auth, listener = myStreamListener)
myStream.filter(languages=["en"], track = setting.TRACK_WORDS)
# Close the MySQL connection as it finished
# However, this won't be reached as the stream listener won't stop automatically
# Press STOP button to finish the process.
mydb.close()

Check out what I just added to my closet on Poshmark: Louis Vuitton Noe Petit Bag. https://t.co/H4ZbbKpbjX via @poshmarkapp #shopmycloset
Long: None, Lati: None
RT @sunglassesemoji: Middle aged white women love to come in wearing Louis Vuitton and tipping their servers $0. Who‚Äôre you flexing on baby‚Ä¶
Long: None, Lati: None
i was singing lyrics while on my hike earlier today, "gucci gucci, loui loui, fenti fenti, prada." 

now i get Loui‚Ä¶ https://t.co/hiVSk0EIGk
Long: None, Lati: None
RT @Reveblinkland: I just peeped Seulgi and Jeongyeon got the same Louis Vuitton green suits ü§®ü§î. Am I just slow or did y'all notice too htt‚Ä¶
Long: None, Lati: None
@LouisVuitton Hope the whole of Louis Vuitton is well and safe in these difficult times https://t.co/qegGYCRzjS
Long: None, Lati: None
with a louis vuitton speedy bag
Long: None, Lati: None
RT @tearsofjisoo: streets saying jisoo x louis vuitton but wtf is bitton https://t.co/GOrpWGrmRL
Long: None, Lati: None
RT @BoF: The answer has

RT @iblinkforblinkz: Help clean twitter search bar
(Type and search) 

Jisoo Cute
Jisoo Dior
Jisoo Burberry
Jisoo Louis Vuitton
Jisoo Carti‚Ä¶
Long: None, Lati: None
RT @bryanaleroux: she lick it up just like a candy, she wanna make 'em leave their family, she trying to live a life so fancy, she wanna pu‚Ä¶
Long: None, Lati: None
RT @sooyaswhore: WAIT JISOO AND LOUIS VUITTON CONFIRMED????? FUCK YES
Long: None, Lati: None
@balleralert Well they are weird aren‚Äôt they? Didn‚Äôt she say she is ok with being in a three way? Plus Jaden starre‚Ä¶ https://t.co/1MVczWfzcb
Long: None, Lati: None
RT @chanbaekhyuned: Believe or not but these 4 dorks are booked by the top fashion luxury &amp; cosmetic brands

üê∂BAEKHYUN: Burberry, Priv√® All‚Ä¶
Long: None, Lati: None
RT @marnurce: Air Force 1 X Louis Vuitton. 
Size: 40-45 
240ghc. 
Call Or WhatsApp Me And it‚Äôs Yours. 
#SketchsClothing. https://t.co/Oqzlp‚Ä¶
Long: None, Lati: None
the louis vuitton trash cans have expired, so i decided to reve

KeyboardInterrupt: 