# Twitter Stream with Python

In [1]:
# If needed
#!pip install tweepy
#!pip install textblob
#!pip install nltk
# 2wEURk users, add "--user"

# If needed
#!python -m textblob.download_corpora

In [2]:
from tweepy.streaming import StreamListener
from tweepy import OAuthHandler
from tweepy import Stream
from textblob import TextBlob
from textblob.classifiers import NaiveBayesClassifier
from textblob.sentiments import NaiveBayesAnalyzer
from nltk.corpus import twitter_samples
import json
import random

In [3]:
# Don't want this in GitHub
import twitter_credentials

In [4]:
# Build the OAuth handler from the consumer key/secret kept in the
# twitter_credentials module, then register the access token pair on it.
auth = OAuthHandler(
    twitter_credentials.consumer_key,
    twitter_credentials.consumer_secret,
)
auth.set_access_token(
    twitter_credentials.access_token,
    twitter_credentials.access_token_secret,
)

In [5]:
# Label every tokenized sample tweet: the negatives first ...
train = [
    (tokens, 'neg')
    for tokens in twitter_samples.tokenized('negative_tweets.json')
]

# ... then the positives
train += [
    (tokens, 'pos')
    for tokens in twitter_samples.tokenized('positive_tweets.json')
]

# Shuffle so the slice below can contain both labels, keep the first 100
# examples and train the classifier on that subset.
random.shuffle(train)
train = train[:100]
cl = NaiveBayesClassifier(train)

In [6]:
class Tweet:
    """One decoded stream message plus two TextBlob views of its text.

    Parameters
    ----------
    data : JSON text of a single message; it must contain a "text" field,
        otherwise a KeyError is raised.
    cl : classifier handed to TextBlob and used by ``blob1.classify()``.

    Attributes
    ----------
    blob1 : TextBlob of the tweet text using the supplied classifier ``cl``.
    blob2 : TextBlob of the tweet text using a NaiveBayesAnalyzer.
    """

    # A single NaiveBayesAnalyzer shared by all Tweet objects. It is created
    # lazily on the first Tweet. Building a fresh analyzer for every tweet
    # means each one has to be trained again before it can be used, which
    # makes per-tweet processing needlessly slow.
    _shared_analyzer = None

    def __init__(self, data, cl):
        # Hint : print(self._tweet.keys()) for all keys in the tweet
        self._tweet = json.loads(data)
        text = self._tweet["text"]

        if Tweet._shared_analyzer is None:
            Tweet._shared_analyzer = NaiveBayesAnalyzer()

        self.blob1 = TextBlob(text, classifier=cl)
        self.blob2 = TextBlob(text, analyzer=Tweet._shared_analyzer)

    def print_tweet(self):
        """Print a blank line, the tweet id and creation time, then the text."""
        print()
        print(self._tweet["id_str"], self._tweet["created_at"])
        print(self._tweet["text"])

    def print_language(self):
        """Print the language detected for the tweet text."""
        # NOTE(review): detect_language() appears to be deprecated/removed in
        # newer TextBlob releases - confirm against the installed version.
        print("language", self.blob1.detect_language())

    def print_sentiment(self):
        """Print the label from the custom classifier and the analyzer's sentiment."""
        print("sentiment", self.blob1.classify())
        print(self.blob2.sentiment)

In [7]:
class MyListener(StreamListener):
    """Stream listener that prints and analyses a limited number of tweets.

    Parameters
    ----------
    max_count : number of tweets to process before asking the stream to stop.
    cl : classifier passed on to every Tweet this listener creates.
    """

    def __init__(self, max_count, cl):
        # Run the base-class initialiser so any state it sets up exists too.
        super().__init__()
        self.max_count = max_count
        self.count = 0
        self.cl = cl

    def on_data(self, data):
        """Print one incoming message and its analysis.

        Returns False once ``max_count`` tweets have been processed, True
        otherwise (including for messages that are skipped).
        """
        try:
            # Use the classifier this listener was constructed with rather
            # than whatever global named `cl` happens to exist in the kernel.
            self.tweet = Tweet(data, self.cl)
        except KeyError:
            # The message has no "text" field - presumably a non-tweet stream
            # message such as a notice. Skip it without counting it instead
            # of letting the exception end the stream.
            return True

        self.tweet.print_tweet()
        self.tweet.print_language()
        self.tweet.print_sentiment()

        self.count += 1
        if self.count >= self.max_count:
            return False
        return True

In [8]:
# Listener that stops the stream after 10 processed tweets and classifies
# each one with the classifier trained above
mylistener = MyListener(max_count=10, cl=cl)

In [9]:
# Create the stream from the OAuth handler and hand incoming data to mylistener
mystream = Stream(auth, listener=mylistener)

In [10]:
# Terms passed to the stream filter as its `track` list
keywords = [
    'Python',
    'Jupyter',
    'eur.nl',
]

In [11]:
# Start streaming tweets that match the keywords; mylistener prints each one
mystream.filter(track=keywords)


857002738118348801 Tue Apr 25 22:45:49 +0000 2017
@herufeanor Isn't there a Monty Python skit about fracturing socialist revolutionaries?
language en
sentiment neg
Sentiment(classification='pos', p_pos=0.899232237392307, p_neg=0.10076776260769248)

857002743860514816 Tue Apr 25 22:45:51 +0000 2017
BMW Group  will start Reasearch in the Role...  #javascript #Python https://t.co/wSo25BUbKc
language en
sentiment pos
Sentiment(classification='neg', p_pos=0.4600880582754997, p_neg=0.5399119417244997)

857002784192962562 Tue Apr 25 22:46:00 +0000 2017
RT @DD_NaNa_: 5,993+ Enrolled Deep Learning Prerequisites: Logistic Regression in #Python https://t.co/caOMN5hMK8
language en
sentiment pos
Sentiment(classification='pos', p_pos=0.504249181350383, p_neg=0.49575081864961823)

857002789691682816 Tue Apr 25 22:46:02 +0000 2017
@AutomaticWickie @Dr_Draper Slightly reminds me of the Python "Four Yorkshiremen" sketch - "I used to wake up half… https://t.co/qJqIyRQcIk
language en
sentiment pos
Sentim

In [12]:
# Disconnect the stream so no further data is received
mystream.disconnect()