In [12]:
!pip install tweepy

[0m

In [13]:
import os
import time
import json
import socket

import tweepy
from tweepy.auth import OAuthHandler
from tweepy import Stream

from IPython.display import clear_output
from google.cloud import storage

In [14]:
# request to get credentials at http://apps.twitter.com
consumer_key    = '**************************'
consumer_secret = '**************************'
access_token    = '**************************'
access_token_secret   = '**************************'

In [15]:
def upload_blob_from_memory(bucket_name, contents, destination_blob_name):
    """Uploads a file to the bucket."""

    # The ID of your GCS bucket
    # bucket_name = "your-bucket-name"

    # The contents to upload to the file
    # contents = "these are my contents"

    # The ID of your GCS object
    # destination_blob_name = "storage-object-name"

    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)

    blob.upload_from_string(contents)

    print("{} uploaded to {}.".format(destination_blob_name, bucket_name))

In [16]:
# Documentation - https://docs.tweepy.org/en/stable/streaming.html
# Inherits from the Stream in tweepy - writes to provided client connection
class ClientConnectionTweetStream(Stream):

    # def __init__(self, consumer_key, consumer_secret, access_token, access_secret, csocket):
    #     self.client_socket = csocket
    
    def __init__(self, consumer_key, consumer_secret, access_token, access_token_secret, client_connection):
        self.client_connection = client_connection
        super(ClientConnectionTweetStream, self).__init__(consumer_key, consumer_secret, access_token, access_token_secret)
        
    # we override the on_data() function in StreamListener
    def on_data(self, data):
        try:
            message = json.loads( data )
            clear_output(wait=True)
            print( message['text'].encode('utf-8') )
            self.client_connection.send(json.dumps(message).encode('utf-8'))
            return True
        except BaseException as e:
            print("Error on_data: %s" % str(e))
            print("Disconnecting client and stream...")
            self.client_connection.close()
            self.disconnect()
            print("Disconnected.")
            return False
        
        return True

    def if_error(self, status):
        print(status)
        return True


In [17]:
# Documentation - https://docs.tweepy.org/en/stable/streaming.html
# class TimeLimitTweetStream(Stream):

#     def __init__(self, consumer_key, consumer_secret, access_token, access_token_secret, time_limit=60):
#         self.start_time = time.time()
#         self.limit = time_limit
#         super(TimeLimitTweetStream, self).__init__(consumer_key, consumer_secret, access_token, access_token_secret)

#     def on_data(self, data):
#         try:
#             if (time.time() - self.start_time) < self.limit:
#                 msg = json.loads(data)
#                 #print("new message")
#                 fileName = time.strftime("%Y-%m-%d-%H-%M.json")
#                 with open('./Data/' + fileName, 'a') as outfile:
#                     #outfile.write(msg)
#                     json.dump(msg, outfile)
#                     outfile.write('\n')
                    
#                 return True
#             else:
#                 print("Time Limit Reached - Disconnecting...")
#                 self.disconnect()
#                 print("Time Limit Reached - Disconnected.")
#                 return False
#             return True
#         except BaseException as e:
#             print("Error on_data: %s" % str(e))
#         return True

#     def on_error(self, status):
#         print(status)
#         return True

In [18]:
# Documentation - https://docs.tweepy.org/en/stable/streaming.html
class TweetLimitTweetStream(Stream):

    def __init__(self, consumer_key, consumer_secret, access_token, access_token_secret, tweet_limit=1000):
        self.tweet_limit = tweet_limit
        self.tweet_count = 0
        super(TweetLimitTweetStream, self).__init__(consumer_key, consumer_secret, access_token, access_token_secret)

    def on_data(self, data):
        try:
            if self.tweet_count < self.tweet_limit:
                msg = json.loads(data)
                print("new message")
                
                #fileName = './Data/' + time.strftime("%Y-%m-%d-%H-%M.json")
                fileName = 'Data/' + time.strftime("%Y-%m-%d-%H-%M-") + str(msg['id']) + '.json'
                
                # Write to google cloud storage
                upload_blob_from_memory('cloud-project-bucket-22', json.dumps(msg), fileName)
                
                #os.makedirs(os.path.dirname(fileName), exist_ok=True)
#                 with open(fileName, 'a') as outfile:
#                     json.dump(msg, outfile)
#                     outfile.write('\n')
                    
                self.tweet_count = self.tweet_count + 1
                return True
            else:
                print("Tweet Limit Reached - Disconnecting...")
                self.disconnect()
                print("Tweet Limit Reached - Disconnected.")
                return False
            return True
        except BaseException as e:
            print("Error on_data: %s" % str(e))
            print("Tweet Limit Reached - Disconnecting...")
            self.disconnect()
            print("Tweet Limit Reached - Disconnected.")
        return True

    def on_error(self, status):
        print(status)
        return True

In [19]:
def send_tweets_to_client(client_connection, topic):
    # Create client connection stream
    twitter_stream = ClientConnectionTweetStream(consumer_key, consumer_secret, access_token, access_token_secret, client_connection)
    
    # Filter for topic
    twitter_stream.filter(track=topic)
    


In [20]:
def start_client_connection_server(topic):
    # Get host and port name for service.
    host = socket.gethostname()
    port = 5555
    
    # initiate a socket object
    s = socket.socket()
    
    # Binding host and port
    s.bind((host, port))

    print("Now listening on port: %s" % str(port))

    #  Waiting for client connection.
    s.listen(5)
    
    # Establish connection with client. it returns first a socket object,c, and the address bound to the socket
    client_connection, addr = s.accept()  

    print("Received request from: " + str(addr))
    
    # and after accepting the connection, we aill sent the tweets through the socket
    send_tweets_to_client(client_connection, topic)

In [21]:
def start_tweet_limit(topic, tweet_limit=10):
    
    # Create tweet limit tweet stream
    twitter_stream = TweetLimitTweetStream(consumer_key, consumer_secret, access_token, access_token_secret, tweet_limit)
    
    # Filter for topic
    twitter_stream.filter(track=topic)
    

In [22]:
if __name__ == "__main__":
#     new_skt = socket.socket()         # initiate a socket object
#     host = socket.gethostname()
#     port = 5555                 # specific port for your service.
#     new_skt.bind((host, port))        # Binding host and port

#     print("Now listening on port: %s" % str(port))

#     new_skt.listen(5)                 #  waiting for client connection.
#     c, addr = new_skt.accept()        # Establish connection with client. it returns first a socket object,c, and the address bound to the socket

#     print("Received request from: " + str(addr))
    
    # and after accepting the connection, we aill sent the tweets through the socket
    #send_tweets(c)
    #send_tweets(None)
    
    # Sends tweet stream to client connection
    #start_client_connection_server('football')
    
    # Saves number of tweets to GCS bucket
    start_tweet_limit('football', 100)

new message
Data/2022-03-10-20-57-1502025931732443137.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931753414667.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931250364416.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931657035780.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931178741762.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931577344001.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931715665933.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931657039891.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931518582790.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931661230082.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-150202

Stream connection closed by Twitter


Data/2022-03-10-20-57-1502025931799633937.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931468197888.json uploaded to cloud-project-bucket-22.
new message
Data/2022-03-10-20-57-1502025931535364102.json uploaded to cloud-project-bucket-22.
new message
Error on_data: 'id'
Tweet Limit Reached - Disconnecting...
Tweet Limit Reached - Disconnected.
