In [4]:
import re, json, os
import tweepy 
from datetime import datetime
import pandas as pd
pd.set_option('display.max_colwidth', None)

### Getting a client 

In [5]:
# Read the API credentials from the local JSON config file
with open('.twitter_config.json') as config_file:
    config = json.load(config_file)

# Build the API client, authenticated with the bearer token from the config
client = tweepy.Client(bearer_token=config['bearer'])

print(client)

<tweepy.client.Client object at 0x10e9ee6d0>


### Get tweets by a list of IDs

In [141]:
# IDs of the tweets to look up
idlist = [1545362176126468097, 1545361147544625153, 1545362059021484031]

# Look up all the IDs in a single request.
#   expansions   - additionally return the user objects of the tweet authors
#   tweet_fields - tweet attributes to return; one field name per list element
#                  (a single comma-joined string inside a list only works by accident,
#                   because tweepy joins the list elements with commas)
tweets = client.get_tweets(ids=idlist,
                           expansions='author_id',
                           tweet_fields=['id', 'created_at', 'text', 'lang'],
                           )

# the returned object is of type tweepy.client.Response and has components Response(data, includes, errors, meta)
print(tweets)

Response(data=[<Tweet id=1545362176126468097 text="I'm having a very lazy day😒">, <Tweet id=1545361147544625153 text="Stupid people will pay for this and claim they're changing the world...😒 https://t.co/nVWnmh8tGM">], includes={'users': [<User id=937101228554768385 name=toyese username=tyevanz>, <User id=1356236658325286918 name=InsertWittyCryptoNameHere username=GendoCrypto>]}, errors=[{'value': '1545362059021484031', 'detail': 'Could not find tweet with ids: [1545362059021484031].', 'title': 'Not Found Error', 'resource_type': 'tweet', 'parameter': 'ids', 'resource_id': '1545362059021484031', 'type': 'https://api.twitter.com/2/problems/resource-not-found'}], meta={})


In [142]:
# `data` holds the tweets that were successfully returned, as a list of Tweet objects
# (IDs that could not be resolved are not part of this list)
tweets.data

[<Tweet id=1545362176126468097 text="I'm having a very lazy day😒">,
 <Tweet id=1545361147544625153 text="Stupid people will pay for this and claim they're changing the world...😒 https://t.co/nVWnmh8tGM">]

In [143]:
# `errors` holds one JSON-like dict per requested item that could not be returned
# one of the IDs in the list was a bad ID, and it shows up in the errors list
tweets.errors

[{'value': '1545362059021484031',
  'detail': 'Could not find tweet with ids: [1545362059021484031].',
  'title': 'Not Found Error',
  'resource_type': 'tweet',
  'parameter': 'ids',
  'resource_id': '1545362059021484031',
  'type': 'https://api.twitter.com/2/problems/resource-not-found'}]

In [144]:
# `includes` holds the additional objects requested through the "expansions" parameter;
# with "expansions = 'author_id'" this is the list of users who authored the returned tweets
tweets.includes

{'users': [<User id=937101228554768385 name=toyese username=tyevanz>,
  <User id=1356236658325286918 name=InsertWittyCryptoNameHere username=GendoCrypto>]}

In [146]:
# Retrieving data from the tweets object.
# Printing a Tweet object only shows its id and text, because its string representation (repr)
# is limited to the most basic information.
# Every other attribute has to be accessed explicitly on the Tweet object to get the details.
#
# NOTE: the trailing backslashes inside the triple-quoted f-string join the source lines into
#       a single output line, but the leading indentation of each continued line is still part
#       of the string - hence the long runs of spaces between the fields in the output.

for tweet in tweets.data:
    
    print(f"""author ID = {tweet.author_id} \
              tweet ID = {tweet.id} \
              created_at = {tweet.created_at} \
              text =  {tweet.text} \
              language = {tweet.lang}
            """
          )

author ID = 937101228554768385               tweet ID = 1545362176126468097               created_at = 2022-07-08 11:00:28+00:00               text =  I'm having a very lazy day😒               language = en
            
author ID = 1356236658325286918               tweet ID = 1545361147544625153               created_at = 2022-07-08 10:56:23+00:00               text =  Stupid people will pay for this and claim they're changing the world...😒 https://t.co/nVWnmh8tGM               language = en
            


### Get tweets by search keys (queries) using paginator

In [171]:
# With this feature, results from any of the search functions of the twitter client object
# can be paginated and processed.
# Paginator needs the following:
#        1. The search API (the function object itself is passed, not called)
#        2. The query to be passed to the function
#        3. Fields required in the response ( tweet_fields or user_fields or place_fields etc.)
#        4. Number of items required: max_results is the page size of each request,
#           flatten(limit=...) caps the total number of items yielded across all pages

# flatten() returns a generator of tweet objects which can be iterated over to check its content

# For example, let's take 'search_recent_tweets' : returns Tweets from the last 7 days that match query.
# NOTE: start_time / end_time below are fixed dates, so they have to be adjusted to fall within
#       the last 7 days whenever this cell is re-run.

query = '(happiness OR happy) -is:retweet -is:reply lang:en -birthday -Birthday -weekend -Weekend'
max_tweets = 4
tweets = tweepy.Paginator(client.search_recent_tweets,
                          query=query,
                          start_time='2022-07-03T00:00:00Z',
                          end_time='2022-07-05T23:59:59Z',
                          # one field name per list element (not a single comma-joined string)
                          tweet_fields=['id', 'created_at', 'text', 'lang'],
                          max_results=100).flatten(limit=max_tweets)

In [172]:
# the return type of flatten() is a generator of Tweet objects -
# being a generator, it can only be iterated over once
type(tweets) 

generator

In [173]:
# Iterate over the generator and print the requested fields of every tweet.
# The loop consumes the generator: it has to be re-created to iterate again.
# NOTE: the backslash continuations inside the triple-quoted f-string keep the indentation of
#       the continued lines in the output, which is why the fields are spaced far apart.
for tweet in tweets:
    print(f'''tweet ID = {tweet.id} \
              created_at = {tweet.created_at} \
              text =  {tweet.text} \
              language = {tweet.lang} 
            ''' 
          )

tweet ID = 1544471177837813760               created_at = 2022-07-05 23:59:58+00:00               text =  How about ur nineteenth years? @phuwintang 
Hope you’re happy with everything you do and everything you go through in life. Please take care krub. 🤍
#phuwintang #WhatANiceDayPhuwin https://t.co/JcO3PCIABH               language = en 
            
tweet ID = 1544471177124536321               created_at = 2022-07-05 23:59:58+00:00               text =  I really wanna be happy for you but I can’t 🫠😭😭😭 https://t.co/pmNfMbAtot               language = en 
            
tweet ID = 1544471174540734464               created_at = 2022-07-05 23:59:57+00:00               text =  I’m so happy. You’ve come so far. I always knew you would make it here in the end.               language = en 
            
tweet ID = 1544471174146453504               created_at = 2022-07-05 23:59:57+00:00               text =  Name one hero who was happy.               language = en 
            


### Create a streaming client for tweets

In [236]:
# Create a custom stream client based on the base tweepy StreamingClient

class TwitterStreamClient(tweepy.StreamingClient):
    """Streaming client that prints every received tweet and disconnects
    itself once ``maxitems`` tweets have been printed.
    """

    def __init__(self, maxitems, config_path='.twitter_config.json'):
        """Initialize the client through the base class constructor, using the bearer token.

        Parameters
        ----------
        maxitems : int
            Number of tweets after which the stream disconnects itself.
        config_path : str, optional
            Path of the JSON config file holding the token under the 'bearer' key.

        Raises
        ------
        Exception
            Whatever reading the config or the base constructor raised. The error is
            reported and re-raised, because a client whose base class was never
            initialized fails on every later call anyway.
        """
        try:
            with open(config_path) as f:
                config = json.load(f)

            super().__init__(bearer_token=config['bearer'])
        except Exception as e:
            print(f'Error in initializing the stream: {e}')
            raise

        self.tweet_counter = 0
        self.maxitems = maxitems

    # on_connect is called whenever the client is connected.
    def on_connect(self):
        print("Connected to twitter api")

    # on_disconnect is called whenever the client is disconnected.
    def on_disconnect(self):
        print("Disconnected from twitter api")

    # on_response is called whenever a response is received - we catch and print the tweet object here
    def on_response(self, response):
        tweet = response.data

        # a response can arrive without a tweet (e.g. one that only carries errors);
        # there is nothing to print or count in that case
        if tweet is None:
            return

        self.tweet_counter += 1
        print(f"TWEET COUNTER = {self.tweet_counter}")

        print(f"ID = {tweet.id} | created_at = {tweet.created_at} | text =  {tweet.text} | lang = {tweet.lang} ")

        # the stream disconnects when the maximum number of items is reached
        # (>= rather than == so that the stream also stops for maxitems <= 0)
        if self.tweet_counter >= self.maxitems:
            print("Max items reached")
            self.disconnect()

    # on_request_error is called when the API answers the stream request with an error
    # status code - we report the status code and stop the stream
    def on_request_error(self, status_code):
        print(f"Request error: {status_code}")
        self.disconnect()

In [245]:
# Initialize a TwitterStreamClient which disconnects itself after a maximum of 5 items
stream = TwitterStreamClient(maxitems = 5)

In [247]:
# Start from a clean slate: remove every StreamRule already associated to this stream.
# Rules stay attached to the stream between runs (duplicate rules are not added),
# so rules left over from an earlier run would otherwise keep filtering the stream.
existing_rules = stream.get_rules().data
if existing_rules is None:
    print("No existing rules found")
else:
    print(f"Deleting rules: {existing_rules}")
    stream.delete_rules(ids=[existing.id for existing in existing_rules])

No existing rules found


In [248]:
# Filter specification for the stream, written as a search query
query = '(elon musk) -is:retweet -is:reply lang:en'

# Wrap the query in a StreamRule ...
rule = tweepy.StreamRule(value=query)

# ... and register it on the stream, which makes it a filtered stream
stream.add_rules(rule)

Response(data=[StreamRule(value='(elon musk) -is:retweet -is:reply lang:en', tag=None, id='1545728767384989697')], includes={}, errors=[], meta={'sent': '2022-07-09T11:17:11.081Z', 'summary': {'created': 1, 'not_created': 0, 'valid': 1, 'invalid': 0}})

In [249]:
# check if the rule has been added - it should be listed in the `data` of the response
stream.get_rules().data 

[StreamRule(value='(elon musk) -is:retweet -is:reply lang:en', tag=None, id='1545728767384989697')]

In [250]:
# Start the streaming - in this example we would use the filter API for the stream
# tweet_fields takes one field name per list element (not a single comma-joined string)
stream.filter(tweet_fields=['id', 'created_at', 'text', 'lang'])

Connected to twitter api
TWEET COUNTER = 1
ID = 1545728791967834112 | created_at = 2022-07-09 11:17:16+00:00 | text =  Elon Musk Cancels $44 Billion Twitter Deal https://t.co/S1KfNEUgfz #survivorGR #πολιτικη #Πολεμος_στην_Ουκρανια #greece #πανδημια | lang = en 
TWEET COUNTER = 2
ID = 1545728792181837824 | created_at = 2022-07-09 11:17:16+00:00 | text =  Are you retarded?! He paid more in taxes the last three years (separately) than any person in history. He literally set the record for most taxes paid by a single person three years in a row. You are SO ignorant. You're dumber than a box of rocks. I'd literally cry if I were you https://t.co/PrRtgOhqHV | lang = en 
TWEET COUNTER = 3
ID = 1545728791103905792 | created_at = 2022-07-09 11:17:16+00:00 | text =  Elon Musk withdraws $44bn bid to buy Twitter after weeks of high drama | Elon Musk | The Guardian https://t.co/nT6dmtzPzj | lang = en 
TWEET COUNTER = 4
ID = 1545728805880205313 | created_at = 2022-07-09 11:17:20+00:00 | text =  Bett

Stream connection closed by Twitter


TWEET COUNTER = 5
ID = 1545728816319954945 | created_at = 2022-07-09 11:17:22+00:00 | text =  Elon Musk Cancels $44 Billion Twitter Deal https://t.co/F9l5yGaqRK #Πολεμος_στην_Ουκρανια #πολιτικη #οικονομια #επικαιροτητα #survivorGR #news #greece #πανδημια | lang = en 
Max items reached
Disconnected from twitter api


In [244]:
# explicitly disconnect the stream
# (manual stop - the client also disconnects itself once its maximum number of items is reached)
stream.disconnect()