In [None]:
!pip install hyper

from hyper.contrib import HTTP20Adapter
import requests

class TweetScraperError(Exception):
    """Error raised by TweetScraper when a search request or its response cannot be used."""

class TweetScraper:
    """Small client for Twitter's web ``search/adaptive.json`` endpoint.

    Call :meth:`search` to fetch the first page of results for a query, then
    call :meth:`next` repeatedly to fetch following pages. Each response
    carries a "Bottom" cursor which is remembered in ``self._scroll`` and sent
    back as the ``cursor`` URL parameter on the next request.
    """

    # _example_url = 'https://twitter.com/i/api/2/search/adaptive.json?include_profile_interstitial_type=1&include_blocking=1&include_blocked_by=1&include_followed_by=1&include_want_retweets=1&include_mute_edge=1&include_can_dm=1&include_can_media_tag=1&skip_status=1&cards_platform=Web-12&include_cards=1&include_ext_alt_text=true&include_quote_count=true&include_reply_count=1&tweet_mode=extended&include_entities=true&include_user_entities=true&include_ext_media_color=true&include_ext_media_availability=true&send_error_codes=true&simple_quoted_tweet=true&q=%23COVID20&count=20&query_source=typeahead_click&pc=1&spelling_corrections=1&ext=mediaStats%2ChighlightedLabel'

    # Headers sent with every request.
    # NOTE(review): both token values are blank in this file; presumably they
    # have to be filled in before the endpoint accepts requests - confirm.
    _headers = {
        'Authorization': 'Bearer ',
        'x-csrf-token': '',
    }

    def __init__(self):
        """Create the HTTP session and reset the pagination state."""
        self._sessions = requests.session()
        # All https:// requests go through hyper's HTTP20Adapter.
        self._sessions.mount('https://', HTTP20Adapter())
        # Set by search(); read by next().
        self._query = None
        self._count = None
        self._scroll = None

    def _find_scroll(self, data):
        """Store the pagination cursor found in ``data`` in ``self._scroll``.

        The cursor is read from the last element of
        ``data['timeline']['instructions']``: either the last entry of an
        ``addEntries`` instruction or the single entry of a ``replaceEntry``
        instruction.

        Raises:
            TweetScraperError: if the last instruction is of neither kind, or
                if the cursor found there is not of type ``'Bottom'``.
        """
        instruction = data['timeline']['instructions'][-1]
        if 'addEntries' in instruction:
            entry = instruction['addEntries']['entries'][-1]
        elif 'replaceEntry' in instruction:
            entry = instruction['replaceEntry']['entry']
        else:
            # Without this branch the cursor lookup below would fail with an
            # UnboundLocalError that hides the real cause.
            raise TweetScraperError(
                "Failed to get scroll, unsupported instruction: %s" % (list(instruction),)
            )

        cursor = entry['content']['operation']['cursor']
        if cursor['cursorType'] != 'Bottom':
            raise TweetScraperError(
                "Failed to get scroll, invalid cursorType. Expected 'Bottom', but %s."
                % (cursor['cursorType'],)
            )
        self._scroll = cursor['value']

    @staticmethod
    def urlgen(query, count=300, scroll=None):
        """Build the search URL.

        Args:
            query: Search query, inserted verbatim (no URL-encoding is done
                here, so the caller passes an already-encoded string).
            count: Value of the ``count`` URL parameter.
            scroll: Optional cursor value; when given it is appended verbatim
                as the ``cursor`` URL parameter.

        Returns:
            The request URL as a string.
        """
        url = 'https://twitter.com/i/api/2/search/adaptive.json?q={query}&count={count}'.format(
            query=query, count=count
        )
        if scroll is not None:
            # Append after formatting so that '{' or '}' inside the cursor is
            # never interpreted as a replacement field.
            url += '&cursor={scroll}'.format(scroll=scroll)
        return url

    def _search(self, url):
        """GET ``url``, remember the next cursor and return the tweets.

        Returns:
            ``data['globalObjects']['tweets']`` from the JSON response.

        Raises:
            TweetScraperError: if the HTTP status code is not 200, or if no
                usable cursor is found in the response.
        """
        print('[DEBUG] URL = ', url)
        res = self._sessions.get(url, headers=self._headers)
        if res.status_code != 200:
            raise TweetScraperError(
                "Failed to search. Status code is not 200, but %s" % (res.status_code,)
            )
        # Extract the json data to python builtin dict.
        # It may fail if the response data is not json format.
        data = res.json()

        self._find_scroll(data)
        # Only returns tweet data.
        return data['globalObjects']['tweets']

    def search(self, query, count=300):
        """Start a new search and return its first page of tweets.

        Resets any cursor left over from a previous search.
        """
        self._query, self._count, self._scroll = query, count, None
        return self._search(self.urlgen(self._query, self._count))

    def next(self):
        """Return the next page of tweets for the query given to :meth:`search`.

        Raises:
            TweetScraperError: if :meth:`search` has not been called yet.
        """
        if getattr(self, '_query', None) is None:
            raise TweetScraperError("search() must be called before next()")
        return self._search(self.urlgen(self._query, self._count, self._scroll))




In [None]:
s = TweetScraper()
tweets = s.search('%23Uber%20lang%3Aen%20until%3A2021-01-13%20since%3A2020-12-13')

# First page: each tweet's text is followed by a single newline.
print('# of tweets: ', len(tweets))
idx = 0
for idx, (tid, tweet) in enumerate(tweets.items(), start=1):
    print(f'[{idx}] ========== {tid} ==========')
    print(tweet['text'], end='\n')
    print(f"created_at: {tweet['created_at']}", end='\n\n')

# Second and third pages: a separator line, then the page, with the tweet
# text followed by a blank line.
for _page in range(2):
    print('#########################################', end='\n\n')

    tweets = s.next()
    print('# of tweets: ', len(tweets))
    idx = 0
    for idx, (tid, tweet) in enumerate(tweets.items(), start=1):
        print(f'[{idx}] ========== {tid} ==========')
        print(tweet['text'], end='\n\n')
        print(f"created_at: {tweet['created_at']}", end='\n\n')


[DEBUG] URL =  https://twitter.com/i/api/2/search/adaptive.json?q=%23Uber%20lang%3Aen%20until%3A2021-01-13%20since%3A2020-12-13&count=300
# of tweets:  190
#Grab $15 billion ride-hailing and food-delivery giant backed by #SoftBank and #Uber is accelerating its expansion… https://t.co/L7UClvqs2C
created_at: Thu Jan 07 11:27:21 +0000 2021

"Trump's acting head of homeland security Chad Wolf resigns" https://t.co/epLA1vHfR6 #ChadWolf #resignation 

 - le… https://t.co/66ZLrSyF0M
created_at: Tue Jan 12 03:18:25 +0000 2021

Cleaned the house this morning. Thought I’d take the evening off by ordering a #McDonalds via #Uber for the family.… https://t.co/gdOpqgujDC
created_at: Sun Jan 10 20:08:52 +0000 2021

How Uber nearly swiped Google's self-driving crown - Business Insider https://t.co/EX43q6aFYB #uber #waymo #selfdriving #cars
created_at: Sun Jan 10 21:06:23 +0000 2021

$UBER #UBER Pump ittt i have got 5 &amp; 1/2 months of time left on this contract probably won’t sell till uber hits 75…

In [None]:
# Fetch and print the next three result pages of the current search.
for _page in range(3):
    tweets = s.next()
    print('# of tweets: ', len(tweets))
    idx = 0
    for idx, (tid, tweet) in enumerate(tweets.items(), start=1):
        print(f'[{idx}] ========== {tid} ==========')
        print(tweet['text'], end='\n\n')
        print(f"created_at: {tweet['created_at']}", end='\n\n')

[DEBUG] URL =  https://twitter.com/i/api/2/search/adaptive.json?q=%23Uber%20lang%3Aen%20until%3A2021-01-13%20since%3A2020-12-13&count=300&cursor=scroll:thGAVUV0VFVBYBFoCUhv_ngd-2JRIYyAUEPVQ7YUW71AAAAAAAABV8AAAABwAAAFbYAQwKj0bAVJcIJU4DlyAKFyKQAFWoAVAoFFBDD2WkM8aA5BskkWIA4NEhINKgKiABj88MDFIJAoAUJQCJHBEAXiQBKAyqgCyEYTAaU2AwgSNAYIiIOPNIW0XrAGbwQGYnCI6f0kNKESA-RO2PBxCcDMKiQKRaAcWchDjTRAAkoIcAAgM5FO0lgpBrFnCJQQlmoKI4ioVGCgJiB9w0BNgKAxRg4wRSVIDMbJAIhYCgQFA1qDhRITi2YAUAlsIRa7wMCilAuLSGaArCAilKhpRINJIBiGpCAAcYBARxgCUTSCJKBC208gpuMIt4DBHaEEAKEIFCBEgYICjLMHKEZKAJwPcApKgGIJIAsc5KRAZ1whvBMoBqCQAQMGJoT_BjRQAReBGTgwQXMRApUghASUAAWUzmc4hSAABGALUEAGYSEgUMjhRTDWuLA2a8SU0RIALEZBsyCOnAzoCJUigKHyQNAkGYgqEBBAs4BC0C0aGYgQgVApHAY-gKIBNbEsDwhJEoBDjKWkCCUYTOlAYSA8CJSwCgBRr4wYIjtGCAJGTgSQJE0YGwDoBggQIQKhRJVARFBowSXkEdJxBAoEJYCMAHKlPBibEOjkNZFMvBMGDwDKYbwoFRqgIBEDMBKMAcCnOCIQRIAwhxnEDQ6AQEwESCIUZSlkARKPxDxXCInCQAIAinLIBg0OpoKwQHiCAgBXID44gMQQQSygAUlmjEIQFOlCQQgRpIgBEZSYBhgQmg2AD1pkDsBSCSiSkNIAggTAgBFlDCZmlaAiARiV

In [None]:
# Fetch and print the next three result pages of the current search.
for _page in range(3):
    tweets = s.next()
    print('# of tweets: ', len(tweets))
    idx = 0
    for idx, (tid, tweet) in enumerate(tweets.items(), start=1):
        print(f'[{idx}] ========== {tid} ==========')
        print(tweet['text'], end='\n\n')
        print(f"created_at: {tweet['created_at']}", end='\n\n')

[DEBUG] URL =  https://twitter.com/i/api/2/search/adaptive.json?q=%23Uber%20lang%3Aen%20until%3A2021-01-13%20since%3A2020-12-13&count=300&cursor=scroll:thGAVUV0VFVBYBFoC4s4nkz--2JRIYyAUEPVQ7YUW71AAAAAAAABV8AAAABwAAAFb6AQ4Kj_bAVJcIpU6D1yALFyKQQlWoCVQ7HNBDD3ekt9aA5BskmWIA4NEhINOkqmABj88sDFIZIsAW5QCNHBEQXyQBaEyrkDyEYTCaV2C5jTNJYKiPePNIW-XrGWbwSmYnaK6f1kNKESI_TO2PVxCeDMaiQqRaCdWchDrzRQAkoOcgkyc5tO0lgpBrNnCNQRlupaI56sVGCgJjD9z0BtgKAxxk4wZSVYDMbJAIhYjpYVA16DlRZTi3YA8ClsKR67wtSmlIuLSGaBrCAqnuhpxKNpYBim7iEA8YBBxxwiUTSCJKBC208otuMZt4HBXaGEQPEcFCBMiZICnreHaE5KJd0PcBrKgGINIAse5qRQZ1wpvxNoBqCQCTsWJ4T_hzXQCR-hmTg0Q_cRArcghAy0AAWUzuc4hSBAVGCbUEAGcSUhUcjhzTDWuLI2a8SW0RIwLEZHs3COnAzoCLUihen2QPgkGYguGhBgs4lC1K0eGYgQoVApHIY-gqoBNbksHwhZUoLLraWmCPUYzOlYYSA8DJSwCgDRr_wcIrtGKAJOTg2QJk14GwDoBggQIQKlRJ1QRVZoySXkkdJxRAoEJYDMhHalPZibsOjkNZFuvhMWTwDKYbwoFxqophGDMBKMQdCnOjJQxIIwhxnGDS6A2EwXSCMUZSlkAZKPzDxXCYnDwAIAinPYBg0OtoKwYHyCQgFXoD44gcQQUSywQc92jEoUHOlDRQhVpogBE5WYBhwamk2AT1pkHsBWiSiSkNIBggTAgR3lTiZ2lfAmETiV

In [None]:
# Fetch and print the next three result pages of the current search.
for _page in range(3):
    tweets = s.next()
    print('# of tweets: ', len(tweets))
    idx = 0
    for idx, (tid, tweet) in enumerate(tweets.items(), start=1):
        print(f'[{idx}] ========== {tid} ==========')
        print(tweet['text'], end='\n\n')
        print(f"created_at: {tweet['created_at']}", end='\n\n')

[DEBUG] URL =  https://twitter.com/i/api/2/search/adaptive.json?q=%23Uber%20lang%3Aen%20until%3A2021-01-13%20since%3A2020-12-13&count=300&cursor=scroll:thGAVUV0VFVBYBFoC4s4nkz--2JRIYyAUEPVQ7YUW71AAAAAAAABV8AAAABwAAAFb6AQ5Kj_bIVJcIpU6D1yALFyKQQlWoCVQ7HNJDD3ekt9aA5BskmWIA4NEhINOkqmADj88sDFIZIsAW5QCNHBFQX6QBaEyrkDyEYbCaV2C5jTNJYaiPePNIW-XrGWbwSmYnaK6f1kNKESI_TO2PVxCeDMaiQqRaCdWchDrzRQAkoOchkyc5tO0lgpBrNnCNQRlupaI56sVGCgJjD9z0BtgKAxxk4w5SVYDMbJAIhZjpYVA16DlRdTi3YA8ClsKR67wtSmlIuLSGaBrCAqnuhpxKNpYBim7iEA8YBBxxwiUTSCJKBC208otuMZt4HBXaGEQPEcFCDMqZICnreHaE5KJd0PcBrKgGINIAse5qRQZ9wpvxNoBqCQCTsWJ4T_hzXYCR-hmTg0Q_cVArcghAy0AAWUzuc4hSjAVuCbUEAGcSUhUcjhzTD2uLI2a8SW0RIwLEZHs3COnAzoCLUihen2QPhkGYguGhBgs4lC1K0eGYgQoVApHI4-guoBNbksHwhZUoLLrbWmCPUazPlYYSA8DJSwigDRr_0cIrtGKAJOTg2QJk94GwDoBggQIQKlRJ1QRVZoySXkkdJxRAoEJYDMhHalPZibuOjkNZFuvhMWTwDKYbwoFxqophGDMBKMQdCnOjJQxIIwhxnGDS6E2EwXSDMUZSlkAZKP7DxfCYnDwAIAynPYBg0OtoKwaH7CQoFXoD84gcQQUSywQc92jEoVHOlTRQhVpogDE5WYFhwamk2AT1pkHsBWiSiSkNIBggTAwR3lTiZ2lfAmETiV

In [None]:
# Save data to csv file
# columns: 'textID', 'text'
import pandas as pd
from datetime import datetime, timedelta

# Search parameters: keyword, tweet language code, and the since/until date
# bounds written as YYYY-MM-DD strings.
keyword, lang = 'Uber', 'en'
since, until = '2020-12-13', '2021-01-13'
# example_query = '%23Uber%20lang%3Aen%20until%3A2021-01-13%20since%3A2020-12-13'

def get_data(keyword, lang='en', until=datetime.today().strftime('%Y-%m-%d'), since=(datetime.today()-timedelta(90)).strftime('%Y-%m-%d')):
  query = f'%23{keyword}%20lang%3A{lang}%20until%3A{until}%20since%3A{since}'
  