# Gather a large number of tweets from a specified period. The period must fall within the last week because of the recent-search endpoint's restrictions.

In [81]:
import sys
import requests
import os
import json
import datetime as dt
import pandas as pd
from pprint import pprint
import pickle

# URL of Twitter's API v2 "recent search" endpoint; gather_tweets sends its GET requests here.
ENDPOINT = "https://api.twitter.com/2/tweets/search/recent"

## Function that interfaces with Twitter's recent tweets endpoint and pulls tweets until the `num_tweets` limit is hit or it runs out of pages to paginate.

In [76]:
def _to_utc_timestamp(moment):
    """Format *moment* as an ISO-8601 string with a trailing ``Z``.

    Naive datetimes are formatted as-is (they are taken to be UTC already).
    Timezone-aware datetimes are converted to UTC first, so the ``Z`` suffix
    is never appended to a value that already carries a ``+HH:MM`` offset.
    """
    if moment.tzinfo is not None and moment.utcoffset() is not None:
        moment = moment.astimezone(dt.timezone.utc).replace(tzinfo=None)
    return f"{moment.isoformat()}Z"


def gather_tweets(keywords, start, end, num_tweets=1000):
    """Page through the recent-search endpoint and collect matching tweets.

    The search query is ``keywords`` followed by the operators
    ``-is:retweet -has:media -has:images -has:links -is:reply lang:en``.
    Pages of up to 100 tweets are requested until at least ``num_tweets``
    tweets have been collected or the API stops returning a ``next_token``.
    Whole pages are kept, so the result may hold more than ``num_tweets`` rows.

    Args:
        keywords: Search terms; converted with ``str()`` and placed at the
            front of the query.
        start: ``datetime`` marking the beginning of the search window.
        end: ``datetime`` marking the end of the search window.
        num_tweets: Stop paginating once this many tweets have been gathered.

    Returns:
        A ``pandas.DataFrame`` with one row per tweet, built from the dicts in
        each response's ``data`` list. Empty if nothing matched.

    Raises:
        Exception: With ``(status_code, response_text)`` as its arguments when
            the API answers with anything other than HTTP 200.
        OSError: If ``bearer_token.txt`` cannot be read.
    """
    # Close the file deterministically and drop surrounding whitespace: a
    # trailing newline in the token would corrupt the Authorization header.
    with open('bearer_token.txt', 'r') as token_file:
        token = token_file.read().strip()
    headers = {"Authorization": f"Bearer {token}"}

    tweet_constraints = " ".join([
        str(keywords),
        "-is:retweet",
        "-has:media",
        "-has:images",
        "-has:links",
        "-is:reply",
        "lang:en",
    ])

    # Passing a dict lets requests URL-encode the values (spaces, colons,
    # quotes in the query) instead of splicing raw text into the URL.
    params = {
        "query": tweet_constraints,
        "tweet.fields": "id,created_at,author_id",
        "max_results": 100,  # Tweets per request.
        "start_time": _to_utc_timestamp(start),
        "end_time": _to_utc_timestamp(end),
    }

    # Accumulate plain dicts and build the frame once at the end;
    # DataFrame.append no longer exists in pandas 2.x and copied the whole
    # frame on every page.
    rows = []

    while len(rows) < num_tweets:
        response = requests.get(
            ENDPOINT, headers=headers, params=params, timeout=30
        )

        if response.status_code != 200:
            raise Exception(response.status_code, response.text)

        payload = response.json()

        # A page with no matches has no 'data' key at all.
        rows.extend(payload.get('data', []))

        # No next_token means there are no further pages.
        next_token = payload.get('meta', {}).get('next_token')
        if next_token is None:
            break
        params["next_token"] = next_token

    return pd.DataFrame(rows)

## Playground

In [77]:
# Fetch "millennials" tweets for the window 15 April 2021 to 20 April 2021.
df = gather_tweets(
    keywords="millennials",
    start=dt.datetime(year=2021, month=4, day=15),
    end=dt.datetime(year=2021, month=4, day=20),
)

In [83]:
# Save the gathered tweets to disk. The `with` block makes sure the file is
# flushed and closed even if pickling fails part-way through.
with open('tweets/coffee.pkl', 'wb') as pickle_file:
    pickle.dump(df, pickle_file)