# 1 Define Functions to Interact with the Twitter API

In [1]:
import requests 
import json 
import pandas as pd

# imports the twitter_secrets python file in which we store the twitter API keys
# place the twitter_secrets file under <User>/anaconda3/Lib
from twitter_secrets import twitter_secrets as ts

# puts the bearer token in the request header
def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers
        
# sets the rules on which tweets to retrieve    
def set_rules(headers, delete, bearer_token, rules):
    payload = {"add": rules}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        headers=headers,
        json=payload,
    )
    if response.status_code != 201:
        raise Exception(
            "Cannot add rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    
# retrieves the current set of rules from the API     
def get_rules(headers, bearer_token):
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream/rules", headers=headers
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot get rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    return response.json()

# tells the API to delete our current rule configuration 
def delete_all_rules(headers, bearer_token, rules):
    if rules is None or "data" not in rules:
        return None

    ids = list(map(lambda rule: rule["id"], rules["data"]))
    payload = {"delete": {"ids": ids}}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        headers=headers,
        json=payload
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot delete rules (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    print(json.dumps(response.json()))

# starts the stream, iterates through the lines of the response and for each line calls the save_tweets function
def get_stream(headers, set, bearer_token, expansions, fields):
    data = []
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream" + expansions + fields, headers=headers, stream=True,
    )
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Cannot get stream (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    i = 0
    for response_line in response.iter_lines():
        i += 1
        if i == max_results:
            break
        else:
            json_response = json.loads(response_line)
            #print(json.dumps(json_response, indent=4, sort_keys=True))
            try:
                save_tweets(json_response)
            except (json.JSONDecodeError, KeyError) as err:
                # In case the JSON fails to decode, we skip this tweet
                print(f"{i}/{max_results}: ERROR: encountered a problem with a line of data... \n")
                continue

# appends information from tweets to a dataframe                
def save_tweets(tweet):
    print(json.dumps(tweet, indent=4, sort_keys=True))
    data = tweet['data']
    public_metrics = data['public_metrics']
    tweet_list.append([data['id'], data['author_id'], data['created_at'], data['text'], public_metrics['like_count']])


# 2 Subscribe to the Tweet Streaming Service

In [2]:
# the max number of tweets that will be returned
max_results = 20

# You can adjust the rules if needed
search_rules = [
    {"value": "dog has:images", "tag": "dog pictures", "lang": "en"},
    {"value": "cat has:images -grumpy", "tag": "cat pictures", "lang": "en"},
]

# defines the fields which we want to retrieve
tweet_fields = "?tweet.fields=attachments,author_id,created_at,public_metrics"

# we only retrieve the tweet object, but if we wanted to retrieve other objects (e.g., media), we would add them to the expansions string
expansions = ""
tweet_list = []

bearer_token = ts.BEARER_TOKEN
headers = create_headers(bearer_token)
rules = get_rules(headers, bearer_token)
delete = delete_all_rules(headers, bearer_token, rules)
set = set_rules(headers, delete, bearer_token, search_rules)
get_stream(headers, set, bearer_token, expansions, tweet_fields)

df = pd.DataFrame (tweet_list, columns = ['tweetid', 'author_id' , 'created_at', 'text', 'like_count'])
df

{"data": [{"id": "1388866121256869898", "value": "dog has:images", "tag": "dog pictures"}], "meta": {"sent": "2021-05-09T14:38:06.574Z"}}
{"meta": {"sent": "2021-05-09T14:38:07.827Z", "summary": {"deleted": 1, "not_deleted": 0}}}
{"data": [{"value": "cat has:images -grumpy", "tag": "cat pictures", "id": "1391402107169918977"}, {"value": "dog has:images", "tag": "dog pictures", "id": "1391402107169918976"}], "meta": {"sent": "2021-05-09T14:38:09.437Z", "summary": {"created": 2, "not_created": 0, "valid": 2, "invalid": 0}}}
200
{
    "data": {
        "author_id": "1381223929226743812",
        "created_at": "2021-05-09T14:37:59.000Z",
        "id": "1391402072084385798",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 437
        },
        "text": "RT @CiervoKing: $50 in 3 hours \ud83d\udc97\n\n\u25aa\ufe0fRetweet this\n\u25aa\ufe0fGo to this telegram &amp; VOTE for (COVID DOG) https://t.c

}


KeyboardInterrupt: 