# Retrieving Tweets by Searchtag

In [1]:
import requests 
import json 
import pandas as pd

# imports the twitter_secrets python file in which we store the twitter API keys
from twitter_secrets import twitter_secrets as ts

def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers
        
def set_rules(headers, delete, bearer_token, rules):
    payload = {"add": rules}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        headers=headers,
        json=payload,
    )
    if response.status_code != 201:
        raise Exception(
            "Cannot add rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    
def get_rules(headers, bearer_token):
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream/rules", headers=headers
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot get rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    return response.json()

def delete_all_rules(headers, bearer_token, rules):
    if rules is None or "data" not in rules:
        return None

    ids = list(map(lambda rule: rule["id"], rules["data"]))
    payload = {"delete": {"ids": ids}}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        headers=headers,
        json=payload
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot delete rules (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    print(json.dumps(response.json()))

def get_stream(headers, set, bearer_token, expansions, fields, save_to_disk, save_path):
    data = []
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream" + expansions + fields, headers=headers, stream=True,
    )
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Cannot get stream (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    i = 0
    for response_line in response.iter_lines():
        i += 1
        if i == max_results:
            break
        else:
            json_response = json.loads(response_line)
            #print(json.dumps(json_response, indent=4, sort_keys=True))
            try:
                save_tweets(json_response)
                if save_to_disk == True:
                    save_media_to_disk(json_response, save_path)
            except (json.JSONDecodeError, KeyError) as err:
                # In case the JSON fails to decode, we skip this tweet
                print(f"{i}/{max_results}: ERROR: encountered a problem with a line of data... \n")
                continue
                
def save_tweets(tweet):
    print(json.dumps(tweet, indent=4, sort_keys=True))
    data = tweet['data']
    public_metrics = data['public_metrics']
    tweet_list.append([data['id'], data['author_id'], data['created_at'], data['text'], public_metrics['like_count']])

# the max number of tweets that will be returned
max_results = 20

# save to disk
save_media_to_disk = False
save_path = ""

# You can adjust the rules if needed
search_rules = [
    {"value": "dog has:images", "tag": "dog pictures", "lang": "en"},
    {"value": "cat has:images -grumpy", "tag": "cat pictures", "lang": "en"},
]
tweet_fields = "?tweet.fields=attachments,author_id,created_at,public_metrics"
expansions = ""
tweet_list = []


bearer_token = ts.BEARER_TOKEN
headers = create_headers(bearer_token)
rules = get_rules(headers, bearer_token)
delete = delete_all_rules(headers, bearer_token, rules)
set = set_rules(headers, delete, bearer_token, search_rules)
get_stream(headers, set, bearer_token, expansions, tweet_fields, save_media_to_disk, save_path)

df = pd.DataFrame (tweet_list, columns = ['tweetid', 'author_id' , 'created_at', 'text', 'like_count'])
df

{"data": [{"id": "1345870075308498945", "value": "dog has:images", "tag": "dog pictures"}], "meta": {"sent": "2021-01-03T23:10:16.243Z"}}
{"meta": {"sent": "2021-01-03T23:10:17.433Z", "summary": {"deleted": 1, "not_deleted": 0}}}
{"data": [{"value": "cat has:images -grumpy", "tag": "cat pictures", "id": "1345870125426208768"}, {"value": "dog has:images", "tag": "dog pictures", "id": "1345870125426208769"}], "meta": {"sent": "2021-01-03T23:10:18.990Z", "summary": {"created": 2, "not_created": 0, "valid": 2, "invalid": 0}}}
200
{
    "data": {
        "author_id": "878427720711114753",
        "created_at": "2021-01-03T23:10:09.000Z",
        "id": "1345870092064722944",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "text": "Q t\u00e1 acontecendo??? https://t.co/MAbK5txJ6n"
    },
    "matching_rules": [
        {
            "id": 1345870125426208768,
            "tag

{
    "data": {
        "attachments": {
            "media_keys": [
                "3_1345870106740609026"
            ]
        },
        "author_id": "1253103269259419648",
        "created_at": "2021-01-03T23:10:13.000Z",
        "id": "1345870110091862016",
        "public_metrics": {
            "like_count": 0,
            "quote_count": 0,
            "reply_count": 0,
            "retweet_count": 0
        },
        "text": "i did my makeup for the first time in like a year and a half also look at my cat https://t.co/aB8BT21X7M"
    },
    "matching_rules": [
        {
            "id": 1345870125426208768,
            "tag": "cat pictures"
        }
    ]
}
{
    "data": {
        "attachments": {
            "media_keys": [
                "3_1344783852796051456",
                "3_1344783857728577541"
            ]
        },
        "author_id": "24780491",
        "created_at": "2021-01-03T23:10:15.000Z",
        "id": "1345870116286849024",
        "public_metrics": 

JSONDecodeError: Expecting value: line 1 column 1 (char 0)