In [1]:
import requests
import os
import json
from minio import Minio
import io
from dotenv import load_dotenv

load_dotenv()  # take environment variables from .env.

# To set your enviornment variables in your terminal run the following line:
# export 'BEARER_TOKEN'='<your_bearer_token>'
bearer_token = os.environ.get("TWITTER_BEARER_TOKEN")

def bearer_oauth(r):
    """
    Method required by bearer token authentication.
    """

    r.headers["Authorization"] = f"Bearer {bearer_token}"
    r.headers["User-Agent"] = "v2FilteredStreamPython"
    return r


def get_rules():
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream/rules", auth=bearer_oauth
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot get rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    return response.json()


def delete_all_rules(rules):
    if rules is None or "data" not in rules:
        return None

    ids = list(map(lambda rule: rule["id"], rules["data"]))
    payload = {"delete": {"ids": ids}}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        auth=bearer_oauth,
        json=payload
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot delete rules (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    print(json.dumps(response.json()))


def set_rules(delete):
    # You can adjust the rules if needed
    sample_rules = [
        {"value": "dog has:images", "tag": "dog pictures"},
        {"value": "cat has:images -grumpy", "tag": "cat pictures"},
    ]
    payload = {"add": sample_rules}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        auth=bearer_oauth,
        json=payload,
    )
    if response.status_code != 201:
        raise Exception(
            "Cannot add rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))


def get_stream(set):
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream", auth=bearer_oauth, stream=True,
    )
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Cannot get stream (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    i = 0
    for response_line in response.iter_lines():
        if response_line:
            json_response = json.loads(response_line)
            formatted = json.dumps(json_response, indent=4, sort_keys=True)
            print(formatted)
            s3_client.put_object(bucket, f"test_tweets/{i}.json", io.BytesIO(bytes(formatted, "utf-8")), len(formatted))



In [2]:
s3_access_key = os.environ.get("S3_ACCESS_KEY")
s3_secret_key = os.environ.get("S3_SECRET_KEY")
bucket = "twitter-data"
s3_client = Minio("localhost:9000", access_key=s3_access_key, secret_key=s3_secret_key, secure=False)

if not s3_client.bucket_exists(bucket):
    print(f"creating bucket '${bucket}'")
    s3_client.make_bucket(bucket)
else:
    print(f"bucket already exists")

bucket already exists


In [3]:
rules = get_rules()
delete = delete_all_rules(rules)
set = set_rules(delete)


{"data": [{"id": "1486156857785786368", "value": "cat has:images -grumpy", "tag": "cat pictures"}, {"id": "1486156857785786369", "value": "dog has:images", "tag": "dog pictures"}], "meta": {"sent": "2022-01-26T03:49:55.153Z", "result_count": 2}}
{"meta": {"sent": "2022-01-26T03:49:55.794Z", "summary": {"deleted": 2, "not_deleted": 0}}}
{"data": [{"value": "cat has:images -grumpy", "tag": "cat pictures", "id": "1486184603903684612"}, {"value": "dog has:images", "tag": "dog pictures", "id": "1486184603903684613"}], "meta": {"sent": "2022-01-26T03:49:56.255Z", "summary": {"created": 2, "not_created": 0, "valid": 2, "invalid": 0}}}


In [4]:
get_stream(set)

200
{
    "data": {
        "id": "1486184607242330117",
        "text": "RT @cat_bot_kr: \uc6b0\ub9ac\uc9d1 \uace0\uc591\uc774\ud55c\ud14c \uc0c8 \uc2dd\uad6c\uac00 \uc0dd\uae34\ub2e4\uace0 \ub9d0\ud574\uc8fc\ub294\uac78 \uae5c\ube61\ud588\ub2e4 https://t.co/d6f2TSC1IV"
    },
    "matching_rules": [
        {
            "id": "1486184603903684612",
            "tag": "cat pictures"
        }
    ]
}
{
    "data": {
        "id": "1486184614062292993",
        "text": "NBD big dog liked my tweet and I only speak in facts. How can he not be in Cooperstown. @garysheffield too! Both generational talent! Travesty! https://t.co/CDSuhuqRlM"
    },
    "matching_rules": [
        {
            "id": "1486184603903684613",
            "tag": "dog pictures"
        }
    ]
}
{
    "data": {
        "id": "1486184615820029952",
        "text": "RT @TamaZooPark: \u67ff\u306e\u6728\u306b\u3001\u5e72\u3057\u67ff\u306a\u3089\u306c\u3001\u5e72\u3057\u30bf\u30cc\u30ad\u304c\u4e8c\u3064\u3002\u8449\u

KeyboardInterrupt: 