In [None]:
!pip install requests

In [None]:
!pip install pandas

API Docs: https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent

Example using Python: https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/main/Recent-Search/recent_search.py



In [1]:
import os
import json
from datetime import datetime
from typing import Callable

import requests
import pandas as pd

In [2]:
# Twitter
SEARCH_URL = "https://api.twitter.com/2/tweets/search/recent"
BEARER_TOKEN = "<bearer_token>"

## Auth

In [3]:
def bearer_oauth(r):
    """Method required by bearer token authentication."""

    r.headers["Authorization"] = f"Bearer {BEARER_TOKEN}"
    r.headers["User-Agent"] = "v2RecentSearchPython"
    return r

## Search recent tweets

In [4]:
def build_params(start_time: str, max_results: int = 10) -> dict:
    return {
        "query": "(atardecer OR ocaso OR sunset OR #sunset) has:images",
        "media.fields": "type,url,media_key",
        "expansions": "attachments.media_keys",
        "start_time": start_time,
        "max_results": max_results,
    }

In [5]:
def connect_to_endpoint(url: str, params: dict, auth_fn: Callable):
    response = requests.get(url, auth=auth_fn, params=params)
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [41]:
# YYYY-MM-DDTHH:mm:ssZ
TIME_SUFFIX = "T00:00:00Z"

QUERY_TWEET_DATES = [
    date + TIME_SUFFIX 
    for date in 
    [
        "2022-09-12",
        "2022-09-13",
        "2022-09-14",
        "2022-09-15",
    ]
]

In [42]:
def build_end_time() -> str:
    for date in QUERY_TWEET_DATES:
        yield date 

In [43]:
gen_date = build_end_time()

In [50]:
start_time = next(gen_date)
print(start_time)

response = connect_to_endpoint(
    url=SEARCH_URL,
    params=build_params(start_time=start_time, max_results=50),
    auth_fn=bearer_oauth
)

2022-09-14T00:00:00Z
200


In [51]:
print(json.dumps(response, indent=4, sort_keys=True))

{
    "data": [
        {
            "id": "1570587460815720448",
            "text": "RT @StormiFolf: \ud83d\udc99Sona vs owner!\ud83d\udc9a\n\ud83c\udfa8@/scorpion_sunset https://t.co/HgcuYKzasa"
        },
        {
            "id": "1570587451680518146",
            "text": "RT @k_s_h_m_s: theplus beauty(@theplusbeauty)\u69d8\u3088\u308a\ud83e\udd0d\n\nJUL7ME\n\u30d1\u30d5\u30e5\u30fc\u30e0\u30ce\u30f3\u30a6\u30a9\u30c3\u30b7\u30e5\u30d8\u30a2\u30d1\u30c3\u30af\n\u3092\u9802\u304d\u307e\u3057\u305f\ud80c\udc83\ud83e\udd0d\ud80c\ude12\ud80c\udff8\n\n\u61a7\u308c\u306e\u9999\u6c34\u306e\u9999\u308a\u306b\u6fc0\u4f3c\uff01\n\u3068\u3001\u8a71\u984c\u306eJUL7ME\ud83e\udd0d\n\n\u79c1\u304c\u9802\u3044\u305fSUNSET FREESIA\u306f\n\u723d\u3084\u304b\u2026"
        },
        {
            "attachments": {
                "media_keys": [
                    "3_1570138004357783552"
                ]
            },
            "id": "1570587443434508289",
            "text": "RT @nayarabzp:

In [52]:
def extract_data_from_response(raw_data: dict) -> pd.DataFrame:
    data = list()

    for item in raw_data["includes"]["media"]:
        if item["type"] != "photo":
            continue

        data.append([item["media_key"], item["type"], item["url"]])

    return pd.DataFrame(data, columns=["media_key", "type", "url"])

In [53]:
df = extract_data_from_response(response)

In [54]:
df

Unnamed: 0,media_key,type,url
0,3_1570138004357783552,photo,https://pbs.twimg.com/media/FcpAMi4WIAAouvn.jpg
1,3_1570535448556048384,photo,https://pbs.twimg.com/media/Fcupq15XkAAQIeT.jpg
2,3_1570399545292464128,photo,https://pbs.twimg.com/media/FcsuEOyXoAAzSoO.jpg
3,3_1570585664768327680,photo,https://pbs.twimg.com/media/FcvXVz3WQAANnTD.jpg
4,3_1570553018889691137,photo,https://pbs.twimg.com/media/Fcu5pkfWYAEHkNz.jpg
5,3_1569856459076243456,photo,https://pbs.twimg.com/media/FclAIc-XkAAQB3a.jpg
6,3_1570582030483705858,photo,https://pbs.twimg.com/media/FcvUCRGX0AI3LRn.jpg
7,3_1570443276792270848,photo,https://pbs.twimg.com/media/FctV1vUakAAsTiW.jpg
8,3_1570350296995430401,photo,https://pbs.twimg.com/media/FcsBRmlXgAEsZRC.jpg
9,3_1460951611627302916,photo,https://pbs.twimg.com/media/FEZXwBUUcAQe9EN.jpg


In [None]:
all_data = df.copy() # only run once
all_data

In [55]:
all_data = pd.concat([all_data, df], axis=0)
all_data

Unnamed: 0,media_key,type,url
0,3_1570585015888609281,photo,https://pbs.twimg.com/media/FcvWwCmXgAEbnri.jpg
1,3_1570584995974221824,photo,https://pbs.twimg.com/media/FcvWu4aaEAAqvYW.jpg
2,3_1569457495214522372,photo,https://pbs.twimg.com/media/FcfVRsxWAAQ_Rmz.jpg
3,3_1570068174669844480,photo,https://pbs.twimg.com/media/FcoAr7BXkAAeNub.jpg
4,3_1570446210690945029,photo,https://pbs.twimg.com/media/FctYgg8XkAU2SJ9.jpg
...,...,...,...
24,3_1570586231410462721,photo,https://pbs.twimg.com/media/FcvX2yxXEAE31Dp.jpg
25,3_1570586237668413441,photo,https://pbs.twimg.com/media/FcvX3KFX0AEuBsZ.jpg
26,3_1570586773587374080,photo,https://pbs.twimg.com/media/FcvYWWiaMAAcHfl.jpg
27,3_1570586774807711744,photo,https://pbs.twimg.com/media/FcvYWbFXEAAZbsB.jpg


In [59]:
all_data["url"].describe()

count                                                 128
unique                                                 82
top       https://pbs.twimg.com/media/FcpAMi4WIAAouvn.jpg
freq                                                    4
Name: url, dtype: object

In [60]:
all_data.drop_duplicates(subset=["url"], inplace=True)
all_data

Unnamed: 0,media_key,type,url
0,3_1570585015888609281,photo,https://pbs.twimg.com/media/FcvWwCmXgAEbnri.jpg
1,3_1570584995974221824,photo,https://pbs.twimg.com/media/FcvWu4aaEAAqvYW.jpg
2,3_1569457495214522372,photo,https://pbs.twimg.com/media/FcfVRsxWAAQ_Rmz.jpg
3,3_1570068174669844480,photo,https://pbs.twimg.com/media/FcoAr7BXkAAeNub.jpg
4,3_1570446210690945029,photo,https://pbs.twimg.com/media/FctYgg8XkAU2SJ9.jpg
...,...,...,...
2,3_1570399545292464128,photo,https://pbs.twimg.com/media/FcsuEOyXoAAzSoO.jpg
3,3_1570585664768327680,photo,https://pbs.twimg.com/media/FcvXVz3WQAANnTD.jpg
4,3_1570553018889691137,photo,https://pbs.twimg.com/media/Fcu5pkfWYAEHkNz.jpg
7,3_1570443276792270848,photo,https://pbs.twimg.com/media/FctV1vUakAAsTiW.jpg


In [65]:
all_data.to_csv("all_data_until_15_sept.csv", index=False)

In [35]:
for idx, row in df.iterrows():
    r = requests.get(row["url"], stream=True)

    if r.status_code == 200:
        with open(f"example_{idx}.png", "wb") as f:
            f.write(r.content)