In [None]:
!pip install requests

In [None]:
!pip install pandas

API Docs: https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent

Example using Python: https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/main/Recent-Search/recent_search.py



In [1]:
import os
import json
from datetime import datetime
from typing import Callable

import requests
import pandas as pd

In [2]:
# Twitter
SEARCH_URL = "https://api.twitter.com/2/tweets/search/recent"
# Read the token from the BEARER_TOKEN environment variable so the real secret
# never has to be typed into (and saved with) the notebook. The placeholder is
# kept as a fallback when the variable is not set.
BEARER_TOKEN = os.environ.get("BEARER_TOKEN", "<bearer-token>")

## Auth

In [3]:
def bearer_oauth(r):
    """Attach bearer-token credentials to an outgoing request.

    Sets the ``Authorization`` and ``User-Agent`` headers on ``r`` and returns
    the same object. It is handed to ``requests`` as the ``auth`` callable.
    """
    r.headers.update(
        {
            "Authorization": f"Bearer {BEARER_TOKEN}",
            "User-Agent": "v2RecentSearchPython",
        }
    )
    return r

## Search recent tweets

In [4]:
# Default search: sunset-related keywords, limited to tweets that carry images.
DEFAULT_QUERY = "(atardecer OR ocaso OR sunset OR #sunset) has:images"


def build_params(
    start_time: str,
    max_results: int = 10,
    query: str = DEFAULT_QUERY,
) -> dict:
    """Build the query-string parameters for a recent-search request.

    Args:
        start_time: Value sent as ``start_time`` (a timestamp string such as
            ``"2022-09-17T00:00:00Z"``).
        max_results: Value sent as ``max_results``.
        query: Search query. Defaults to the sunset/image query this notebook
            was written for, so existing calls keep their behaviour.

    Returns:
        A dict with the query, the requested media fields and expansion, the
        start time and the result limit.
    """
    return {
        "query": query,
        # Ask for the media objects (type, url, key) attached to each tweet.
        "media.fields": "type,url,media_key",
        "expansions": "attachments.media_keys",
        "start_time": start_time,
        "max_results": max_results,
    }

In [5]:
def connect_to_endpoint(url: str, params: dict, auth_fn: Callable, timeout: float = 30.0):
    """Send a GET request and return the decoded JSON body.

    Args:
        url: Endpoint to call.
        params: Query-string parameters.
        auth_fn: Callable passed to ``requests`` as ``auth``; it receives the
            prepared request and returns it with credentials attached.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The parsed JSON response.

    Raises:
        Exception: With ``(status_code, response_text)`` as arguments when the
            status code is not 200.
    """
    response = requests.get(url, auth=auth_fn, params=params, timeout=timeout)
    # Echo the status code so the cell output shows whether the call succeeded.
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [12]:
# Timestamp sent as the search's start_time, in UTC: YYYY-MM-DDTHH:mm:ssZ
start_time = "2022-09-17T00:00:00Z"

response = connect_to_endpoint(
    SEARCH_URL,
    build_params(start_time, max_results=50),
    auth_fn=bearer_oauth,
)

200


In [None]:
# Pretty-print the raw JSON payload (keys sorted) for manual inspection.
print(json.dumps(response, sort_keys=True, indent=4))

In [14]:
def extract_data_from_response(raw_data: dict) -> pd.DataFrame:
    """Collect the photo entries of a search response into a DataFrame.

    Args:
        raw_data: Decoded JSON response. Media objects are read from
            ``raw_data["includes"]["media"]``.

    Returns:
        A DataFrame with the columns ``media_key``, ``type`` and ``url``, one
        row per media item whose type is ``"photo"``. The frame is empty (but
        still has those columns) when the response contains no media section.
    """
    columns = ["media_key", "type", "url"]
    # A response without expanded media has no "includes"/"media" section;
    # treat that as zero rows instead of failing with a KeyError.
    media_items = raw_data.get("includes", {}).get("media", [])
    rows = [
        [item[column] for column in columns]
        for item in media_items
        if item["type"] == "photo"
    ]
    return pd.DataFrame(rows, columns=columns)

In [15]:
# Flatten the photo entries of the latest search response into a DataFrame.
df = extract_data_from_response(response)

In [None]:
# Display the batch extracted from the latest response.
df

In [None]:
# Seed the accumulator with a copy of the current batch.
# Run this cell only once: re-running it rebinds `all_data` and drops whatever
# later cells have concatenated onto it.
all_data = df.copy()
all_data

In [None]:
# Append the current batch below the accumulated rows. Row indices are kept
# as-is, and re-running this cell appends the same batch again.
all_data = pd.concat([all_data, df])
all_data

In [18]:
# Summarise the url column; comparing `count` with `unique` shows how many
# collected URLs are duplicates.
all_data["url"].describe()

count                                                  56
unique                                                 44
top       https://pbs.twimg.com/media/FcuMeS2WAAAS4e4.jpg
freq                                                    2
Name: url, dtype: object

In [None]:
# Keep only the first row for every URL. Re-assigning the result (instead of
# inplace=True) leaves the previous frame untouched and keeps the call chainable.
all_data = all_data.drop_duplicates(subset=["url"])
all_data

In [65]:
# Persist the accumulated rows; index=False leaves the row index out of the file.
all_data.to_csv("all_data_until_15_sept.csv", index=False)

# Save new data

In [None]:
# Load the rows previously saved to all_data_until_15_sept.csv.
older_data = pd.read_csv("all_data_until_15_sept.csv")
older_data

In [None]:
# Stack the previously saved rows underneath the rows collected in this session.
# NOTE(review): URLs present in both frames are not de-duplicated here; confirm
# whether a drop_duplicates(subset=["url"]) is wanted before saving.
all_data = pd.concat([all_data, older_data])
all_data

In [None]:
# Renumber the rows 0..n-1 after the concat. drop=True discards the old index
# instead of inserting it as an extra "index" column (which would also end up
# in the CSV saved afterwards), and re-assigning keeps the cell safe to re-run.
all_data = all_data.reset_index(drop=True)
all_data

In [24]:
# Save the merged data set; index=False leaves the row index out of the file.
all_data.to_csv("all_data_until_17_sept.csv", index=False)