In [None]:
!pip install requests

In [None]:
!pip install pandas

API Docs: https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent

Example using Python: https://github.com/twitterdev/Twitter-API-v2-sample-code/blob/main/Recent-Search/recent_search.py



In [1]:
import os
import json
from datetime import datetime
from typing import Callable

import requests
import pandas as pd

In [2]:
# Twitter
SEARCH_URL = "https://api.twitter.com/2/tweets/search/recent"
BEARER_TOKEN = "<bearer-token>"

## Auth

In [3]:
def bearer_oauth(r):
    """Method required by bearer token authentication."""

    r.headers["Authorization"] = f"Bearer {BEARER_TOKEN}"
    r.headers["User-Agent"] = "v2RecentSearchPython"
    return r

## Search recent tweets

In [4]:
def build_params(start_time: str, max_results: int = 10) -> dict:
    return {
        "query": "(atardecer OR ocaso OR sunset OR #sunset) has:images",
        "media.fields": "type,url,media_key",
        "expansions": "attachments.media_keys",
        "start_time": start_time,
        "max_results": max_results,
    }

In [5]:
def connect_to_endpoint(url: str, params: dict, auth_fn: Callable):
    response = requests.get(url, auth=auth_fn, params=params)
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(response.status_code, response.text)
    return response.json()

In [12]:
start_time = "2022-09-17T00:00:00Z" # YYYY-MM-DDTHH-mm-ss 

response = connect_to_endpoint(
    url=SEARCH_URL,
    params=build_params(start_time=start_time, max_results=50),
    auth_fn=bearer_oauth
)

200


In [13]:
print(json.dumps(response, indent=4, sort_keys=True))

{
    "data": [
        {
            "attachments": {
                "media_keys": [
                    "3_1571298500519907332"
                ]
            },
            "id": "1571615460667363328",
            "text": "RT @uhd2020: Sunset from space! https://t.co/Kdzd26we4M"
        },
        {
            "attachments": {
                "media_keys": [
                    "3_1571615458071027714"
                ]
            },
            "id": "1571615460113752064",
            "text": "Bring those warm, fall sunset colors into your home this year, because everyone knows sunsets are better in fall.\nAMERICAN DREAM REALTY~Where Dream Homes Come True! 803-687-8022 https://t.co/jXd5HPDqBM"
        },
        {
            "attachments": {
                "media_keys": [
                    "3_1571564982109618177"
                ]
            },
            "id": "1571615452580790275",
            "text": "RT @AndrewBowyer4: Magdalen Tower through the Botanic Gardens at sunset

In [14]:
def extract_data_from_response(raw_data: dict) -> pd.DataFrame:
    data = list()

    for item in raw_data["includes"]["media"]:
        if item["type"] != "photo":
            continue

        data.append([item["media_key"], item["type"], item["url"]])

    return pd.DataFrame(data, columns=["media_key", "type", "url"])

In [15]:
df = extract_data_from_response(response)

In [16]:
df

Unnamed: 0,media_key,type,url
0,3_1571298500519907332,photo,https://pbs.twimg.com/media/Fc5fqT-X0AQq5tb.jpg
1,3_1571615458071027714,photo,https://pbs.twimg.com/media/Fc9_7q3WIAIC3PM.jpg
2,3_1571564982109618177,photo,https://pbs.twimg.com/media/Fc9SBlQWQAEdabJ.jpg
3,3_1571605018326798337,photo,https://pbs.twimg.com/media/Fc92b_yXEAE3UJm.jpg
4,3_1571615407554936835,photo,https://pbs.twimg.com/media/Fc9_4urXwAMDa-U.jpg
5,3_1571589270699114498,photo,https://pbs.twimg.com/media/Fc9oHXTWAAIAdJY.jpg
6,3_1571615309353697280,photo,https://pbs.twimg.com/media/Fc9_zA2XwAATcQg.jpg
7,3_1571615307000647681,photo,https://pbs.twimg.com/media/Fc9_y4FXEAE7dph.jpg
8,3_1571544010488954880,photo,https://pbs.twimg.com/media/Fc8-834WQAAyuVb.jpg
9,3_1571394554493865984,photo,https://pbs.twimg.com/media/Fc63BY6XEAAkRpZ.jpg


In [None]:
all_data = df.copy() # only run once
all_data

In [None]:
all_data = pd.concat([all_data, df], axis=0)
all_data

In [18]:
all_data["url"].describe()

count                                                  56
unique                                                 44
top       https://pbs.twimg.com/media/FcuMeS2WAAAS4e4.jpg
freq                                                    2
Name: url, dtype: object

In [None]:
all_data.drop_duplicates(subset=["url"], inplace=True)
all_data

In [65]:
all_data.to_csv("all_data_until_15_sept.csv", index=False)

# Save new data

In [20]:
older_data = pd.read_csv("all_data_until_15_sept.csv")
older_data

Unnamed: 0,media_key,type,url
0,3_1570585015888609281,photo,https://pbs.twimg.com/media/FcvWwCmXgAEbnri.jpg
1,3_1570584995974221824,photo,https://pbs.twimg.com/media/FcvWu4aaEAAqvYW.jpg
2,3_1569457495214522372,photo,https://pbs.twimg.com/media/FcfVRsxWAAQ_Rmz.jpg
3,3_1570068174669844480,photo,https://pbs.twimg.com/media/FcoAr7BXkAAeNub.jpg
4,3_1570446210690945029,photo,https://pbs.twimg.com/media/FctYgg8XkAU2SJ9.jpg
...,...,...,...
77,3_1570399545292464128,photo,https://pbs.twimg.com/media/FcsuEOyXoAAzSoO.jpg
78,3_1570585664768327680,photo,https://pbs.twimg.com/media/FcvXVz3WQAANnTD.jpg
79,3_1570553018889691137,photo,https://pbs.twimg.com/media/Fcu5pkfWYAEHkNz.jpg
80,3_1570443276792270848,photo,https://pbs.twimg.com/media/FctV1vUakAAsTiW.jpg


In [21]:
all_data = pd.concat([all_data, older_data], axis=0)
all_data

Unnamed: 0,media_key,type,url
0,3_1570503347152486400,photo,https://pbs.twimg.com/media/FcuMeS2WAAAS4e4.jpg
1,3_1571298500519907332,photo,https://pbs.twimg.com/media/Fc5fqT-X0AQq5tb.jpg
2,3_1571615114951802894,photo,https://pbs.twimg.com/media/Fc9_nspWQA47YB0.jpg
3,3_1571560989912440834,photo,https://pbs.twimg.com/media/Fc9OZNKXoAIor7b.jpg
4,3_1570582030483705858,photo,https://pbs.twimg.com/media/FcvUCRGX0AI3LRn.jpg
...,...,...,...
77,3_1570399545292464128,photo,https://pbs.twimg.com/media/FcsuEOyXoAAzSoO.jpg
78,3_1570585664768327680,photo,https://pbs.twimg.com/media/FcvXVz3WQAANnTD.jpg
79,3_1570553018889691137,photo,https://pbs.twimg.com/media/Fcu5pkfWYAEHkNz.jpg
80,3_1570443276792270848,photo,https://pbs.twimg.com/media/FctV1vUakAAsTiW.jpg


In [23]:
all_data.reset_index(inplace=True)
all_data

Unnamed: 0,index,media_key,type,url
0,0,3_1570503347152486400,photo,https://pbs.twimg.com/media/FcuMeS2WAAAS4e4.jpg
1,1,3_1571298500519907332,photo,https://pbs.twimg.com/media/Fc5fqT-X0AQq5tb.jpg
2,2,3_1571615114951802894,photo,https://pbs.twimg.com/media/Fc9_nspWQA47YB0.jpg
3,3,3_1571560989912440834,photo,https://pbs.twimg.com/media/Fc9OZNKXoAIor7b.jpg
4,4,3_1570582030483705858,photo,https://pbs.twimg.com/media/FcvUCRGX0AI3LRn.jpg
...,...,...,...,...
121,77,3_1570399545292464128,photo,https://pbs.twimg.com/media/FcsuEOyXoAAzSoO.jpg
122,78,3_1570585664768327680,photo,https://pbs.twimg.com/media/FcvXVz3WQAANnTD.jpg
123,79,3_1570553018889691137,photo,https://pbs.twimg.com/media/Fcu5pkfWYAEHkNz.jpg
124,80,3_1570443276792270848,photo,https://pbs.twimg.com/media/FctV1vUakAAsTiW.jpg


In [24]:
all_data.to_csv("all_data_until_17_sept.csv", index=False)