# Twitter Keyword Search

Fetch tweets from Twitter based on keyword queries.

Requires the following environment variables:
- TWITTER_CONSUMER_KEY
- TWITTER_CONSUMER_SECRET
- TWITTER_APPLICATION_TOKEN
- TWITTER_APPLICATION_SECRET

In [None]:
# Load IPython's autoreload extension; mode 2 reloads all modules before each cell runs,
# so edits to the imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import datetime

from phoenix.scrape import twitter_queries
from phoenix.scrape import twitter_utilities
from phoenix.scrape import scrape

from phoenix.common import artifacts
from phoenix.common import utils

In [None]:
# Run the project's notebook setup helpers for output and logging
# (their exact behaviour is defined in phoenix.common.utils).
utils.setup_notebook_output()
utils.setup_notebook_logging()

In [None]:
# Parametrise the run execution date.
# Format of the run date
RUN_DATE_FORMAT = "%Y-%m-%d"
# This can be overwritten at execution time by Papermill to enable historic runs and backfills etc.
RUN_ISO_TIMESTAMP = datetime.datetime.now().isoformat()
run_iso_datetime = datetime.datetime.fromisoformat(RUN_ISO_TIMESTAMP)
# Derive the run date from the same timestamp rather than taking a second clock
# reading, so RUN_DATE can never disagree with RUN_ISO_TIMESTAMP (e.g. across midnight).
# NOTE(review): if Papermill injects RUN_ISO_TIMESTAMP in a cell *after* this one,
# run_iso_datetime and RUN_DATE computed here will not reflect the override - confirm.
RUN_DATE = run_iso_datetime.strftime(RUN_DATE_FORMAT)

# Set Artefacts URL
aur = artifacts.registry.ArtifactURLRegistry(run_iso_datetime, "local")
ARTIFACT_SOURCE_TWEETS_URL = aur.get_url("source-keyword_tweets")
ARTIFACT_BASE_TWEETS_URL = aur.get_url("base-to_process_keyword_tweets")

# The type of query: "users" or "keywords"
QUERY_TYPE = "keywords"
# CSV file holding the queries: list of users for a "users" query and
# list of keyword queries for a "keywords" query
QUERIES_CSV_PATH = f"{artifacts.urls.get_static_config()}twitter_query_keywords.csv"
# Number of days back to scrape
SINCE_DAYS = 3
# Total number of items, 0 for infinite
NUM_ITEMS = 0

In [None]:
# Echo the effective run parameters, one per line, so they appear in the executed notebook.
print(
    "\n".join(
        str(parameter)
        for parameter in (
            ARTIFACT_SOURCE_TWEETS_URL,
            ARTIFACT_BASE_TWEETS_URL,
            RUN_ISO_TIMESTAMP,
            RUN_DATE,
            QUERY_TYPE,
            QUERIES_CSV_PATH,
            SINCE_DAYS,
            NUM_ITEMS,
        )
    )
)

In [None]:
# Load the queries from the configured CSV file and print them for the run record.
queries = twitter_utilities.load_queries_from_csv(QUERIES_CSV_PATH)
print(queries)

In [None]:
# Fetch the tweets for the loaded queries, then show how many items came back.
tweets_json = twitter_queries.get_tweets_json(
    QUERY_TYPE,
    queries,
    NUM_ITEMS,
    SINCE_DAYS,
)
len(tweets_json)

In [None]:
# Persist the fetched tweets to the source artifact URL and show where they were written.
a = artifacts.json.persist(
    ARTIFACT_SOURCE_TWEETS_URL,
    tweets_json,
)
a.url

In [None]:
# Persist the same tweets to the base "to process" artifact URL and show where they were written.
a = artifacts.json.persist(
    ARTIFACT_BASE_TWEETS_URL,
    tweets_json,
)
a.url