# Twitter Keyword Search

Get data from Twitter based on keyword queries.

See `docs/scrape_overview.md` for more information.

Requires environment variables:
- TWITTER_CONSUMER_KEY
- TWITTER_CONSUMER_SECRET
- TWITTER_APPLICATION_TOKEN
- TWITTER_APPLICATION_SECRET

In [None]:
# Load IPython's autoreload extension and select mode 2 for it.
# NOTE(review): mode 2 presumably re-imports all modules before each cell
# runs, so edits to the phoenix package are picked up without a kernel
# restart - confirm against the IPython autoreload documentation.
%load_ext autoreload
%autoreload 2

In [None]:
import datetime

from phoenix.scrape import twitter_queries
from phoenix.scrape import twitter_utilities
from phoenix.scrape import scrape
from phoenix.scrape import group_by
from phoenix.common import run_datetime
from phoenix.common import artifacts
from phoenix.common import utils

In [None]:
# Apply the project's shared notebook output and logging configuration
# before any other cell runs.
utils.setup_notebook_output()
utils.setup_notebook_logging()

In [None]:
# Parameters
# NOTE(review): this looks like a parameter-injection cell (e.g. papermill);
# keep it to plain literal assignments - confirm how the notebook is executed.

# Run timestamp as a file-safe string; when falsy the current time is used.
RUN_DATETIME = None

# Environment key handed to the artifact URL registry.
ARTIFACTS_ENVIRONMENT_KEY = "local"
# Destination URL for the raw tweets JSON; None resolves to the registry's
# "source-keyword_tweets" URL.
ARTIFACT_SOURCE_TWEETS_URL = None
# URL of the CSV holding the queries; when falsy resolves to the registry's
# "static-twitter_keywords" URL.
STATIC_URL_QUERIES = None

# The type of query: "user" or "keyword"
QUERY_TYPE = "keyword"

# Number of days back to scrape
SINCE_DAYS = 3
# Total number of items, 0 for infinite
NUM_ITEMS = 0

In [None]:
# DEFAULTS
# Resolve the run datetime: parse the supplied file-safe string when one was
# given, otherwise stamp the run with the current time.
run_dt = (
    run_datetime.from_file_safe_str(RUN_DATETIME)
    if RUN_DATETIME
    else run_datetime.create_run_datetime_now()
)

# Registry that maps artifact keys to URLs for this run and environment.
aur = artifacts.registry.ArtifactURLRegistry(run_dt, ARTIFACTS_ENVIRONMENT_KEY)

# Set Artefacts URL: only an explicit None falls back to the registry default.
ARTIFACT_SOURCE_TWEETS_URL = (
    aur.get_url("source-keyword_tweets")
    if ARTIFACT_SOURCE_TWEETS_URL is None
    else ARTIFACT_SOURCE_TWEETS_URL
)

# Coerce a truthy SINCE_DAYS to int; falsy values (None, 0, "") pass through
# unchanged.
SINCE_DAYS = int(SINCE_DAYS) if SINCE_DAYS else SINCE_DAYS

# Any falsy value (None or an empty string) selects the registry's default
# keyword query file.
STATIC_URL_QUERIES = STATIC_URL_QUERIES or aur.get_url("static-twitter_keywords")

In [None]:
# Echo the resolved run configuration, one value per line, so that it is
# recorded in the executed notebook's output.
print(
    ARTIFACT_SOURCE_TWEETS_URL,
    run_dt.dt,
    QUERY_TYPE,
    STATIC_URL_QUERIES,
    SINCE_DAYS,
    NUM_ITEMS,
    sep="\n",
)

In [None]:
# Load the search queries from the CSV at STATIC_URL_QUERIES and echo them
# for the run record.
queries = twitter_utilities.load_queries_from_csv(STATIC_URL_QUERIES)
print(queries)

In [None]:
# Fetch the tweets for the loaded queries, passing the configured item limit
# and look-back window; the cell output shows how many items were collected.
tweets_json = twitter_queries.get_tweets_json(
    QUERY_TYPE,
    queries,
    NUM_ITEMS,
    SINCE_DAYS,
)
len(tweets_json)

In [None]:
# Persist the raw tweets JSON to the source artifact URL and display where
# it was written.
source_artifact = artifacts.json.persist(ARTIFACT_SOURCE_TWEETS_URL, tweets_json)
source_artifact.url

In [None]:
# Persist the tweets through group_by and list the URL of every artifact
# that was written.
# NOTE(review): the literal "keyword" is passed here rather than QUERY_TYPE;
# confirm that is intended if QUERY_TYPE is ever set to "user".
all_persisted = group_by.persist_tweets(
    aur,
    "keyword",
    tweets_json,
    run_dt,
    SINCE_DAYS,
)
for persisted_artifact in all_persisted:
    print(persisted_artifact.url)