# Twitter data pull for retweets network graph
Pull data from a folder of JSON files and structure it for a retweets network visualization.

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import datetime

import pandas as pd

from phoenix.common import artifacts, run_datetime
from phoenix.common import utils
from phoenix.scrape import twitter_utilities
from phoenix.tag.data_pull import twitter_pull_graphing

In [None]:
# Project helpers from phoenix.common.utils that set up notebook output and
# logging; run them before any other cell produces output.
utils.setup_notebook_output()
utils.setup_notebook_logging()

In [None]:
# Parameters
# Run datetime as a file-safe string. When left as None (or empty), the
# current time is used instead.
# See phoenix/common/run_datetime.py for the expected format of this parameter.
RUN_DATETIME = None

# Key of the artifacts environment used to build the artifact URL registry.
# See phoenix/common/artifacts/registry_environment.py for the expected format
# of this parameter.
ARTIFACTS_ENVIRONMENT_KEY = "local"

# Filters for batch
YEAR_FILTER = 2021
# Month number without zero padding (e.g. 8, not 08).
MONTH_FILTER = 8

In [None]:
# Use the supplied run datetime when one is given; otherwise stamp the run
# with the current time.
run_dt = (
    run_datetime.from_file_safe_str(RUN_DATETIME)
    if RUN_DATETIME
    else run_datetime.create_run_datetime_now()
)

# Configuration handed to every artifact URL lookup below.
url_config = {
    "YEAR_FILTER": YEAR_FILTER,
    "MONTH_FILTER": MONTH_FILTER,
    "OBJECT_TYPE": "tweets",
}
art_url_reg = artifacts.registry.ArtifactURLRegistry(
    run_dt,
    ARTIFACTS_ENVIRONMENT_KEY,
)

# Output location for the pulled retweet data.
GRAPHING_RUNS_URL_RETWEET_PULLED = art_url_reg.get_url(
    "graphing_runs-retweet_pulled",
    url_config,
)
# Static CSV listing the Twitter users.
STATIC_URL_TWITTER_USERS = art_url_reg.get_url(
    "static-twitter_users",
    url_config,
)
# Input location the tweets are collected from.
GRAPHING_RUNS_URL_RETWEET_INPUT = art_url_reg.get_url(
    "graphing_runs-retweet_input",
    url_config,
)

In [None]:
# Show the resolved URLs, run datetime and batch filters, one per line, so the
# executed notebook records exactly what this run used.
print(
    STATIC_URL_TWITTER_USERS,
    GRAPHING_RUNS_URL_RETWEET_INPUT,
    GRAPHING_RUNS_URL_RETWEET_PULLED,
    run_dt.dt,
    YEAR_FILTER,
    MONTH_FILTER,
    sep="\n",
)

In [None]:
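# Optional: uncomment to set DASK_CLUSTER_IP before Dask is initialised below
# (presumably the address of an already-running Dask scheduler; confirm).
# %env DASK_CLUSTER_IP=tcp://127.0.0.1:36349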

In [None]:
# Project helper from phoenix.common.utils; presumably sets up the Dask
# client/cluster used for the rest of the notebook (TODO confirm).
utils.dask_global_init()

In [None]:
# Load the Twitter user queries from the static users CSV artifact.
users = twitter_utilities.load_queries_from_csv(STATIC_URL_TWITTER_USERS)

In [None]:
# Collect the tweets for the retweet graph from the input artifact location,
# passing the user list and the year/month batch filters.
tweets = twitter_pull_graphing.collect_tweets_rt_graph(
    GRAPHING_RUNS_URL_RETWEET_INPUT,
    users,
    YEAR_FILTER,
    MONTH_FILTER,
)

In [None]:
# Preview the first rows of the collected data as a quick sanity check.
tweets.head()

In [None]:
# Total number of elements (rows x columns), assuming `tweets` is a pandas
# DataFrame (TODO confirm).
tweets.size

In [None]:
# Dimensions of the result; (rows, columns) assuming a pandas DataFrame.
tweets.shape

In [None]:
# Persist the collected tweets to the "graphing_runs-retweet_pulled" artifact
# URL; `art` holds whatever the persist helper returns.
art = artifacts.dataframes.persist(GRAPHING_RUNS_URL_RETWEET_PULLED, tweets)