# Facebook Posts - Source API
Get the source data for Facebook posts from the CrowdTangle API.

Requires environment variable:
- CROWDTANGLE_API_TOKEN

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and code edits are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import datetime

import pandas as pd

from phoenix.common import artifacts
from phoenix.common import utils
from phoenix.scrape import scrape
from phoenix.scrape import crowdtangle

In [None]:
# Apply the project's notebook output and logging setup helpers.
# NOTE(review): the exact settings live in phoenix.common.utils - see there.
utils.setup_notebook_output()
utils.setup_notebook_logging()

In [None]:
# Run parameters.
# RUN_ISO_TIMESTAMP defaults to "now"; Papermill can overwrite it at execution
# time to enable historic runs, backfills, etc.
RUN_ISO_TIMESTAMP = datetime.datetime.now().isoformat()
# Format used to render (and parse back) the run date.
RUN_DATE_FORMAT = "%Y-%m-%d"
run_iso_datetime = datetime.datetime.fromisoformat(RUN_ISO_TIMESTAMP)
RUN_DATE = run_iso_datetime.strftime(RUN_DATE_FORMAT)

# Artifact URLs: the per-run source dump and the base facebook_posts copy.
ARTIFACT_SOURCE_FB_POSTS_URL = (
    f"{artifacts.urls.get_local()}{RUN_DATE}/source_runs/posts-{RUN_ISO_TIMESTAMP}.json"
)
ARTIFACT_BASE_FB_POSTS_URL = (
    f"{artifacts.urls.get_local()}base/facebook_posts/{RUN_ISO_TIMESTAMP}.json"
)

# Scrape window: by default the full day before the run date, i.e. from
# midnight of the previous day up to midnight of the run date.
SCRAPE_END_DATE = datetime.datetime.strptime(RUN_DATE, RUN_DATE_FORMAT)
SCRAPE_START_DATE = SCRAPE_END_DATE - datetime.timedelta(days=1)
# IDs of the lists to scrape.
SCRAPE_LIST_IDS = ["1531680"]

In [None]:
# Show the resolved run parameters, one per line, for the execution record.
print(
    "\n".join(
        str(value)
        for value in (
            ARTIFACT_SOURCE_FB_POSTS_URL,
            ARTIFACT_BASE_FB_POSTS_URL,
            RUN_ISO_TIMESTAMP,
            RUN_DATE,
            SCRAPE_START_DATE,
            SCRAPE_END_DATE,
            SCRAPE_LIST_IDS,
        )
    )
)

In [None]:
# Request the posts from the project's CrowdTangle scraper for the configured
# scrape window and list IDs.
# NOTE(review): presumably reads CROWDTANGLE_API_TOKEN from the environment - confirm.
posts = crowdtangle.get_all_posts(SCRAPE_START_DATE, SCRAPE_END_DATE, SCRAPE_LIST_IDS)
# Cell output: number of posts returned.
len(posts)

In [None]:
# Preview the first post; a slice is used rather than posts[0] so that an
# empty result does not raise (assuming posts is a list - TODO confirm).
posts[0:1]

In [None]:
# Persist the scraped posts as JSON at the per-run source artifact URL.
artifacts.json.persist(ARTIFACT_SOURCE_FB_POSTS_URL, posts)

In [None]:
# Persist the same posts as JSON at the base facebook_posts artifact URL.
artifacts.json.persist(ARTIFACT_BASE_FB_POSTS_URL, posts)