# Facebook Posts - Source API
Get the source data for Facebook posts from the CrowdTangle API.

Requires environment variables:
- CROWDTANGLE_API_TOKEN
- CT_RATE_LIMIT_CALLS

Requires tenant config attributes:
- `crowdtangle_scrape_list_id`

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and code edits are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

In [None]:
import datetime

import pandas as pd

from phoenix.common import artifacts
from phoenix.common import utils
from phoenix.common import run_params
from phoenix.scrape import scrape
from phoenix.scrape import crowdtangle
from phoenix.scrape import group_by

In [None]:
# Apply the shared phoenix notebook setup for output and logging before any
# other cell produces output.
utils.setup_notebook_output()
utils.setup_notebook_logging()

In [None]:
# Parameters
RUN_DATETIME = None
TENANT_ID = None

# Set artifact URLs
# ARTIFACTS_ENVIRONMENT_KEY only affects the URLs that are not set explicitly
ARTIFACTS_ENVIRONMENT_KEY = "local"
# When left as None, the "source-posts" URL from the run's URL registry is used.
ARTIFACT_SOURCE_FB_POSTS_URL = None

# The period you want to scrape.
# Defaults: start is three days before the run datetime, end is the run datetime.
# If SINCE_DAYS is set (truthy) it takes precedence over both dates: the period
# becomes the last SINCE_DAYS days up to the run datetime.
SCRAPE_START_DATE = None
SCRAPE_END_DATE = None
SINCE_DAYS = None

In [None]:
# DEFAULTS
# Build the run parameters first; every default below is derived from them.
cur_run_params = run_params.general.create(
    ARTIFACTS_ENVIRONMENT_KEY,
    TENANT_ID,
    RUN_DATETIME,
)

# Fall back to the registered "source-posts" URL when none was supplied.
if ARTIFACT_SOURCE_FB_POSTS_URL is None:
    ARTIFACT_SOURCE_FB_POSTS_URL = cur_run_params.art_url_reg.get_url("source-posts")

if SINCE_DAYS:
    # A truthy SINCE_DAYS wins over any explicitly supplied dates: the window
    # is the last SINCE_DAYS days ending at the run datetime.
    SINCE_DAYS = int(SINCE_DAYS)
    SCRAPE_END_DATE = cur_run_params.run_dt.dt
    SCRAPE_START_DATE = SCRAPE_END_DATE - datetime.timedelta(days=SINCE_DAYS)
else:
    # Otherwise only fill in whichever bound was left unset.
    if SCRAPE_END_DATE is None:
        SCRAPE_END_DATE = cur_run_params.run_dt.dt
    if SCRAPE_START_DATE is None:
        SCRAPE_START_DATE = cur_run_params.run_dt.dt - datetime.timedelta(days=3)

In [None]:
# Display params.
# Each resolved value is printed on its own line.
print(
    "\n".join(
        str(value)
        for value in (
            cur_run_params,
            ARTIFACT_SOURCE_FB_POSTS_URL,
            cur_run_params.run_dt.dt,
            SINCE_DAYS,
            SCRAPE_START_DATE,
            SCRAPE_END_DATE,
        )
    )
)

In [None]:
# Fetch the posts for the scrape period from CrowdTangle, using the tenant's
# configured scrape list id after it has been passed through
# crowdtangle.process_scrape_list_id.
scrape_list_id = crowdtangle.process_scrape_list_id(
    cur_run_params.tenant_config.crowdtangle_scrape_list_id
)
posts = crowdtangle.get_all_posts(SCRAPE_START_DATE, SCRAPE_END_DATE, scrape_list_id)
# Show how many posts were returned as the cell output.
len(posts)

In [None]:
# Preview the first scraped post; a slice is used rather than posts[0] so the
# cell still works when nothing was returned (assuming posts is a list-like
# sequence).
posts[0:1]

In [None]:
# Persist the scraped posts as a JSON artifact at the source-posts URL and
# show the URL of the persisted artifact as the cell output.
source_artifact = artifacts.json.persist(ARTIFACT_SOURCE_FB_POSTS_URL, posts)
source_artifact.url

In [None]:
# Persist the posts again through group_by for the scrape period, then print
# the URL of every artifact it reports.
all_persisted = group_by.persist_facebook_posts(
    cur_run_params.art_url_reg,
    posts,
    SCRAPE_START_DATE,
    SCRAPE_END_DATE,
)
for persisted in all_persisted:
    print(persisted.url)