# In [None]:
import requests
import pandas as pd
from time import sleep

# user-supplied settings
user_agent = ""     # optional
domain = ""         # required

# endpoint pieces: scheme prefix and the REST route for posts
path = "/wp-json/wp/v2/posts"
protocol = "https://"

# query values, kept as individual names and then gathered into one mapping
per_page, page = 100, 1
status = "publish"
fields = "title,link,date,modified"
params = dict(per_page=per_page, page=page, status=status, _fields=fields)

# request headers carry the configured user agent
headers = {'User-Agent': user_agent}

# drop a single trailing slash from the domain, if present
domain = domain[:-1] if domain.endswith("/") else domain

# assemble the full request URL
url = f"{protocol}{domain}{path}"

# seconds to wait on each request before giving up, so that a stalled
# server cannot hang the script indefinitely
request_timeout = 30

# call API - store response
# this first reply supplies both the total page count and the first rows
response = requests.request(
    "GET",
    url=url,
    params=params,
    headers=headers,
    timeout=request_timeout,
    )

# create an empty list to collect the rows of every response page
data = []

# check if response is valid
if response.status_code == 200:

  # store number of API response pages as an integer
  # fall back to a single page when the header is absent
  pages = int(response.headers.get('X-WP-TotalPages', 1))

  # keep the rows already received instead of requesting the same page twice
  data.extend(response.json())
  page = page + 1

  # loop through the remaining response pages
  while page <= pages:

    # sleep 1 second between consecutive requests (never after the last one)
    sleep(1)

    # reset page parameter
    params['page'] = page

    # call API with updated parameters - store response
    response = requests.request(
        "GET",
        url=url,
        params=params,
        headers=headers,
        timeout=request_timeout,
        )

    # stop on a failed page: an error reply is not a list of posts, so
    # looping over its JSON would append junk rows to the data
    if response.status_code != 200:
      break

    # append this page's rows to the list
    data.extend(response.json())

    # increment page
    page = page + 1

# response is now the last reply received, so one status check covers both
# a failed first request and a failure part-way through pagination
if response.status_code == 200:

  # flatten JSON and convert data to df
  all_data = pd.json_normalize(data)

  # define filename
  # replace handles multisite instances where WP is installed on subfolder
  file_name = str(domain).replace("/", "_") + "_all_posts.csv"

  # store df in csv
  all_data.to_csv(file_name)
  print(file_name + " downloaded in current folder.")

elif response.status_code == 403:
  print(response.status_code, "error: ensure the user agent variable is set.")
else:
  print(response.status_code, "error.")