-
Notifications
You must be signed in to change notification settings - Fork 0
/
wpmedia.py
57 lines (44 loc) · 2.04 KB
/
wpmedia.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Author: Christian Wiegman | equationunequal
# Website: http://www.newskin.nl
# Purpose: Download all media files from a WordPress website using the wp-json API
# License: http://unlicense.org
# Imports
import requests
import os
# Spoof headers, required for some sites
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36'}

# Directory (relative to the current working directory) that receives the downloads
MEDIA_DIR = "media"
# Seconds to wait for a server before giving up, so a stalled connection cannot hang the script
REQUEST_TIMEOUT = 30
# "code" value of the JSON error object returned for a page without media files
END_OF_MEDIA_CODE = "rest_post_invalid_page_number"


def normalize_site_url(raw_url):
    """Return the site URL as typed by the user in a form safe to build API URLs from.

    Surrounding whitespace is removed, "http://" is prepended when no
    http/https scheme is present, and trailing slashes are dropped so that
    appending "/wp-json/..." never produces a double slash.
    """
    url = raw_url.strip()
    # Add http:// if needed (scheme check is case-insensitive)
    if not url.lower().startswith(("http://", "https://")):
        url = "http://" + url
    # Only strip slashes after the scheme separator, never the "//" of "http://"
    scheme, separator, rest = url.partition("://")
    return scheme + separator + rest.rstrip("/")


def is_end_of_media(page):
    """Return True when a decoded media page means there is nothing left to download.

    That is either the error object carrying END_OF_MEDIA_CODE or an empty
    list. The empty-list case matters: without it, a site that answers every
    page number with [] would keep the download loop running forever.
    """
    if isinstance(page, dict):
        # .get() so an error object without a "code" key is not a KeyError
        return page.get("code") == END_OF_MEDIA_CODE
    return isinstance(page, list) and not page


def media_source_urls(page):
    """Return the "source_url" of every usable item in a decoded media page.

    Anything that is not a list yields []; items that are not dicts or that
    lack a non-empty "source_url" are skipped instead of raising.
    """
    if not isinstance(page, list):
        return []
    return [
        item["source_url"]
        for item in page
        if isinstance(item, dict) and item.get("source_url")
    ]


def local_media_path(media_url, directory=MEDIA_DIR):
    """Return the path inside directory where media_url is saved, or None.

    The file name is the last path segment of the URL with any query string
    or fragment removed. None is returned when the URL has no file name
    (for example when it ends with "/").
    """
    url_path = media_url.split("?", 1)[0].split("#", 1)[0]
    file_name = os.path.basename(url_path)
    if not file_name:
        return None
    return f"{directory}/{file_name}"


def fetch_media_page(site_url, page_number):
    """Request one page of the media list (maximum 100 per page) and return the decoded JSON.

    The HTTP status is deliberately not checked here: the end of the list is
    signalled by a JSON error object, which the server may deliver with a
    non-2xx status, and the caller needs to see that object.

    Raises requests.RequestException on network failure and ValueError when
    the body is not JSON.
    """
    response = requests.get(
        f"{site_url}/wp-json/wp/v2/media/?per_page=100&page={page_number}",
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    return response.json()


def download_media_file(media_url):
    """Download one media file into MEDIA_DIR; return True on success, False if skipped."""
    media_file_path = local_media_path(media_url)
    if media_file_path is None:
        print("Skipping (no file name in URL): " + media_url)
        return False
    try:
        media_file = requests.get(media_url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Do not save an HTML error page under the media file's name
        media_file.raise_for_status()
    except requests.RequestException as error:
        print(f"Skipping {media_url}: {error}")
        return False
    # Save the media file in the media directory
    with open(media_file_path, "wb") as f:
        f.write(media_file.content)
    return True


def main():
    """Ask for a WordPress site URL and download all of its media files into MEDIA_DIR."""
    # Set the URL of the WordPress site
    site_url = normalize_site_url(input("Enter the URL of the WordPress site: "))
    # Create media directory if it does not exist
    os.makedirs(MEDIA_DIR, exist_ok=True)
    print("Scanning site: " + site_url)

    page_number = 1
    while True:
        try:
            wp_response = fetch_media_page(site_url, page_number)
        except (requests.RequestException, ValueError) as error:
            # ValueError covers a body that is not JSON (e.g. an HTML error page)
            print(f"Could not read page {page_number}: {error}")
            break
        # Check for page without media files
        if is_end_of_media(wp_response):
            print("Reached the end.")
            break
        if not isinstance(wp_response, list):
            # Some other API error object; iterating it would not yield media items
            print(f"Unexpected response on page {page_number}: {wp_response!r}")
            break
        # Announced only once the page is known to contain files
        print("Downloading files from page " + str(page_number))
        # Loop through each media file and download it
        for media_url in media_source_urls(wp_response):
            download_media_file(media_url)
        page_number += 1


if __name__ == "__main__":
    main()
# Version History
# 2024-02-10: 1.0
# 2024-02-11: 1.1 Add http:// if needed, create media directory if not present
# 2024-02-27: 1.2 Add user-agent in header