In [None]:
import os
from urllib.parse import urlsplit

import requests

In [None]:
def _split_pdf_url(url):
    """Return ``(guid, filename)`` taken from the path of a document URL.

    Raises ValueError when the URL is not a string or when its path does not
    have the ``.../<GUID>/pdf/<FILENAME>.pdf`` shape.
    """
    if not isinstance(url, str):
        raise ValueError("URL must be a string")

    # Only the path component is split, so a query string or fragment can
    # never end up inside the saved file name.
    parts = urlsplit(url).path.split('/')
    if len(parts) < 3:
        raise ValueError("URL does not match .../<GUID>/pdf/<FILENAME>.pdf")

    guid, filename = parts[-3], parts[-1]
    # Empty or dot segments would either make the write fail or place the
    # file outside the intended GUID subfolder.
    invalid_segments = ('', '.', '..')
    if guid in invalid_segments or filename in invalid_segments:
        raise ValueError("URL does not match .../<GUID>/pdf/<FILENAME>.pdf")

    return guid, filename


def download_pdfs(url_list, local_folder, timeout=30):
    """
    Downloads PDFs from a list of URLs and saves them in a specified local folder.

    Each file is written to "<local_folder>/<GUID>/<FILENAME>.pdf". A URL that
    is malformed, cannot be fetched, or cannot be written is reported on
    stdout and skipped; the remaining URLs are still processed.

    Parameters:
    - url_list: A list of URLs in the form "http://<ROOT>/<GUID>/pdf/<FILENAME>.pdf"
    - local_folder: The name of the local folder where the PDFs should be saved.
    - timeout: Value passed to requests.get as its timeout (seconds), so a
      stalled server cannot block the whole run. Defaults to 30.

    Returns:
    A list of the final local paths to all files downloaded.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(local_folder, exist_ok=True)

    downloaded_files = []

    for url in url_list:
        # Validate the URL before spending a network round-trip on it.
        try:
            guid, filename = _split_pdf_url(url)
        except ValueError as e:
            print(f"Failed to download {url}: {e}")
            continue

        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # Raises an HTTPError if the response was an error

            # Create subfolder for the GUID if it doesn't exist
            guid_folder = os.path.join(local_folder, guid)
            os.makedirs(guid_folder, exist_ok=True)

            file_path = os.path.join(guid_folder, filename)

            # Save the PDF
            with open(file_path, 'wb') as f:
                f.write(response.content)

        # OSError covers filesystem failures (permissions, full disk, ...) so
        # a single bad write does not abort the rest of the batch.
        except (requests.exceptions.RequestException, OSError) as e:
            print(f"Failed to download {url}: {e}")
            continue

        downloaded_files.append(file_path)
        print(f"Downloaded {filename} to {file_path}")

    return downloaded_files


In [None]:
# Sample input: World Bank document URLs, each shaped like
# "http://<ROOT>/<GUID>/pdf/<FILENAME>.pdf".
url_list = [
    "http://documents.worldbank.org/curated/en/713741468337198922/pdf/536490BRI0SPAN10Box345621B01PUBLIC1.pdf",
    "http://documents.worldbank.org/curated/en/615181468141301901/pdf/394600turkey0p1io0economic01public1.pdf",
    "http://documents.worldbank.org/curated/en/561931468184777746/pdf/96423-BRI-CHILD-FECES-Box391444B-PUBLIC-WSP-Chad-CFD-Profile.pdf",
    "http://documents.worldbank.org/curated/en/115871467986280887/pdf/96433-BRI-CHILD-FECES-Box391444B-PUBLIC-WSP-Malawi-CFD-Profile.pdf",
    "http://documents.worldbank.org/curated/en/449301467990363947/pdf/E23680v50P11151C10AFR1HCWMP1P111556.pdf",
    "http://documents.worldbank.org/curated/en/941581468190761057/pdf/96403-BRI-2012Apr16-P003566-SPANISH-China-Basic-Health-Services-Box-391427B-PUBLIC.pdf",
    "http://documents.worldbank.org/curated/en/509091468331002932/pdf/663880NEWS00PU0x365772B0HDQU0402011.pdf",
    "http://documents.worldbank.org/curated/en/405711468175729150/pdf/SFG1354-EA-P145335-PUBLIC-Disclosed-9-29-2015-Box393207B.pdf",
    "http://documents.worldbank.org/curated/en/752981468262477356/pdf/529370BRI0REVI10BOX353820B01PUBLIC1.pdf",
    "http://documents.worldbank.org/curated/en/210641468250499158/pdf/multi0page.pdf",
    "http://documents.worldbank.org/curated/en/261121468118730678/pdf/434650ESW0P1061sclosed0June01302008.pdf",
    "http://documents.worldbank.org/curated/en/586751527602077814/pdf/Egypt-Arab-Republic-of-EG-Health-Care-Quality-Improvement.pdf",
    "http://documents.worldbank.org/curated/en/132391620237296622/pdf/Additional-Financing-Environmental-and-Social-Review-Summary-ESRS-Ghana-COVID-19-Emergency-Preparedness-and-Response-Project-Second-Additional-Financing-P176485.pdf",
    "http://documents.worldbank.org/curated/en/505471468333856058/pdf/480610BR0R200910disclosed0417120091.pdf",
    "http://documents.worldbank.org/curated/en/858561468015878973/pdf/623750PROP0P110delo0BM000abril02011.pdf",
    "http://documents.worldbank.org/curated/en/592131632358667692/pdf/Revised-Labor-Management-Procedures-Public-Information-and-Awareness-Services-for-Vulnerable-Communities-in-Lao-PDR-P170640.pdf",
    "http://documents.worldbank.org/curated/en/306031588146608710/pdf/Infection-Control-and-Waste-Management-for-Lesotho-Standard-Operating-Procedures.pdf",
    "http://documents.worldbank.org/curated/en/166011467999980505/pdf/ICRR14891-P083997-Box394872B-PUBLIC.pdf",
    "http://documents.worldbank.org/curated/en/452051468106470860/pdf/multi-page.pdf",
    "http://documents.worldbank.org/curated/en/635231468033354371/pdf/0394025FCED4BE3185256F03000FFAF4.pdf",
    "http://documents.worldbank.org/curated/en/739891468330315677/pdf/121830WDR0CHINESE0Box74476B01PUBLIC1.pdf",
    "http://documents.worldbank.org/curated/en/497961491988459594/pdf/114174-NEWS-PUBLIC-English-CU-APR17-web.pdf",
]

In [None]:
# Destination root for the downloaded PDFs. Set the WORLDBANK_PDF_DIR
# environment variable to point the notebook at another directory without
# editing this cell; when it is unset the original location is used.
local_folder = os.environ.get(
    "WORLDBANK_PDF_DIR",
    "/Users/willit/Documents/WorldBank/samplefiles",
)


In [None]:
# Fetch every URL in url_list into local_folder and keep the saved paths.
downloaded_files = download_pdfs(url_list, local_folder)

# Report one saved path per line.
print("Completed downloads:")
for saved_path in downloaded_files:
    print(saved_path)