In [None]:
import pandas as pd
import requests
import json
import os

In [None]:
def _build_process_item(node_id, media_id, file_id, title, model, file_uri, download_status):
    """Assemble one report row describing a processed work or page."""
    return {"node_id": node_id,
            "media_id": media_id,
            "file_id": file_id,
            "title": title,
            "model": model,
            "file_uri": file_uri,
            "download_status": download_status
           }


def download_files(config):
    """Download the page files of every work listed in the input CSV.

    Reads the CSV named by ``config["csv_input_file"]`` (columns ``Node_id``,
    ``title`` and ``model`` are used). For each row the children of the node
    are requested; for each child the media entity is resolved to a file id
    and that file is downloaded.

    Parameters:
        config: mapping holding at least ``csv_input_file``; it is also
            passed unchanged to the helper functions this one calls.

    Returns:
        A list of dicts, one per work followed by one per child page, with
        the keys ``node_id``, ``media_id``, ``file_id``, ``title``, ``model``,
        ``file_uri`` and ``download_status``. Work rows carry the status
        ``"Success"`` or ``"Unable to get Children"``; page rows carry the
        boolean outcome of the download (``False`` when the media lookup
        failed and no download was attempted).
    """
    input_data = pd.read_csv(config["csv_input_file"])

    process_data = []
    for index, row in input_data.iterrows():
        node_id = str(row["Node_id"])
        print("donwloading work with node_id: " + str(node_id))

        title = str(row["title"])
        model = str(row["model"])
        success_flag, response_text = get_item_children(config, node_id)

        if not success_flag:
            process_data.append(
                _build_process_item(node_id, "", "", title, model, "", "Unable to get Children"))
            continue

        process_data.append(
            _build_process_item(node_id, "", "", title, model, "", "Success"))

        for child_item in json.loads(response_text):
            child_node_id = child_item["nid"]
            child_node_title = child_item["title"]
            child_media_id = child_item["field_islandora_object_media"]

            # Reset for every child: when the media lookup fails no download
            # happens, and the row must not raise NameError or inherit the
            # path/status of the previously processed page.
            file_success_flag = False
            full_file_path = ""

            media_success_flag, file_id = get_file_id_from_media(config, child_media_id)
            if media_success_flag:
                print("donwloading file: " + str(file_id))

                # Files are stored under the parent work's node id.
                file_success_flag, full_file_path = download_single_file(
                    config, node_id, child_media_id, file_id)

            process_data.append(
                _build_process_item(child_node_id, child_media_id, file_id,
                                    child_node_title, "Page",
                                    full_file_path, file_success_flag))
    return process_data

In [None]:
# Get node children info
# 
def get_item_children(config, node_id):
    """Request the children listing of a node as raw JSON text.

    Issues a GET to ``<site_url>/node/<node_id>/children_rest?_format=json``.

    Parameters:
        config: mapping providing ``site_url``.
        node_id: identifier of the parent node; converted to text for the URL.

    Returns:
        ``(True, body_text)`` when the server answers with HTTP 200,
        otherwise ``(False, "")``.
    """
    endpoint = config["site_url"] + "/node/{}/children_rest?_format=json".format(str(node_id))
    reply = requests.get(endpoint, headers={"Content-Type": "application/json"})

    # Anything other than a plain 200 is reported as a failure with no body.
    if reply.status_code != 200:
        return False, ""
    return True, reply.text
        

In [None]:
def get_file_id_from_media(config, media_id):
    """Resolve a media entity to the id of the image file it references.

    Issues a GET to ``<site_url>/media/<media_id>?_format=json`` and reads
    the ``target_id`` of the first entry under ``field_media_image``.

    Parameters:
        config: mapping providing ``site_url``.
        media_id: identifier of the media entity; converted to text for the URL.

    Returns:
        ``(True, target_id)`` when the server answers with HTTP 200 and the
        first ``field_media_image`` entry carries a ``target_id``; otherwise
        ``(False, "")``. A field that is missing or present but empty is
        reported as a failure rather than raising.
    """
    headers = {"Content-Type": "application/json"}
    url = config["site_url"] + "/media/" + str(media_id) + "?_format=json"
    response = requests.get(
            url,
            headers=headers)

    if response.status_code != 200:
        return False, ""

    media_data = json.loads(response.text)

    try:
        target_id = media_data["field_media_image"][0]["target_id"]
    except (KeyError, IndexError, TypeError):
        # Field absent, empty list, or unexpected payload shape: there is no
        # image file to report, so signal failure through the flag instead of
        # letting the lookup error abort the caller's batch.
        return False, ""

    return True, target_id
    

In [None]:
def download_single_file(config, node_id, media_id, file_id):
    """Look up a file entity and download its content into the output tree.

    Issues a GET to ``<site_url>/file/<file_id>?_format=json``, takes the
    ``url`` of the first ``uri`` entry, and hands it to
    ``get_file_from_drupal`` together with the target directory
    ``<output_dir_path>/<node_id>/<media_id>/<file_id>/`` (created if needed).

    Parameters:
        config: mapping providing ``site_url`` and ``output_dir_path``.
        node_id: node id used as the first directory level.
        media_id: media id used as the second directory level.
        file_id: file entity id; used in the request URL and as the third
            directory level.

    Returns:
        ``(success_flag, full_file_path)``. On any failure (non-200 reply,
        no usable ``uri`` entry, error while creating the directory or
        fetching the file) the result is ``(False, "")``; errors raised in
        the download step are printed, not propagated.
    """
    headers = {"Content-Type": "application/json"}
    url = config["site_url"] + "/file/" + str(file_id) + "?_format=json"
    response = requests.get(
            url,
            headers=headers)

    success_flag = False
    full_file_path = ""

    if response.status_code == 200:
        file_data = json.loads(response.text)

        if "uri" in file_data:
            try:
                # Inside the try so an empty or malformed "uri" entry is
                # reported like any other download failure.
                file_uri = file_data["uri"][0]["url"]
                file_dir = config["output_dir_path"] + "/" + str(node_id) + "/" + str(media_id) + "/" + str(file_id) + "/"

                os.makedirs(file_dir, exist_ok=True)

                success_flag, full_file_path = get_file_from_drupal(config, file_uri, file_dir)

            except Exception as error:
                # Best effort: one failing file must not stop the batch.
                print("An error occurred:", error)

    # Always return a pair, including for non-200 replies, so callers can
    # unpack the result unconditionally.
    return success_flag, full_file_path

In [None]:
def get_file_from_drupal(config, file_uri, output_dir):
    """Fetch a file from the site and store it in ``output_dir``.

    Issues a GET to ``<site_url>/<file_uri>`` and, on HTTP 200, writes the
    response body to ``output_dir`` under the last path component of
    ``file_uri``.

    Parameters:
        config: mapping providing ``site_url``.
        file_uri: path of the file relative to the site; its final component
            becomes the local file name.
        output_dir: existing directory to write the file into.

    Returns:
        ``(True, full_file_path)`` once the file has been written, or
        ``(False, "")`` when the server did not answer with HTTP 200.
    """
    url = config["site_url"] + "/" + str(file_uri)
    response = requests.get(
            url)

    success_flag = False
    # Defined up front so the non-200 path returns a value instead of
    # raising UnboundLocalError.
    full_file_path = ""

    if response.status_code == 200:
        local_file_name = os.path.basename(file_uri)
        full_file_path = os.path.join(output_dir, local_file_name)

        with open(full_file_path, 'wb') as file:
            file.write(response.content)
        # Flag success only after the file has been written and closed.
        success_flag = True
    return success_flag, full_file_path