# Download Files From Citesphere Group

In [None]:
import os
import requests
import json

## Specify Properties

The following properties need to be set before continuing:
- `FOLDER_NAME`: path to the folder in which files should be downloaded, can be relative or absolute. Default value downloads files into a folder "download" located next to this notebook.
- `GROUP_ID`: id of the Zotero group that should be downloaded (can be retrieved from the url of a group in Citesphere).
- `CITESPHERE_API_URL`: API endpoint of Citesphere (should end in `/api`).
- `TOKEN`: Citesphere access token.
- `GILES_ROOT`: Base url of Giles.

In [None]:
# User configuration: fill these in before running the cells below.
FOLDER_NAME = "download/"  # folder the files are written to (relative or absolute)
GROUP_ID = ""  # id of the group to download (taken from the group's Citesphere URL)
CITESPHERE_API_URL = ""  # Citesphere API endpoint, should end in "/api"
TOKEN = ""  # Citesphere access token, sent as a bearer token
GILES_ROOT = ""  # base url of Giles

In [None]:
# the following should only be changed if the Citesphere API changes
# (a trailing "/" on CITESPHERE_API_URL is dropped so the url contains no "//")
ITEMS_API_URL = f"{CITESPHERE_API_URL.rstrip('/')}/v1/groups/{GROUP_ID}/items"

## Functions
The following functions do the main work of downloading files.

In [None]:
# get the items of the group
def get_items():
    """Fetch the items of the configured group from the Citesphere API.

    Sends a single GET request to ``ITEMS_API_URL`` with ``TOKEN`` as a
    bearer token.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If Citesphere answers with a 4xx/5xx status,
            e.g. because the token is missing, invalid or expired.
    """
    headers = {'Authorization': f'Bearer {TOKEN}'}
    response = requests.get(ITEMS_API_URL, headers=headers)
    # Fail right here with the HTTP status instead of later with a confusing
    # JSON decoding error or KeyError on an error payload.
    response.raise_for_status()
    # NOTE(review): only one request is made; if this endpoint paginates,
    # later pages are not fetched -- confirm against the Citesphere API.
    return response.json()

In [None]:
def get_filename_from_response(response):
    """Return the filename announced in a response's Content-Disposition header.

    The header is parsed with the standard library's header parser rather
    than by splitting on ``filename=``, so surrounding quotes are removed and
    parameters that follow the filename (``filename="a.pdf"; size=1``) do not
    leak into the result.

    Args:
        response: Any object with a ``headers`` mapping, such as a
            ``requests.Response``.

    Returns:
        The filename as a string, or ``None`` if the header is absent or
        does not carry a non-empty filename.
    """
    # Local import: only this helper needs the header parser.
    from email.message import Message

    content_disposition = response.headers.get('Content-Disposition')
    if not content_disposition:
        return None

    # Message implements the parameter parsing rules for structured headers
    # (quoting, ";"-separated parameters, case-insensitive parameter names).
    header = Message()
    header['Content-Disposition'] = content_disposition
    return header.get_filename() or None

In [None]:
def download_file(file_id):
    """Download one file from Giles into the folder ``FOLDER_NAME``.

    The file content is requested from Giles with ``TOKEN`` as a bearer
    token and stored under the filename that
    ``get_filename_from_response`` extracts from the response.

    A file with the same name that already exists in ``FOLDER_NAME`` is
    overwritten. If Giles answers with an error status, or no usable
    filename can be determined, a message is printed and nothing is written.

    Args:
        file_id: Id of the Giles file to download.
    """
    endpoint = f"{GILES_ROOT}/api/v2/resources/files/{file_id}/content"
    headers = {'Authorization': f'Bearer {TOKEN}'}
    response = requests.get(endpoint, headers=headers)

    # Never store the body of an error response as if it were the file.
    if not response.ok:
        print(f"Skipping file {file_id}: Giles responded with HTTP {response.status_code}")
        return

    filename = get_filename_from_response(response)
    if filename:
        # The name is supplied by the server: keep only its last path
        # component so it cannot point outside of FOLDER_NAME.
        filename = os.path.basename(filename.replace('\\', '/'))
    if not filename or filename in ('.', '..'):
        print(f"Skipping file {file_id}: could not determine a filename")
        return

    # Create the target folder on first use (an empty FOLDER_NAME means the
    # current working directory, which needs no creation).
    if FOLDER_NAME:
        os.makedirs(FOLDER_NAME, exist_ok=True)

    # this will overwrite files with the same name in the folder FOLDER_NAME!
    with open(os.path.join(FOLDER_NAME, filename), 'wb') as file:
        file.write(response.content)

## Download files
The following code uses the functions above to download the files.

In [None]:
# get info about files
items = get_items()

# get file ids to download: the id of the extracted text of every Giles upload
file_ids = []
for item in items['items']:
    # presumably items without attachments have no (or a null) 'gilesUploads'
    # entry -- treat both as "nothing to download" instead of crashing
    uploads = item.get('gilesUploads') or []
    for upload in uploads:
        # uploads without an extracted text entry are skipped
        extracted_text = upload.get('extractedText')
        if extracted_text and 'id' in extracted_text:
            file_ids.append(extracted_text['id'])

In [None]:
# download every file; a failing download is reported and does not stop the
# remaining downloads
failed_file_ids = []
for file_id in file_ids:
    try:
        download_file(file_id)
    except (requests.RequestException, OSError) as error:
        # network problems and file system errors only affect this one file
        failed_file_ids.append(file_id)
        print(f"Download of file {file_id} failed: {error}")

if failed_file_ids:
    print(f"{len(failed_file_ids)} of {len(file_ids)} downloads failed.")