<a href="https://colab.research.google.com/github/getaccept/notebooks/blob/master/API_Download_Archive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Download signed documents to a ZIP-archive

This notebook downloads all signed documents from a selected entity and compresses them into a ZIP file.

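__*** NOTE ***__
- Only signed documents are downloaded, in batches controlled by the count and offset entered further down.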

In [None]:
# import dependencies
import requests
import re
from unicodedata import normalize
import os
import ipywidgets as widgets
from zipfile import ZipFile
from google.colab import files
from IPython.display import HTML, display, clear_output

# Constants
# Root URL of the GetAccept REST API; every request in this notebook is built from it.
BASE_URL = "https://api.getaccept.com/v1"
# Optional entity id sent with the login request when non-empty.
# The login cell overwrites it with the entity id of the logged-in user.
SOURCE_ENTITY_ID = ""
#@title ↓↓ Click here to start
#@markdown This step might take a few seconds to run. <br>
#@markdown Then use __shift+enter__ key or click ► left of each step to go through the flow

In [None]:
# Text input for the login email; the login cell reads `email_widget.value`.
email_widget = widgets.Text(
    value="",
    placeholder="Enter login email",
    description="Email:",
    disabled=False
)
# Password input for the login password; the login cell reads `password_widget.value`.
password_widget = widgets.Password(
    value="",
    placeholder="Enter password",
    description="Password:",
    disabled=False
)
#@markdown Use the form below to fill in login details to your entity in GetAccept and then run next cell to login
# Last expression of the cell: both inputs stacked in a VBox, shown as the cell output.
widgets.VBox([email_widget, password_widget])

In [None]:
#@markdown Login and store API token
# Guard clause: both form fields must be filled in before any request is sent.
if not (email_widget.value and password_widget.value):
  raise TypeError("Could not login, missing email or password!")

payload = {"email": email_widget.value, "password": password_widget.value}
# Only pin the login to a specific entity when one has been configured.
if SOURCE_ENTITY_ID:
  payload["entity_id"] = SOURCE_ENTITY_ID
response = requests.post(BASE_URL + "/auth", json=payload)
data = response.json()
if "access_token" not in data:
  # Fall back to the whole payload so a failure response without an "errors"
  # key is still reported instead of being masked by a KeyError.
  raise TypeError(data.get("errors", data), "Please check your credentials")
# Authorization header reused by every later request in the notebook.
source_auth_headers = {"Authorization": "bearer " + data["access_token"]}

# Check login and list entities
response = requests.get(BASE_URL + "/users/me", headers=source_auth_headers)
user_data = response.json()
if "user" not in user_data:
  raise TypeError(user_data.get("errors", user_data), "Could not read the user profile")
current_user = user_data["user"]
print(f"Logged in as {current_user['first_name']} on entity {current_user['entity_name']}")
# Remember which entity the token currently belongs to.
SOURCE_ENTITY_ID = current_user["entity_id"]

In [None]:
#@markdown Select the source entity you would like to get settings from. When you're done, run the next cell
# One (name, id) pair per entity returned for the logged-in user; the Select
# widget shows the name and exposes the id as its value.
source_entity_list = [(entity["name"], entity["id"]) for entity in user_data["entities"]]
source_entity_picker = widgets.Select(options=sorted(source_entity_list), value=SOURCE_ENTITY_ID)
# Last expression of the cell: the picker itself, shown as the cell output.
source_entity_picker

In [None]:
#@markdown Verifying entity token of source...
if source_entity_picker.value != SOURCE_ENTITY_ID:
  # Switch entity: request a token for the picked entity using the current one.
  response = requests.get(BASE_URL + "/refresh/" + source_entity_picker.value, headers=source_auth_headers)
  data = response.json()
  if "access_token" not in data:
    # Without a new token the old entity's token would still be in use, so
    # stop here instead of reporting a switch that did not happen.
    # TypeError matches the exception type raised by the login cell.
    raise TypeError(data.get("errors", data), "Could not switch to the selected entity")
  source_auth_headers = {"Authorization": "bearer " + data["access_token"]}
  SOURCE_ENTITY_ID = source_entity_picker.value
print(f'Authenticated to source entity "{source_entity_picker.label}"')

In [None]:
# Number of documents to request per run. The value is entered as text and
# converted with int() in the "Get document list" cell.
document_count_widget = widgets.Text(
    value="100",
    placeholder="Enter document count",
    description="Count:",
    disabled=False
)
# Index of the first document to request. Also entered as text and converted
# with int() in the "Get document list" cell.
document_offset_widget = widgets.Text(
    value="0",
    placeholder="Enter document offset",
    description="Offset:",
    disabled=False
)
#@title Document Count & Offset
#@markdown Enter document count and offset. <br><hr>
#@markdown *Count* is the number of documents to compress to each zip-file and depending on the document size you will have to limit this to a few megabytes per file. For smaller document sizes, recommended count is 100 and for larger, 10-20.<hr>
#@markdown *Offset* is the document number to start downloading at. If you have downloaded the first 100 documents this should be 100 the next run.
# Last expression of the cell: both inputs stacked in a VBox, shown as the cell output.
widgets.VBox([document_count_widget, document_offset_widget])

#EMAIL = "" #@param {type:"string"}
#PASSWORD = "" #@param {type:"string"}
#BASEURL = "https://api.getaccept.com/v1" #@param {type:"string"}
#SOURCE_ENTITY = "" #@param {type: "string"}
#DOCUMENT_COUNT = 100 #@param {type: "number"}
#DOCUMENT_OFFSET = 0 #@param {type: "number"}

In [None]:
#@markdown Preparing functions
def download_document(document_id, filename):
  """Download one document and store it under ``downloads/``.

  Args:
    document_id: Id of the document, inserted into the download URL.
    filename: File name (without directory) to store the content under.

  Returns:
    The relative path of the written file ("downloads/" + filename), or
    None when the request failed or returned no content.
  """
  url = BASE_URL + "/documents/" + document_id + "/download"
  params = { "direct": "true" }
  response = requests.get(url, headers=source_auth_headers, params=params)
  # A failed request normally still has a (non-empty) error body; never save
  # that body to disk as if it were the document.
  if not response.ok or not response.content:
    return None
  directory = "downloads/"
  # exist_ok avoids the separate existence check before creating the directory.
  os.makedirs(directory, exist_ok=True)
  path = directory + filename
  # The with-block closes the file; no explicit close() is needed.
  with open(path, 'wb') as f:
    f.write(response.content)
  return path
 
def slugify(text, delim='-'):
    """Return a lowercase, ASCII-only slug of *text*.

    The text is lowercased and split on runs of whitespace/punctuation; each
    piece is NFKD-normalized and stripped of non-ASCII characters, pieces that
    end up empty are dropped, and the rest are joined with *delim*.
    """
    separators = re.compile(r'[\t !"#$%&\'()*\-/<=>?@\[\\\]^_`{|},.:]+')
    ascii_pieces = (
        normalize('NFKD', piece).encode('ascii', 'ignore').decode('utf-8')
        for piece in separators.split(text.lower())
    )
    # Filter after transliteration: a piece made only of non-ASCII characters
    # becomes empty and must not leave a dangling delimiter.
    return delim.join(piece for piece in ascii_pieces if piece)

def progress(value, max=100):
    """Build an HTML ``<progress>`` bar for display in the notebook.

    Args:
        value: Current progress value; also used as the element's fallback text.
        max: Value that represents completion (the element's ``max`` attribute).

    Returns:
        An ``IPython.display.HTML`` object wrapping the progress element.
    """
    # No comma between attributes: HTML attributes are whitespace-separated,
    # a comma there makes the markup malformed.
    return HTML("""
        <progress
            value='{value}'
            max='{max}'
            style='width: 100%'
        >
            {value}
        </progress>
    """.format(value=value, max=max))

In [None]:
#@title Get document list
# Get data for signed documents
# The widgets hold text, so convert to integers (int() raises ValueError on
# non-numeric input) and reject ranges that make no sense.
DOCUMENT_COUNT = int(document_count_widget.value)
DOCUMENT_OFFSET = int(document_offset_widget.value)
if DOCUMENT_COUNT < 1 or DOCUMENT_OFFSET < 0:
  raise ValueError("Count must be at least 1 and offset must not be negative")
url = BASE_URL + "/documents"
params = { "filter": "signed", "showall": "true", "limit": DOCUMENT_COUNT, "offset": DOCUMENT_OFFSET }
response = requests.get(url, headers=source_auth_headers, params=params)
documents = response.json()
# The export cell iterates `documents` and reads "id"/"name" from each item.
# A non-list payload (e.g. an error object) would otherwise be counted with
# len() and reported as ready-to-download documents.
if not isinstance(documents, list):
  raise TypeError(documents, "Could not fetch the document list")
print(f"{len(documents)} documents ready to download")

In [None]:
#@title Run to export ZIP file with signed documents
# Archive name: <entity-slug>_<offset>-<offset+count>.zip
zipfilename = slugify(source_entity_picker.label) + "_" + str(DOCUMENT_OFFSET) + "-" + str(DOCUMENT_OFFSET + DOCUMENT_COUNT) + ".zip"
count = 0
# Use the number of documents actually returned as the maximum, so the bar
# reaches 100% even when fewer than DOCUMENT_COUNT documents are available.
total = len(documents)
skipped = []
prog = display(progress(count, total), display_id=True)
with ZipFile(zipfilename, "w") as zf:
  for document in documents:
    filename = slugify(document["name"]) + "_" + document["id"] + ".pdf"
    dlfile = download_document(document["id"], filename)
    if dlfile is None:
      # download_document returns None when nothing could be saved; skip the
      # document instead of aborting the whole export.
      skipped.append(document["id"])
    else:
      zf.write(dlfile)
      # The file is in the archive now; remove the temporary download.
      os.remove(dlfile)
    count = count + 1
    prog.update(progress(count, total))
# The with-block has already closed the archive at this point.
if skipped:
  print("%d documents could not be downloaded: %s" % (len(skipped), ", ".join(skipped)))
files.download(zipfilename)