# Download Data from Google Drive

**Timm Nawrocki**  
Alaska Center for Conservation Science  
2019-03-25

In [None]:
# -*- coding: utf-8 -*-
# ---------------------------------------------------------------------------
# Download Data from Google Drive
# Author: Timm Nawrocki
# Created on: 2019-03-25
# Usage: Must be executed as a Jupyter Notebook in a Python 2 installation with Google API and oauth2client installed.
# Description: "Download Data from Google Drive" programmatically downloads batches of data from Google Drive. This notebook is intended to download the spectral data exported from Google Earth Engine.
# ---------------------------------------------------------------------------

## 1. Initialize Environment

In [None]:
# Import packages
import os
import io
import httplib2
import logging
from oauth2client.client import OAuth2WebServerFlow
from oauth2client.file import Storage
from apiclient.discovery import build
from apiclient.http import MediaIoBaseDownload
from apiclient import errors

# Define Local Drive
drive = 'F:/'

# Define target Google Drive folder
google_folder = '1p_CR7YW3yyvpiorYt_uxuGKlKkmBxLcE' #Sentinel 2
#google_folder = '1ptOXuFUwbnXpmhsW_hUqvI2_BiSRNOLS' #Landsat 8

# Define output directory
output_directory = os.path.join(drive, 'ACCS_Work/Projects/VegetationEcology/BristolBay_Vegetation/Project_GIS/Data_Input/source_data/imagery/sentinel2')
#output_directory = os.path.join(drive, 'ACCS_Work/Projects/VegetationEcology/BristolBay_Vegetation/Project_GIS/Data_Input/source_data/imagery/landsat8')

# Define logger (writes DEBUG-level messages to debug.log in the working directory)
logging.basicConfig(filename='debug.log',level=logging.DEBUG)

# Define client id and secret for Google Drive API
# The environment variables GOOGLE_DRIVE_CLIENT_ID and GOOGLE_DRIVE_CLIENT_SECRET take
# precedence when set, so credentials do not have to live in the notebook.
# NOTE(review): the literal fallbacks are kept only so existing setups keep working;
# rotate these values and remove the fallbacks once the environment variables are in use.
client_id = os.environ.get('GOOGLE_DRIVE_CLIENT_ID', '655284198639-hh3fg8es5hp9rtmvh4a0t5ulfigns9qq.apps.googleusercontent.com')
client_secret = os.environ.get('GOOGLE_DRIVE_CLIENT_SECRET', 'cmelwkw9PFwtUKZ6ZvSQr-IH')

# Define Google Drive authorization information and credentials file
oauth_scope = 'https://www.googleapis.com/auth/drive'
redirect_uri = 'urn:ietf:wg:oauth:2.0:oob'
credentials_file = os.path.join(drive, 'ACCS_Work/Administrative/Credentials/credentials.json')

# Create credentials storage object and load any previously saved credentials
storage = Storage(credentials_file)
credentials = storage.get()

In [None]:
# Run through the OAuth flow and retrieve credentials
# The interactive flow is only needed when no usable credentials were loaded from
# storage; otherwise the stored credentials are reused as-is.
if credentials is None or credentials.invalid:
    try:
        flow = OAuth2WebServerFlow(client_id, client_secret, oauth_scope, redirect_uri)
        authorize_url = flow.step1_get_authorize_url()
        print('Go to the following link in your browser: {0}'.format(authorize_url))
        code = raw_input('Enter verification code: ').strip()
        credentials = flow.step2_exchange(code)
        # Persist the new credentials so later runs can skip the interactive step
        storage.put(credentials)
        print('User successfully authenticated.')
    except Exception as error:
        # Catch Exception (not a bare except) so that interrupting the kernel is not
        # reported as an authentication failure.
        print('Could not authenticate user. Ensure client id and client secret are correct for Google Drive API.')
        print('Authentication error: {0}'.format(error))
        # Re-raise so the notebook stops here with the original traceback
        raise
else:
    print('Using stored credentials.')

In [None]:
# Build the Drive API (v2) service
# A new httplib2.Http object is passed through credentials.authorize() and the result
# is handed to the discovery builder.
http = credentials.authorize(httplib2.Http())
drive_service = build('drive', 'v2', http=http)

In [None]:
# Make sure the output directory is present, creating it (and any parents) when missing
if not os.path.exists(output_directory):
    print('Directory does not exist and will be created.')
    os.makedirs(output_directory)
else:
    print('Directory exists.')

## 2. Define Functions

In [None]:
# Define a function to create a list of all file IDs within a folder
def list_files(service, folder_id):
    """
    Description: creates a list of files by ID belonging to a Google Drive folder.
    Inputs: service -- Drive API service instance.
            folder_id -- ID of the folder from which to list files.
    Returns: list of file IDs in the order they were returned. If an HTTP error occurs
             while listing, the error is printed and the IDs collected so far are returned.
    """
    # Create empty list to store file IDs
    file_id_list = []
    page_token = None
    # Search folder for files, one page of results per iteration
    while True:
        try:
            # Only send pageToken after the first page has supplied one
            param = {'pageToken': page_token} if page_token else {}
            children = service.children().list(folderId=folder_id, **param).execute()
            # Extend in place rather than rebuilding the list for every child
            file_id_list.extend(child['id'] for child in children.get('items', []))
            page_token = children.get('nextPageToken')
            if not page_token:
                break
        except errors.HttpError as error:
            print('An error occurred: {0}'.format(error))
            break
    # Return file ID list
    return file_id_list

In [None]:
# Define a function to download data files from Google Drive
def download_file(file_id, output_directory):
    """
    Description: downloads a file from Google Drive by file ID.
    Inputs: file_id -- ID of the file to download.
            output_directory -- directory in which to save the downloaded file.
    Notes: uses the module-level drive_service instance. The file is saved under its
           Google Drive title; an existing file with the same name is overwritten.
    """
    # Get file title metadata by file id
    file_meta = drive_service.files().get(fileId=file_id).execute()
    file_title = file_meta['title']
    # Generate download file path
    file_path = os.path.join(output_directory, file_title)
    print('\tSaving {0}...'.format(file_title))
    # Create request, file handler, and downloader
    request = drive_service.files().get_media(fileId=file_id)
    # The with-block closes the file handle even when a chunk download raises
    with io.FileIO(file_path, 'wb') as file_handler:
        downloader = MediaIoBaseDownload(file_handler, request)
        # Download file chunk by chunk and report progress
        done = False
        while done is False:
            status, done = downloader.next_chunk()
            print('\t\tDownload {0}%...'.format(int(status.progress() * 100)))
    print('\tSave complete.')

## 3. Download Files

In [None]:
# Collect the IDs of every file in the target Google Drive folder
file_id_list = list_files(service=drive_service, folder_id=google_folder)

In [None]:
# Download every listed file into the output directory, reporting 1-based progress
total = len(file_id_list)
for count, file_id in enumerate(file_id_list, 1):
    print('Downloading file {0} of {1}...'.format(count, total))
    download_file(file_id, output_directory)