<a href="https://colab.research.google.com/github/iypc-team/CoLab/blob/master/Writefile_GDriveWriter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# %pip install exporters

In [3]:
# from IPython.utils.path import expand_path
from __future__ import absolute_import
import json
import os
import shutil
import tempfile
from collections import Counter
try:
    from exporters.default_retries import retry_long
    from exporters.writers.filebase_base_writer import FilebaseBaseWriter
    from exporters.writers.base_writer import InconsistentWriteState
except ModuleNotFoundError:
    %pip install exporters


class GDriveWriter(FilebaseBaseWriter):
    """
    Writes items to a Google Drive account.
    It is a file-based writer, so it has the filebase option.

        - client_secret (object)
            JSON object containing client secrets (client-secret.json) file
            obtained when creating the google drive API key.

        - credentials (object)
            JSON object containing credentials, obtained by authenticating the
            application using the bin/get_gdrive_credentials.py script

        - filebase (str)
            Path to store the exported files
    """

    supported_options = {
        'credentials': {'type': object},
        'client_secret': {'type': object},
    }

    def __init__(self, *args, **kwargs):
        """Authenticate against Google Drive and initialise the metadata.

        The ``client_secret`` and ``credentials`` options are dumped as JSON
        files into a temporary directory so that PyDrive can load them from
        disk; the directory is removed again afterwards.
        """
        super(GDriveWriter, self).__init__(*args, **kwargs)
        from pydrive.auth import GoogleAuth
        from pydrive.drive import GoogleDrive
        gauth = GoogleAuth()
        files_tmp_path = tempfile.mkdtemp()
        try:
            client_secret_file = os.path.join(files_tmp_path, 'secret.json')
            with open(client_secret_file, 'w') as f:
                json.dump(self.read_option('client_secret'), f)
            gauth.LoadClientConfigFile(client_secret_file)
            credentials_file = os.path.join(files_tmp_path, 'credentials.json')
            with open(credentials_file, 'w') as f:
                json.dump(self.read_option('credentials'), f)
            gauth.LoadCredentialsFile(credentials_file)
        finally:
            # Never leave the secrets on disk, even when loading them failed.
            shutil.rmtree(files_tmp_path)
        self.drive = GoogleDrive(gauth)
        self.set_metadata('files_counter', Counter())
        self.set_metadata('files_written', [])

    @staticmethod
    def _escape_query_value(value):
        """Escape ``value`` for use inside a single-quoted Drive query term.

        Backslashes are escaped first so that the backslashes added in front
        of single quotes are not escaped a second time.
        """
        return value.replace('\\', '\\\\').replace("'", "\\'")

    def get_file_suffix(self, path, prefix):
        """
        Gets the suffix for the next file name.

        Ensures the folder for ``path`` exists, counts the non-trashed files
        in it whose title contains ``prefix`` and returns that count as a
        string zero-padded to four digits (e.g. ``'0003'``).
        """
        parent = self._ensure_folder_path(path)
        query = "'{}' in parents and trashed=false and title contains '{}'".format(
            parent['id'], self._escape_query_value(prefix))
        file_list = self.drive.ListFile({'q': query}).GetList()
        # A missing or empty listing counts as zero existing files.
        number_of_files = len(file_list) if file_list else 0
        return '{0:04}'.format(number_of_files)

    def _ensure_folder_path(self, filebase_path):
        """Creates the directory for the path given, recursively creating
        parent directories when needed.

        Returns a ``{"id": ...}`` dict identifying the deepest folder of the
        path (the Drive root when the path has no non-empty segment).
        """
        parent = {"id": "root"}
        for folder in filebase_path.split('/'):
            if not folder:
                # Leading, trailing or doubled slashes yield empty segments;
                # skip them instead of creating a folder with an empty title.
                continue
            query = "'{}' in parents and trashed=false and title = '{}'".format(
                parent['id'], self._escape_query_value(folder))
            file_list = self.drive.ListFile({'q': query}).GetList()
            if file_list:
                parent = {"id": file_list[-1]["id"]}
            else:
                new_folder = self.drive.CreateFile({
                    'title': folder,
                    'parents': [parent],
                    'mimeType': 'application/vnd.google-apps.folder',
                })
                new_folder.Upload()
                # Descend into the folder just created so that the remaining
                # segments (and the returned parent) are nested under it.
                parent = {"id": new_folder["id"]}
        return parent

    @retry_long
    def write(self, dump_path, group_key=None, file_name=None):
        """Upload the local file at ``dump_path`` to Google Drive.

        The destination folder and file name come from
        ``create_filebase_name``; the folder is created when missing. After
        the upload, local and remote size/hash are recorded in the
        ``files_written`` metadata.
        """
        if group_key is None:
            group_key = []
        filebase_path, filename = self.create_filebase_name(group_key, file_name=file_name)
        parent = self._ensure_folder_path(filebase_path)
        drive_file = self.drive.CreateFile({'title': filename, 'parents': [parent]})
        drive_file.SetContentFile(dump_path)
        drive_file.Upload()
        self._update_metadata(dump_path, drive_file)
        self.logger.info('Uploaded file {}'.format(drive_file['title']))

    def _update_metadata(self, dump_path, file):
        """Append local and remote size/hash of an uploaded file to the
        ``files_written`` metadata list.

        ``file`` is the uploaded Drive file object; its ``fileSize``,
        ``md5Checksum`` and ``title`` entries are read.
        """
        buffer_info = self.write_buffer.get_metadata(dump_path)
        key_info = {
            'size': buffer_info['size'],
            'remote_size': file['fileSize'],
            'hash': buffer_info['file_hash'],
            'remote_hash': file['md5Checksum'],
            'title': file['title'],
        }
        self.get_metadata('files_written').append(key_info)

    def _check_write_consistency(self):
        """Compare local and remote size and hash of every written file.

        Raises InconsistentWriteState on the first mismatch found.
        """
        for file_info in self.get_metadata('files_written'):
            # Sizes are compared as strings because the local and remote
            # values may not share a type.
            if str(file_info['size']) != str(file_info['remote_size']):
                msg = 'Unexpected size of file {title}. Expected {size} - got {remote_size}'
                raise InconsistentWriteState(msg.format(**file_info))
            if file_info['hash'] != file_info['remote_hash']:
                msg = 'Unexpected hash of file {title}. Expected {hash} - got {remote_hash}'
                raise InconsistentWriteState(msg.format(**file_info))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting exporters
  Downloading exporters-0.7.0.tar.gz (59 kB)
[K     |████████████████████████████████| 59 kB 3.5 MB/s 
Collecting retrying
  Downloading retrying-1.3.3.tar.gz (10 kB)
Building wheels for collected packages: exporters, retrying
  Building wheel for exporters (setup.py) ... done
  Created wheel for exporters: filename=exporters-0.7.0-py3-none-any.whl size=102832 sha256=6a2c0650466f3e2601523dcad6a3ff429812c9cbe15aab2ce17d234fd74cb078
  Stored in directory: /root/.cache/pip/wheels/5a/89/59/99a43eebf35663359c4bed93ada1232dc52f41a3883fadab5a
  Building wheel for retrying (setup.py) ... done
  Created wheel for retrying: filename=retrying-1.3.3-py3-none-any.whl size=11447 sha256=d7a8e4302f82b5f65234e24767cb0aed626d9ff647810c1d3c01c5d6267f64ba
  Stored in directory: /root/.cache/pip/wheels/f9/8d/8d/f6af3f7f9eea3553bc2fe6d53e4b287dad18b06a861ac56ddf
Su

NameError: ignored