# GDoc_File

A class-based approach for interacting with Google Drive files.

In [1]:
# | default_exp google.GDoc_File

In [2]:
# | exporti


import os

from dataclasses import dataclass, field
from enum import Enum
from typing import List

import datetime as dt


from googleapiclient.errors import HttpError

from gdoc_sync.utils import (
    upsert_folder,
    convert_str_to_date,
    convert_str_file_name,
    download_zip,
    download_pptx,
)

import gdoc_sync.google.auth as ga

from nbdev.showdoc import patch_to

In [3]:
# | hide
from nbdev.showdoc import show_doc
from dotenv import load_dotenv

# Load environment variables from the repo-level .env file
# (presumably where the GDOC_KEY / GDOC_TOKEN values used by the sample cells live).
load_dotenv("../.env")

True

In [4]:
# | exports


class GoogleDrive_MimeType_Enum(Enum):
    """Short type labels mapped to the MIME-type strings used by Google Drive.

    Member names are the short labels (``docx``, ``gdoc``, ...); member values
    are the MIME strings.  Looking up by value, e.g.
    ``GoogleDrive_MimeType_Enum("application/pdf")``, returns the matching
    member, and ``.value`` gives the string to request as an export format.
    """

    # Office Open XML formats
    docx = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    pptx = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

    # other document / archive formats
    odt = "application/vnd.oasis.opendocument.text"
    rtf = "application/rtf"
    pdf = "application/pdf"
    txt = "text/plain"
    zip_file = "application/zip"
    epub = "application/epub+zip"

    # Google Drive native types
    folder = "application/vnd.google-apps.folder"
    gdoc = "application/vnd.google-apps.document"
    gsheet = "application/vnd.google-apps.spreadsheet"
    gshortcut = "application/vnd.google-apps.shortcut"
    gslides = "application/vnd.google-apps.presentation"

    # NOTE(review): the labels below would share the 'application/zip' value with
    # zip_file (an Enum treats repeated values as aliases) -- presumably why they
    # are disabled; confirm before re-enabling.
    # html = 'application/zip'
    # 'tex'  = 'application/zip'
    # 'html.zip'= 'application/zip'

In [5]:
# | exporti

# Export settings for Google Slides: request a .pptx and save it with download_pptx.
_slides_export = {
    "download_fn": download_pptx,
    "export_type": GoogleDrive_MimeType_Enum.pptx,
}

# Fallback export settings: request a zip archive and save it with download_zip.
_fallback_export = {
    "download_fn": download_zip,
    "export_type": GoogleDrive_MimeType_Enum.zip_file,
}

# Dispatch table keyed by Drive MIME string; the "default" entry is the fallback.
# Each entry pairs the format to export with the function that writes it to disk.
download_factory = {
    "application/vnd.google-apps.presentation": _slides_export,
    "default": _fallback_export,
}

# Google_Doc file

In [6]:
# | export
@dataclass
class GDoc_File:
    """Metadata for a single Google Drive file, plus its lazily fetched content.

    Attributes:
        doc_id: Drive file id.
        doc_name: file name as reported by Drive.
        modified_time: last-modified timestamp.
        url: browser link to the file (Drive's ``webViewLink``).
        auth: authenticated wrapper whose ``service`` is used for API calls;
            excluded from ``repr``.
        mime_str: raw MIME string reported by Drive.
        mime_type: short label for ``mime_str`` -- the *name* of the matching
            ``GoogleDrive_MimeType_Enum`` member (e.g. ``"gdoc"``).  Left at
            its initial value when ``mime_str`` is not a known member.
        download_factory: export settings for this file; starts as ``None``.
        parent_ls: ids of the parent folders, if any.
        parents: ``GDoc_File`` objects for ``parent_ls``, resolved in
            ``__post_init__``; excluded from ``repr``.
    """

    doc_id: str
    doc_name: str
    modified_time: dt.datetime
    url: str

    auth: ga.GoogleAuth = field(repr=False)
    mime_str: str

    # Holds the enum member's name (a str), not the enum member itself.
    mime_type: str = None
    download_factory: dict = None

    parent_ls: List[str] = field(default=None)
    parents: List = field(default=None, repr=False)

    # Deliberately un-annotated: a plain class attribute, not a dataclass field,
    # so it is absent from __init__ and repr.  Instances overwrite it once
    # content has been downloaded.
    content = None

    def __post_init__(self):
        """Derive ``mime_type`` and resolve parent ids into ``GDoc_File`` objects."""
        try:
            self.mime_type = GoogleDrive_MimeType_Enum(self.mime_str).name

        except ValueError as e:
            # Unknown MIME string: report it and keep mime_type as passed in.
            print(e)

        if self.parent_ls:
            # One lookup per parent id.  Each parent is itself a GDoc_File, so
            # its own parents are resolved the same way when it is constructed.
            self.parents = [
                GDoc_File.get_from_id(document_id=parent, auth=self.auth)
                for parent in self.parent_ls
            ]

    @classmethod
    def _from_json(cls, obj: dict, auth: ga.GoogleAuth, parent_ls: List[str] = None):
        """Build an instance from a Drive ``files`` resource dict.

        Args:
            obj: dict with ``mimeType``, ``webViewLink``, ``id``, ``name`` and
                ``modifiedTime`` keys, and optionally ``parents``.
            auth: auth wrapper stored on the instance for later API calls.
            parent_ls: parent ids to use instead of ``obj["parents"]``.

        Returns:
            A new instance of ``cls``.
        """
        return cls(
            mime_str=obj["mimeType"],
            url=obj["webViewLink"],
            doc_id=obj["id"],
            doc_name=obj["name"],
            modified_time=convert_str_to_date(obj["modifiedTime"]),
            parent_ls=parent_ls or obj.get("parents"),
            auth=auth,
        )

In [7]:
# Render the nbdev-generated signature/documentation for GDoc_File.
show_doc(GDoc_File)

---

[source](https://github.com/jaewilson07/gdoc_sync/blob/main/gdoc_sync/google/GDoc_File.py#L68){target="_blank" style="float:right; font-size:smaller"}

### GDoc_File

>      GDoc_File (doc_id:str, doc_name:str, modified_time:datetime.datetime,
>                 url:str, auth:gdoc_sync.google.auth.GoogleAuth, mime_str:str,
>                 mime_type:__main__.GoogleDrive_MimeType_Enum=None,
>                 download_factory:dict=None, parent_ls:List[str]=None,
>                 parents:List=None)

In [8]:
# | export
class GDoc_File_ServiceRequired(Exception):
    """Raised when a download is attempted without a Google API service object.

    Args:
        doc_url: URL of the document; shown in the message when truthy.
        doc_id: id of the document; shown in the message when ``doc_url`` is falsy.
    """

    def __init__(self, doc_url=None, doc_id=None):
        # Keep the identifiers so handlers can inspect them without parsing the message.
        self.doc_url = doc_url
        self.doc_id = doc_id

        # The client library's package is `googleapiclient` (see the HttpError import).
        message = f"service (googleapiclient.discovery.Resource) required to download {doc_url or doc_id}"
        super().__init__(message)

In [9]:
# | exporti


@patch_to(GDoc_File, cls_method=True)
def get_from_id(
    cls: GDoc_File,
    document_id: str,
    auth: ga.GoogleAuth = None,
    return_raw: bool = False,
):
    """Fetch one Drive file's metadata by id.

    Args:
        document_id: id of the Drive file to look up.
        auth: auth wrapper whose ``service`` performs the request.
        return_raw: when true, return the API response dict untouched.

    Returns:
        The raw response dict if ``return_raw`` is set, otherwise an instance
        built with ``cls._from_json``.
    """
    request = auth.service.files().get(
        fileId=document_id,
        fields="id,webViewLink, name, mimeType,modifiedTime,parents ",
    )
    payload = request.execute()

    return payload if return_raw else cls._from_json(obj=payload, auth=auth)

#### sample implementation of get_from_id

In [10]:
# import gdoc_sync.google.auth as ga

# id of the Drive document used for this demonstration
DOCUMENT_ID = "1m48jciWr2iZqwnhN7rezM8_GposSDZhGwQhsIdRqLJg"


# build the auth object from the values stored under the GDOC_KEY / GDOC_TOKEN env keys
google_auth = ga.GoogleAuth.get_creds_from_env(
    credentials_env_key="GDOC_KEY", token_env_key="GDOC_TOKEN"
)

# fetch the file's metadata and show the resulting object's attributes
GDoc_File.get_from_id(
    document_id=DOCUMENT_ID, auth=google_auth, return_raw=False
).__dict__

using saved token
refreshing creds using saved token
generating service object on GoogleAuth


{'doc_id': '1m48jciWr2iZqwnhN7rezM8_GposSDZhGwQhsIdRqLJg',
 'doc_name': 'test_subfolder',
 'modified_time': datetime.datetime(2023, 8, 17, 21, 1, 6, 479000, tzinfo=tzutc()),
 'url': 'https://docs.google.com/document/d/1m48jciWr2iZqwnhN7rezM8_GposSDZhGwQhsIdRqLJg/edit?usp=drivesdk',
 'auth': GoogleAuth(creds=<google.oauth2.credentials.Credentials object at 0x7f9236aecc40>, service=<googleapiclient.discovery.Resource object at 0x7f9235201630>, scope=['https://www.googleapis.com/auth/drive', 'https://www.googleapis.com/auth/drive.file', 'https://www.googleapis.com/auth/drive.metadata.readonly', 'https://www.googleapis.com/auth/drive.readonly']),
 'mime_str': 'application/vnd.google-apps.document',
 'mime_type': 'gdoc',
 'download_factory': None,
 'parent_ls': ['1grGeTxDXjEoo8MkRsMzTg6v_Pdtuxi1T'],
 'parents': [GDoc_File(doc_id='1grGeTxDXjEoo8MkRsMzTg6v_Pdtuxi1T', doc_name='subfolder_test', modified_time=datetime.datetime(2023, 8, 17, 21, 0, 55, 634000, tzinfo=tzutc()), url='https://drive.

In [11]:
# | export
class GDocFile_DownloadError(Exception):
    """Raised when a document's content could not be downloaded.

    Args:
        doc_url: URL of the document, included in the error message.
    """

    def __init__(self, doc_url):
        super().__init__(f"failure to download content for {doc_url}")

In [12]:
# | exporti
@patch_to(GDoc_File)
def get_content(self: GDoc_File):
    """Export the file's content from Drive and cache it on ``self.content``.

    Selects the export settings for this file's MIME string (falling back to
    the ``"default"`` entry), stores them on ``self.download_factory``, then
    tries the export up to three times.  Content already present on the
    instance is returned without a new request.

    Returns:
        The exported content as returned by the API.

    Raises:
        GDocFile_DownloadError: if no content was obtained after all attempts;
            the last underlying error, if any, is attached as ``__cause__``.
    """
    # The factory is keyed by Drive MIME strings, so look up `mime_str`;
    # `mime_type` holds the short enum name (e.g. "gslides") and never matches.
    self.download_factory = download_factory.get(
        self.mime_str, download_factory["default"]
    )
    export_mime = self.download_factory["export_type"].value

    max_attempts = 3
    attempts = 0
    last_error = None

    while attempts < max_attempts and self.content is None:
        attempts += 1
        try:
            self.content = (
                self.auth.service.files()
                .export(fileId=self.doc_id, mimeType=export_mime)
                .execute()
            )

        except Exception as err:
            # Best-effort retry: report the failure (HttpError included) and try again.
            print(err)
            last_error = err

    if not self.content:
        # `url` is the attribute that stores Drive's webViewLink on this class.
        raise GDocFile_DownloadError(self.url) from last_error

    return self.content

In [13]:
import gdoc_sync.google.auth as ga

# id of the Drive document used for this demonstration
DOCUMENT_ID = "1m48jciWr2iZqwnhN7rezM8_GposSDZhGwQhsIdRqLJg"


# build the auth object from the values stored under the GDOC_KEY / GDOC_TOKEN env keys
google_auth = ga.GoogleAuth.get_creds_from_env(
    credentials_env_key="GDOC_KEY", token_env_key="GDOC_TOKEN"
)

# look the document up by id ...
gdoc = GDoc_File.get_from_id(
    document_id=DOCUMENT_ID, auth=google_auth, return_raw=False
)

# ... and export its content (the cell output shows the returned bytes)
gdoc.get_content()

using saved token
refreshing creds using saved token
generating service object on GoogleAuth


b'PK\x03\x04\x14\x00\x08\x08\x08\x00\xa3r\x9aW\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x13\x00\x00\x00test_subfolder.html\x95R\xcbn\xc3 \x10\xfc\x15\x8bJ\xbd\xd1\xc4Q\xfa\x10\xd8\x96z\xe9\x17\xa4\x1f\x80\xcd\xda\xa0b \xb0N\xe2~}\xc1v\x0e\x95z\xe9J\xa0Y\xed\xec\xec\x88\xa5R8\x9a\xa6R dS\x8d\x80\xa2\xe8\x9cE\xb0X\x13\x84\x1b\xeer\x99\x17\x9d\x12!\x02\xd6\x9f\xa7\x0f\xfaF\n\x85\xe8)\x9c\'}\xa9\xc9F\xa78{ M\xb5[\x95Z\'\xe7\xa23"\xc6\x9aH\xd7\xd1\x8dE\x8a\x88\xb3\x81\x9a\xb4\xa2\xfb\x1a\x82\x9b\xacL%\xe3\x02{\xe8\x97\xe0^H\xa9\xed\xc0^\x0f\x1e\x8b\xdf\x17\x1f\xc5\x8d^\xb5D\xc5\x8e/o\x1e\xd34\x7f\xd7\xbb\xb7\xed\x13)\x0c\xda&\xb0\xe9\xee\x97\xe0}2@\xa3\xfe\x06V\x96IjI{1j3\xb3\xc7\xf3\xe4\x90\xbf\x07-\xcc\n\xb9\xd1\x16\xa8\x02=(d\xe5S\xf9\xcc]\xf0J\xd8\xc8\x0e<\xcdw\xd7\x0c\xf2\xebPa\xf4`\x99\x81>\x9b\x89^\xd8\xbb\x9f?\x86_W\xbdc\xca\x97^\t\x9d\x0b\x02\xb5\xb3\xcc:\x0b\xfc\x02\x01u\'\xcc&\xda\x8a\x08\xd9\xc7\xff\x9c\xaf\xe4\xec!\x89\x86Q\x18\xd2\x9c b\x11\xa7\xb6wFB\xa8v\xd9f\xda\x93O

In [14]:
# | exporti
@patch_to(GDoc_File)
def download_file(
    self: GDoc_File, output_folder: str = "", is_use_file_id: bool = False
):
    """Write the file's exported content into its own sub-folder.

    Args:
        output_folder: directory under which the per-file folder is created.
        is_use_file_id: name the per-file folder after ``doc_id`` instead of
            the cleaned ``doc_name``.

    Returns:
        Whatever the selected ``download_fn`` returns.
    """
    # Fetch the content first if it has not been downloaded yet.
    if not self.content:
        self.get_content()

    if is_use_file_id:
        folder_name = self.doc_id
    else:
        folder_name = convert_str_file_name(self.doc_name)

    # Make sure the destination folder exists before writing into it.
    target_dir = os.path.join(output_folder, folder_name)
    upsert_folder(target_dir)

    writer = self.download_factory["download_fn"]
    return writer(self.content, target_dir)

In [15]:
# import gdoc_sync.google.auth as ga

# id of the Drive document used for this demonstration
DOCUMENT_ID = "1m48jciWr2iZqwnhN7rezM8_GposSDZhGwQhsIdRqLJg"

# build the auth object from the values stored under the GDOC_KEY / GDOC_TOKEN env keys
google_auth = ga.GoogleAuth.get_creds_from_env(
    credentials_env_key="GDOC_KEY", token_env_key="GDOC_TOKEN"
)

# look the document up by id, then export it into a folder named after the document
gdoc = GDoc_File.get_from_id(
    document_id=DOCUMENT_ID, auth=google_auth, return_raw=False
)
gdoc.download_file(output_folder="../TEST/drive/gdoc_download-file")

using saved token
refreshing creds using saved token
generating service object on GoogleAuth


'successfully downloaded zip to ../TEST/drive/gdoc_download-file/test_subfolder'

In [16]:
# | hide
import nbdev

# Run nbdev's export step for the project's notebooks.
nbdev.nbdev_export()