# GDoc_Files

> A class-based approach for interacting with folder structures in Google Drive

In [None]:
# | default_exp google.GDoc_Files

In [2]:
# | exporti

import os

from dataclasses import dataclass, field
from typing import List

from gdoc_sync.utils import convert_str_file_name
import gdoc_sync.google.auth as ga
import gdoc_sync.google.GDoc_File as gd

from nbdev.showdoc import patch_to

In [3]:
# | hide
from dotenv import load_dotenv

# Stop the notebook early when load_dotenv reports that nothing was loaded from ../.env
# (path is relative to the working directory of the kernel).
# NOTE(review): presumably this file supplies the GDOC_KEY / GDOC_TOKEN variables named in the examples — confirm.
assert load_dotenv("../.env")

# Google Drive File Looper

Retrieves a listing of all the files in a folder and, recursively, in its subfolders.


In [4]:
# | export
@dataclass
class GDoc_Files:
    """
    Handle for listing the contents of a Google Drive folder.

    Fields:
        auth: GoogleAuth object; supplies `creds` and builds the Drive service.
        folder_id: optional id of a Drive folder, stored on the instance.
        folder_content: list of GDoc_File objects; a fresh empty list per instance.
        service: Drive v3 service; always rebuilt from `auth` in `__post_init__`.
        creds: credentials; always copied from `auth.creds` in `__post_init__`.
    """

    # excluded from repr, as are `service` and `creds` below
    auth: ga.GoogleAuth = field(repr=False)

    folder_id: str = None
    folder_content: List[gd.GDoc_File] = field(default_factory=list)

    # values passed for these two are overwritten in __post_init__
    service: ga.Resource = field(default=None, repr=False)
    creds: ga.Credentials = field(default=None, repr=False)

    def __post_init__(self):
        """Derive `creds` and a Drive v3 `service` from `auth`."""
        auth = self.auth
        self.creds = auth.creds
        self.service = auth.generate_service(
            service_name="drive", service_version="v3"
        )

In [5]:
# | exporti


@patch_to(GDoc_Files)
def _get_folder_contents(
    self: GDoc_Files,
    folder_id: str,
    return_raw: bool = False,
    page_size: int = 10,
) -> List[gd.GDoc_File]:
    """
    List the direct children of a Google Drive folder.

    Sends `files().list` requests with the query `'<folder_id>' in parents`
    and follows `nextPageToken` until no further page is reported.

    Args:
        folder_id: id of the Drive folder whose children are listed.
        return_raw: when True, return the file dictionaries exactly as they
            appear in the API responses instead of `GDoc_File` objects.
        page_size: maximum number of files requested per API call. Defaults
            to 10 (the previously hard-coded value); a larger value means
            fewer round trips for big folders.

    Returns:
        A list of `GDoc_File` objects built with `GDoc_File._from_json`, or,
        when `return_raw` is True, the list of raw dictionaries (despite the
        return annotation).

    NOTE(review): an earlier version of this docstring said a `folder_id`
    that maps to a file yields a one-element list; the `in parents` query
    does not obviously do that — confirm against the Drive API.
    """
    auth = self.auth
    page_token = None
    file_ls = []

    # NOTE(review): requests go through `auth.service`, while __post_init__
    # also stores a Drive service on `self.service` — confirm they are the
    # same object.
    while True:
        res = (
            auth.service.files()
            .list(
                q=f"'{folder_id}' in parents",
                pageSize=page_size,
                fields="nextPageToken, files(id,webViewLink, name, mimeType,modifiedTime )",
                pageToken=page_token,
            )
            .execute()
        )

        file_ls.extend(res.get("files", []))

        # a missing/empty token means the last page has been read
        page_token = res.get("nextPageToken", None)
        if not page_token:
            break

    if return_raw:
        return file_ls

    return [gd.GDoc_File._from_json(file_obj, auth=self.auth) for file_obj in file_ls]

In [6]:
# Drive folder id used by the example calls in this cell.
FOLDER_ID = "1SRrD1dNgZgHYjnhkJtARbLhydMP94qWi"

# Build the auth object that GDoc_Files expects; the arguments are the names of
# the environment variables to use (presumably holding the credentials and token — confirm).
google_auth = ga.GoogleAuth.get_creds_from_env(
    credentials_env_key="GDOC_KEY", token_env_key="GDOC_TOKEN"
)

gdoc_files = GDoc_Files(folder_id=FOLDER_ID, auth=google_auth)

# Non-recursive listing: only items whose parent is FOLDER_ID.
gdoc_files._get_folder_contents(folder_id=FOLDER_ID)

using saved token
generating service object on GoogleAuth
generating service object on GoogleAuth


[GDoc_File(doc_id='1j7XsbvFy0xUgGL6i-3LSChKvzSmTZSOyimEt6tQS-Kk', doc_name='Sample Doc for Google_Sync project', modified_time=datetime.datetime(2023, 11, 27, 19, 39, 0, 131000, tzinfo=tzutc()), url='https://docs.google.com/document/d/1j7XsbvFy0xUgGL6i-3LSChKvzSmTZSOyimEt6tQS-Kk/edit?usp=drivesdk', mime_str='application/vnd.google-apps.document', mime_type='gdoc', download_factory=None, parent_ls=None),
 GDoc_File(doc_id='1grGeTxDXjEoo8MkRsMzTg6v_Pdtuxi1T', doc_name='subfolder_test', modified_time=datetime.datetime(2023, 8, 17, 21, 0, 55, 634000, tzinfo=tzutc()), url='https://drive.google.com/drive/folders/1grGeTxDXjEoo8MkRsMzTg6v_Pdtuxi1T', mime_str='application/vnd.google-apps.folder', mime_type='folder', download_factory=None, parent_ls=None),
 GDoc_File(doc_id='1_k4NRraKI1TmHNlpQCuqJrWr6dP7DNracdMCtfN8XlM', doc_name='sample slide', modified_time=datetime.datetime(2023, 8, 17, 13, 54, 31, 973000, tzinfo=tzutc()), url='https://docs.google.com/presentation/d/1_k4NRraKI1TmHNlpQCuqJrWr6

In [7]:
# | exporti


@patch_to(GDoc_Files)
def get_files(
    self: GDoc_Files, folder_id, file_ls=None, folder_path="", is_recursive: bool = True
):
    """
    Collect the files in a Drive folder, optionally descending into subfolders.

    Args:
        folder_id: id of the Drive folder to list.
        file_ls: optional list to accumulate results into; it is extended in
            place. A new list is created when None is passed.
        folder_path: path accumulated from the names of the folders descended
            into so far. It is only forwarded to the recursive calls; nothing
            else in this function reads it.
        is_recursive: when True, every item whose `mime_type` is "folder" is
            listed as well and its contents are appended to the same list.

    Returns:
        The accumulated list of items (folders themselves are included).
        The same list is also stored on `self.file_ls`.
    """
    # `is None` (rather than truthiness) so a caller-supplied empty list is
    # still used as the accumulator instead of being silently replaced.
    if file_ls is None:
        file_ls = []

    new_files = self._get_folder_contents(folder_id=folder_id)
    file_ls += new_files

    if is_recursive:
        for google_doc in new_files:
            if google_doc.mime_type != "folder":
                continue

            self.get_files(
                folder_id=google_doc.doc_id,
                folder_path=os.path.join(
                    folder_path, convert_str_file_name(google_doc.doc_name)
                ),
                file_ls=file_ls,
            )

    self.file_ls = file_ls
    return self.file_ls

In [8]:
# Same Drive folder id as in the previous example cell.
FOLDER_ID = "1SRrD1dNgZgHYjnhkJtARbLhydMP94qWi"

# Build the auth object that GDoc_Files expects; the arguments are the names of
# the environment variables to use (presumably holding the credentials and token — confirm).
google_auth = ga.GoogleAuth.get_creds_from_env(
    credentials_env_key="GDOC_KEY", token_env_key="GDOC_TOKEN"
)

gdoc_files = GDoc_Files(auth=google_auth)
# Recursive listing: items in FOLDER_ID plus the contents of every subfolder found.
gdoc_files.get_files(folder_id=FOLDER_ID, is_recursive=True)

using saved token
generating service object on GoogleAuth
generating service object on GoogleAuth


[GDoc_File(doc_id='1j7XsbvFy0xUgGL6i-3LSChKvzSmTZSOyimEt6tQS-Kk', doc_name='Sample Doc for Google_Sync project', modified_time=datetime.datetime(2023, 11, 27, 19, 39, 0, 131000, tzinfo=tzutc()), url='https://docs.google.com/document/d/1j7XsbvFy0xUgGL6i-3LSChKvzSmTZSOyimEt6tQS-Kk/edit?usp=drivesdk', mime_str='application/vnd.google-apps.document', mime_type='gdoc', download_factory=None, parent_ls=None),
 GDoc_File(doc_id='1grGeTxDXjEoo8MkRsMzTg6v_Pdtuxi1T', doc_name='subfolder_test', modified_time=datetime.datetime(2023, 8, 17, 21, 0, 55, 634000, tzinfo=tzutc()), url='https://drive.google.com/drive/folders/1grGeTxDXjEoo8MkRsMzTg6v_Pdtuxi1T', mime_str='application/vnd.google-apps.folder', mime_type='folder', download_factory=None, parent_ls=None),
 GDoc_File(doc_id='1_k4NRraKI1TmHNlpQCuqJrWr6dP7DNracdMCtfN8XlM', doc_name='sample slide', modified_time=datetime.datetime(2023, 8, 17, 13, 54, 31, 973000, tzinfo=tzutc()), url='https://docs.google.com/presentation/d/1_k4NRraKI1TmHNlpQCuqJrWr6

In [9]:
# | hide
import nbdev

# Run nbdev's export step for the project.
# NOTE(review): the saved output of this cell is a JSONDecodeError — re-run and resolve it before relying on the exported module.
nbdev.nbdev_export()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)