# Google Workspace APIs
Use Google's Workspace API (managed here https://console.cloud.google.com/) for various tasks.

Credentials can be downloaded as `credentials.json` from the API Credentials and used to produce `token.json` for persistent use. OAuth 2.0 requires the redirect URI in the app and in the "Flow" - within the code - to be the same (the default port for localhost is 8080). It's also worth noting that for apps in "testing," users need to be added via the OAuth consent screen, although in testing "scope" does not need to be specified in the API (it can be tested and switched in the code).

TODO:
- convert to `.py` for CLI use
- enable use of folder ID from Google Drive rather than using `folder_map`
- split git functions into another class (maybe)
- add folder/file ignore features


- COMPLETE: fix bundle commits to be committed on last date, rather than first
- COMPLETE: bundle commits oldest to newest (to prevent older versions overwriting newer ones within bundles)

Import packages.

In [1]:
import io
import os
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
import time
import pytz
import datetime
import git

Google API class (based on `quickstart.py`).

In [2]:
class GoogleDrive:
    def __init__(self):
        # delete token.json before changing these
        self.scopes = [
            # 'https://www.googleapis.com/auth/drive.metadata.readonly',
            'https://www.googleapis.com/auth/drive.readonly'
        ]
        self.creds = None
        self.credentials()
        self.connect()
    
    def credentials(self):
        # store credentials (user access and refresh tokens)
        if os.path.exists('token.json'):
            self.creds = Credentials.from_authorized_user_file('token.json', self.scopes)
        # if no (valid) credentials available, let user log in
        if not self.creds or not self.creds.valid:
            if self.creds and self.creds.expired and self.creds.refresh_token:
                self.creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file('credentials.json', self.scopes)
                self.creds = flow.run_local_server()  # port MUST match redirect URI in Google App
            # save credentials for the next run
            with open('token.json', 'w') as token:
                token.write(self.creds.to_json())
                
    def connect(self):
        # attempt to connect to the API
        try:
            self.service = build('drive', 'v3', credentials=self.creds)
            # self.service = build('gmail', 'v1', credentials=self.creds)  # use later for gmail...
        except HttpError as error:
            print(f'An error occurred: {error}')
            
    def get_id(self, values, term='name', operator='=', ftype='file', ignore_trashed=True):
        q = f'{term} {operator} "{values}" '
        if ftype == 'folder':
            q += 'and mimeType = "application/vnd.google-apps.folder" '
        elif ftype == 'json':
            q += 'and mimeType = "application/json" '
        if ignore_trashed:
            q += 'and trashed = false'
        l = self.service.files().list(q=q).execute()
        
        return l['files']
            
    def folder_contents(self, i, ignore_trashed=True):
        q = f'"{i}" in parents '
        if ignore_trashed:
            q += 'and trashed = false '
        l = g.service.files().list(q=q).execute()
        
        return l['files']
            
    def get_revisions(self, i):
        try:
            r = self.service.revisions().list(fileId=i).execute()
        
            return r['revisions']
        
        except:
            return
        
    def qry_fields(self, i, r=None, fields=['parents']):
        if r is None:
            p = self.service.files().get(fileId=i, fields=','.join(fields)).execute()
        else:
            p = self.service.revisions().get(fileId=i, revisionId=r, fields=','.join(fields)).execute()
        
        return {f: p[f] for f in fields}
    
    def stream_file(self, i, r=None, out='stream', verbose=False):
        if r is None:
            request = self.service.files().get_media(fileId=i)
        else:
            request = self.service.revisions().get_media(fileId=i, revisionId=r)
        
        if out in ['stream', 'str']:
            stream = io.BytesIO()
        else:
            stream = io.FileIO(out, mode='w')
        downloader = MediaIoBaseDownload(stream, request)
        done = False
        while not done:
            status, done = downloader.next_chunk()
            if verbose:
                print(f'Download {int(status.progress() * 100)}%')
        if verbose:
            print(f'Size {status.total_size / 1024 / 1024:.2f}MB')

        if out in ['str']:
            return stream.getvalue()
        else:
            return stream
        
    ################
        
    def map_folder(self, folder, path=''):
        # if root, set path to folder name
        if path == '':
            path = folder['name']

        # scan contents
        contents = []
        for c in g.folder_contents(folder['id']):
            if c['mimeType'] == 'application/vnd.google-apps.folder':
                p = os.path.join(path, c['name'])
                contents.append(self.map_folder(c, path=p))
            else:
                f = {
                    'path': os.path.join(path, c['name']),
                    'id': c['id'],
                    'type': c['mimeType'],
                    'revisions': self.get_revisions(c['id'])
                }
                contents.append(f)
                
        # set up output dictionary
        out = {
            'path': path,
            'id': folder['id'],
            'type': folder['mimeType'],
            'contents': contents
        }
                
        return out
    
    def create_folders(self, d, root_folder=os.getcwd()):
        try:
            os.mkdir(os.path.join(root_folder, d['path']))
        except:
            pass

        for c in d['contents']:
            if 'contents' in c.keys():
                self.create_folders(c, root_folder=root_folder)
    
    def delete_folders(self, path):
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))
    
    def itemize_revisions(self, folder_map, revisions={}):
        for c in folder_map['contents']:
            if c['type'] == 'application/vnd.google-apps.folder':
                revisions = self.itemize_revisions(c, revisions=revisions)
            else:
                if 'revisions' in c.keys():
                    if len(c['revisions']) >= 100:
                        print('Warning: maximum number of Google Drive revisions used or exceeded by source file.')
                    for i, r in enumerate(c['revisions']):
                        revision = {
                            'path': c['path'],
                            'id': c['id'],
                            'rid': r['id'],
                            'version': i + 1
                        }
                        k = r['modifiedTime']
                        v = revisions.get(k, [])
                        if revision['rid'] not in [i['rid'] for i in v]:  # avoids duplicates if rerun
                            v.append(revision)
                            revisions.update({k: v})

        return revisions
    
    def bundle_commits(self, folder_map, minutes=240):
        # get commits
        commits = self.itemize_revisions(folder_map)
        dates = sorted(commits)

        # set time zones
        utc = pytz.timezone('UTC')
        tz = pytz.timezone('US/Eastern')

        # initialize variables
        out = {}
        rdates = []
        cdates = []
        comms = []

        # loop through sorted dates
        while len(dates) > 0:
            rdate = dates.pop(0)  # oldest -> newest
            parsed_date = datetime.datetime.strptime(rdate, '%Y-%m-%dT%H:%M:%S.%fZ')
            cdate = utc.localize(parsed_date).astimezone(tz)
            com = commits.get(rdate)

            # existing bundle
            if len(cdates) > 0:
                # within bundle threshold
                if (cdate - cdates[-1]).total_seconds() / 60 < minutes:  # time from previous edit
                    # grow bundle
                    rdates.append(rdate)
                    cdates.append(cdate)
                    comms += com  # extend list
                else:
                    # append previous bundle
                    out.update({rdates[-1]: {'cdate': cdates[-1], 'rdates': rdates, 'commits': comms}})
                    
                    # start new bundle
                    rdates = [rdate]
                    cdates = [cdate]
                    comms = com
                if len(dates) == 0:
                    # append final bundle
                    out.update({rdates[-1]: {'cdate': cdates[-1], 'rdates': rdates, 'commits': comms}})
            else:
                # start new bundle
                rdates = [rdate]
                cdates = [cdate]
                comms = com

        out = {d: out[d] for d in sorted(out)}
        return out
    
    def make_repo(self, folder_map, minutes=240, root_folder=os.getcwd()):
        # get name and commit info
        name = folder_map['path']
        bundled_commits = self.bundle_commits(folder_map, minutes)
        
        # remove any existing git folders
        if os.path.isdir(os.path.join(root_folder, name, '.git')):
            self.delete_folders(os.path.join(root_folder, name, '.git'))
            print('Old git folder removed.\n')

        # set up git repo
        print('Initializing git repo.')
        repo = git.Repo.init(os.path.join(root_folder, name), expand_vars=False)
        author = git.Actor(name='Craig N', email='7h47ch@gmail.com')
        utc = pytz.timezone('UTC')
        tz = pytz.timezone('US/Eastern')

        # create folders
        print('Creating folder structure.\n')
        g.create_folders(folder_map, root_folder=root_folder)

        # auto-commits
        for i, (k, v) in enumerate(bundled_commits.items()):
            # make files
            cdate = v['cdate']
            comms = v['commits']
            print(f'Auto-commit {i+1}, adding {len(comms)} updates bundled from {k}...')
            for c in comms:
                path = os.path.join(root_folder, c['path'])
                print(f'\t{c["path"]}, v{c["version"]}')
                self.stream_file(c['id'], r=c['rid'], out=path)
                # may need to attempt to read if unsure whether writing in progress

            # commit to repo
            repo.index.add([os.path.join(root_folder, c['path']) for c in comms])
            repo.index.commit(f'Auto-commit {i+1} (via Google Drive-to-git tool).', author=author, committer=author,
                              author_date=cdate, commit_date=cdate)  # add author or committer args
            
        print(f'\nGit repo written!')

## Commit to new `git` repo
The GitPython package is as lazy as possible meaning that it takes arguments from existing git environmental variables where possible.

Connect to API.

In [3]:
g = GoogleDrive()

Scout folders, files, and revisions and create git repo.

In [4]:
folder_results = g.get_id('google_workspace', ftype='folder')
folder_map = g.map_folder(folder_results[0])
g.make_repo(folder_map, root_folder='C:\\Users\\7h47c\\Desktop')

Initializing git repo.
Creating folder structure.

Auto-commit 1, adding 20 updates bundled from 2022-03-13T04:16:36.000Z...
	google_workspace\credentials.json, v1
	google_workspace\.ipynb_checkpoints\credentials-checkpoint.json, v1
	google_workspace\google_apis.ipynb, v1
	google_workspace\.ipynb_checkpoints\google_apis-checkpoint.ipynb, v1
	google_workspace\credentials.json, v2
	google_workspace\google_apis.ipynb, v2
	google_workspace\.ipynb_checkpoints\google_apis-checkpoint.ipynb, v2
	google_workspace\google_apis.ipynb, v3
	google_workspace\.ipynb_checkpoints\google_apis-checkpoint.ipynb, v3
	google_workspace\google_apis.ipynb, v4
	google_workspace\.ipynb_checkpoints\google_apis-checkpoint.ipynb, v4
	google_workspace\google_apis.ipynb, v5
	google_workspace\.ipynb_checkpoints\google_apis-checkpoint.ipynb, v5
	google_workspace\google_apis.ipynb, v6
	google_workspace\.ipynb_checkpoints\google_apis-checkpoint.ipynb, v6
	google_workspace\google_apis.ipynb, v7
	google_workspace\.ipynb_che

HttpError: <HttpError 404 when requesting https://www.googleapis.com/drive/v3/files/10pnOxpaBS_uPx_gKcORm6DqBKB4UUQHZ/revisions/0B78DmzHQDrSOc1c1RmZoL1B2a0lieGJZWUErK3ZaZ0xudnBjPQ?alt=media returned "Revision not found: 0B78DmzHQDrSOc1c1RmZoL1B2a0lieGJZWUErK3ZaZ0xudnBjPQ.". Details: "[{'domain': 'global', 'reason': 'notFound', 'message': 'Revision not found: 0B78DmzHQDrSOc1c1RmZoL1B2a0lieGJZWUErK3ZaZ0xudnBjPQ.', 'locationType': 'parameter', 'location': 'revisionId'}]">