# Google Workspace APIs
Use Google's Workspace APIs (managed at https://console.cloud.google.com/) for various tasks.

Credentials can be downloaded as `credentials.json` from the API Credentials and used to produce `token.json` for persistent use. OAuth 2.0 requires the redirect URI in the app and in the "Flow" - within the code - to be the same (the default port for localhost is 8080). It's also worth noting that for apps in "testing," users need to be added via the OAuth consent screen, although in testing "scope" does not need to be specified in the API (it can be tested and switched in the code).

TODO:
- fix bundle == max_commits
- split git functions into another class
- add user config method
- account for revisions not being found
- convert to `.py` for CLI use
- fix recursive functions holding terminal output in argument line


- COMPLETE: add folder/file ignore features
- COMPLETE: enable use of folder ID from Google Drive rather than using `folder`
- COMPLETE: use only the last bundle of files per commit
- COMPLETE: fix bundle commits to be committed on last date, rather than first
- COMPLETE: bundle commits oldest to newest (to prevent older versions overwriting newer ones within bundles)
- REMOVED: sync file ignore and gitignore (split between ignore_folders and ignore_files)

Import packages.

In [None]:
import io
import os
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
import time
import pytz
import datetime
import git

Google API class (based on `quickstart.py`).

In [None]:
class GoogleDrive:
    '''
    Small convenience wrapper around the Google Drive v3 API client.

    Creating an instance loads (or interactively obtains) OAuth credentials
    and then builds the Drive service object used by every other method.
    '''

    def __init__(self):
        # delete token.json before changing these
        self.scopes = [
            # alternative, narrower scope:
            # 'https://www.googleapis.com/auth/drive.metadata.readonly',
            'https://www.googleapis.com/auth/drive.readonly'
        ]
        self.creds = None
        self.service = None  # stays None if connect() fails to build the client
        self.credentials()
        self.connect()

    def credentials(self):
        '''
        Populate `self.creds` from `token.json`, refreshing or re-authorizing
        as needed, then write the resulting credentials back to `token.json`.
        '''
        # stored credentials (user access and refresh tokens)
        if os.path.exists('token.json'):
            self.creds = Credentials.from_authorized_user_file('token.json', self.scopes)

        # nothing more to do when the stored credentials are still valid
        if self.creds and self.creds.valid:
            return

        if self.creds and self.creds.expired and self.creds.refresh_token:
            self.creds.refresh(Request())
        else:
            # no usable credentials: let the user log in
            flow = InstalledAppFlow.from_client_secrets_file('credentials.json', self.scopes)
            self.creds = flow.run_local_server()  # port MUST match redirect URI in Google App

        # save credentials for the next run
        with open('token.json', 'w') as token:
            token.write(self.creds.to_json())

    def connect(self):
        '''
        Build the Drive v3 service and store it on `self.service`.

        An HttpError is reported on stdout instead of being raised; in that
        case `self.service` keeps its previous value.
        '''
        try:
            self.service = build('drive', 'v3', credentials=self.creds)
        except HttpError as error:
            print(f'An error occurred: {error}')

    @staticmethod
    def _list_all(resource, key, **params):
        '''
        Call `resource.list(**params)` repeatedly, following `nextPageToken`,
        and return the concatenated items stored under `key` in each response.
        '''
        items = []
        token = None
        while True:
            if token:
                params['pageToken'] = token
            response = resource.list(**params).execute()
            items.extend(response.get(key, []))
            token = response.get('nextPageToken')
            if not token:
                return items

    def id_get(self, i):
        '''
        Return the file resource for the file/folder ID `i`.
        '''
        return self.service.files().get(fileId=i).execute()

    def id_search(self, values, term='name', operator='=', ftype='file', ignore_trashed=True):
        '''
        Search Drive with the query `<term> <operator> "<values>"`.

        ftype='folder' or ftype='json' adds a mimeType restriction; any other
        value adds none. ignore_trashed=True excludes trashed items.
        Returns the matching file resources from every result page.
        '''
        q = f'{term} {operator} "{values}" '
        if ftype == 'folder':
            q += 'and mimeType = "application/vnd.google-apps.folder" '
        elif ftype == 'json':
            q += 'and mimeType = "application/json" '
        if ignore_trashed:
            q += 'and trashed = false'

        return self._list_all(self.service.files(), 'files', q=q)

    def folder_contents(self, i, ignore_trashed=True):
        '''
        Return the file resources whose parent is the folder ID `i`,
        collected from every result page.
        '''
        q = f'"{i}" in parents '
        if ignore_trashed:
            q += 'and trashed = false '

        return self._list_all(self.service.files(), 'files', q=q)

    def get_revisions(self, i):
        '''
        Return the list of revisions of file ID `i`, or None when the
        request fails.
        '''
        try:
            return self._list_all(self.service.revisions(), 'revisions', fileId=i)
        except Exception:
            # best effort: a failed lookup is reported to the caller as None;
            # KeyboardInterrupt/SystemExit are deliberately not swallowed
            return None

    def qry_fields(self, i, r=None, fields=('parents',)):
        '''
        Fetch only `fields` for file ID `i` (or for its revision `r` when
        given) and return them as a {field: value} dictionary.
        '''
        selection = ','.join(fields)
        if r is None:
            p = self.service.files().get(fileId=i, fields=selection).execute()
        else:
            p = self.service.revisions().get(fileId=i, revisionId=r, fields=selection).execute()

        return {f: p[f] for f in fields}

    def stream_file(self, i, r=None, out='stream', verbose=False):
        '''
        Download the content of file ID `i` (or of its revision `r`).

        out='stream' returns the io.BytesIO buffer holding the download,
        out='str' returns the downloaded bytes, and any other value is
        treated as a local path that the content is written to. In the path
        case the file is closed before returning, and the (closed) io.FileIO
        object is returned.
        '''
        if r is None:
            request = self.service.files().get_media(fileId=i)
        else:
            request = self.service.revisions().get_media(fileId=i, revisionId=r)

        in_memory = out in ('stream', 'str')
        stream = io.BytesIO() if in_memory else io.FileIO(out, mode='w')
        try:
            downloader = MediaIoBaseDownload(stream, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                if verbose:
                    print(f'Download {int(status.progress() * 100)}%')
            if verbose:
                print(f'Size {status.total_size / 1024 / 1024:.2f}MB')
        finally:
            # release the file handle even when the download fails part-way
            if not in_memory:
                stream.close()

        if out == 'str':
            return stream.getvalue()
        return stream

    
class Drive2Git:
    '''
    Rebuild a local git repository from the revision history of a Google
    Drive folder.

    The folder is scanned once at construction (`folder_map`); `make_repo`
    then groups the revisions into time-based bundles and writes one commit
    per bundle.
    '''

    _FOLDER_MIME = 'application/vnd.google-apps.folder'

    def __init__(self, drive, folder, local_path=None, ignore_folders=None, ignore_files=None):
        '''
        drive: object providing id_get, folder_contents, get_revisions and
            stream_file (see GoogleDrive).
        folder: Drive folder ID, folder resource dictionary, or a list whose
            first item is used.
        local_path: directory the repository folder is created in; defaults
            to the working directory at call time.
        ignore_folders / ignore_files: names excluded from commits and
            written to .gitignore.
        '''
        self.drive = drive
        self.folder = self.check_object(folder)
        self.local_path = os.getcwd() if local_path is None else local_path
        self.ignore_folders = [] if ignore_folders is None else ignore_folders
        self.ignore_files = [] if ignore_files is None else ignore_files
        # reuse the already-resolved folder so an ID is only looked up once
        self.folder_map = self.map_folder(self.folder)

    def check_object(self, obj):
        '''
        Normalize `obj` to a Drive resource dictionary.

        A string is treated as an ID and looked up through the drive; a list
        yields its first item; anything else is returned unchanged.
        '''
        if isinstance(obj, str):
            print('Getting object info using ID.')
            return self.drive.id_get(obj)
        if isinstance(obj, list):
            print('Getting object info from first list item.')
            if not obj:
                raise ValueError('Cannot get object info from an empty list.')
            return obj[0]

        return obj

    def check_ignore(self, obj, ignorances):
        '''
        Return True when the name of `obj` is listed in `ignorances`.
        '''
        return obj['name'] in ignorances

    def map_folder(self, folder, path='', ignored=False):
        '''
        Recursive.

        Return a nested dictionary describing `folder`: its path, id, name,
        type, gitignore flag and contents. Files additionally carry their
        revisions as returned by the drive. `ignored` marks that an ancestor
        folder is ignored, so everything below it is flagged as well.
        '''
        # check if id used
        folder = self.check_object(folder)

        # if root, set path to folder name
        if path == '':
            path = folder['name']

        folder_ignored = ignored or self.check_ignore(folder, self.ignore_folders)

        # scan contents
        contents = []
        for c in self.drive.folder_contents(folder['id']):
            child_path = os.path.join(path, c['name'])
            if c['mimeType'] == self._FOLDER_MIME:
                contents.append(self.map_folder(c, path=child_path, ignored=folder_ignored))
            else:
                contents.append({
                    'path': child_path,
                    'id': c['id'],
                    'name': c['name'],
                    'type': c['mimeType'],
                    'gitignore': folder_ignored or self.check_ignore(c, self.ignore_files),
                    'revisions': self.drive.get_revisions(c['id']),
                })

        return {
            'path': path,
            'id': folder['id'],
            'name': folder['name'],
            'type': folder['mimeType'],
            'gitignore': folder_ignored,
            'contents': contents,
        }

    def create_folders(self, folder_map):
        '''
        Recursive.

        Create the local directory for `folder_map` and for every sub-folder
        in its contents. Existing directories are left untouched.
        '''
        os.makedirs(os.path.join(self.local_path, folder_map['path']), exist_ok=True)

        for c in folder_map['contents']:
            if 'contents' in c:
                self.create_folders(c)

    def delete_folders(self, path):
        '''
        Delete every file and sub-directory below `path`; `path` itself is
        kept.
        '''
        # bottom-up so each directory is already empty when it is removed
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
                os.rmdir(os.path.join(root, name))

    def itemize_revisions(self, folder_map, revisions=None):
        '''
        Recursive.

        Collect every file revision below `folder_map` into a dictionary
        keyed by the revision's modifiedTime. Each value is a list of
        revision records (path, id, rid, name, gitignore, version). Files
        without revisions are skipped. Passing an existing `revisions`
        dictionary extends it in place.
        '''
        if revisions is None:
            revisions = {}

        for c in folder_map['contents']:
            if c['type'] == self._FOLDER_MIME:
                self.itemize_revisions(c, revisions=revisions)
                continue

            # the revision lookup may have stored None for this file
            file_revisions = c.get('revisions') or []
            if len(file_revisions) >= 100:
                print(f'Warning: maximum number of Google Drive revisions used or exceeded by {c["name"]}.')

            for version, r in enumerate(file_revisions, start=1):
                record = {
                    'path': c['path'],
                    'id': c['id'],
                    'rid': r['id'],
                    'name': c['name'],
                    'gitignore': c['gitignore'],
                    'version': version,
                }
                bucket = revisions.setdefault(r['modifiedTime'], [])
                # avoids duplicates if rerun on the same dictionary; the file
                # id is part of the key so two files never mask each other
                known = {(e['id'], e['rid']) for e in bucket}
                if (record['id'], record['rid']) not in known:
                    bucket.append(record)

        return revisions

    def bundle_commits(self, minutes=240, timezone='US/Eastern'):
        '''
        Group revisions into bundles and store them on `self.bundle`.

        Revisions are walked oldest to newest; a revision joins the current
        bundle when it is less than `minutes` after the previous revision,
        otherwise it starts a new bundle. Each bundle is keyed by its last
        revision timestamp and holds that moment converted to `timezone`
        ('cdate') plus the revision records ('files').
        '''
        commits = self.itemize_revisions(self.folder_map)

        # set time zones
        utc = pytz.timezone('UTC')
        tz = pytz.timezone(timezone)

        bundle = {}
        last_rdate = None
        last_cdate = None
        files = []

        for rdate in sorted(commits):  # oldest -> newest
            parsed_date = datetime.datetime.strptime(rdate, '%Y-%m-%dT%H:%M:%S.%fZ')
            cdate = utc.localize(parsed_date).astimezone(tz)

            # close the running bundle once the gap reaches the threshold
            if last_cdate is not None and (cdate - last_cdate).total_seconds() / 60 >= minutes:
                bundle[last_rdate] = {'cdate': last_cdate, 'files': files}
                files = []

            # extend a separate list so the itemized revisions stay untouched
            files.extend(commits[rdate])
            last_rdate, last_cdate = rdate, cdate

        # append final bundle (also covers a single revision date)
        if last_rdate is not None:
            bundle[last_rdate] = {'cdate': last_cdate, 'files': files}

        # keys were inserted in sorted order
        self.bundle = bundle

    def max_versions(self):
        '''
        Store on `self.max_commits` a copy of `self.bundle` in which every
        bundle keeps only the highest-version record per file id.
        `self.bundle` itself is not modified.
        '''
        max_commits = {}
        for key, info in self.bundle.items():
            latest = {}
            for c in info['files']:
                seen = latest.get(c['id'])
                if seen is None or c['version'] > seen['version']:
                    latest[c['id']] = c
            # build a new inner dictionary instead of editing the bundle's
            max_commits[key] = {**info, 'files': list(latest.values())}

        self.max_commits = max_commits

    def gitignore(self, name):
        '''
        (Re)write `<local_path>/<name>/.gitignore` with one `**/<entry>` line
        per ignored folder and file name.
        '''
        file_path = os.path.join(self.local_path, name, '.gitignore')
        patterns = list(self.ignore_folders) + list(self.ignore_files)

        # mode 'w' replaces any previous .gitignore content
        with open(file_path, 'w') as f:
            f.writelines(f'**/{p}\n' for p in patterns)

    def make_repo(self, minutes=240, timezone='US/Eastern', author_name='Craig N',
                  author_email='7h47ch@gmail.com'):
        '''
        Create the git repository from the mapped Drive folder.

        Revisions are bundled (see bundle_commits), reduced to the latest
        version per file (see max_versions), downloaded into the local folder
        and committed bundle by bundle with the bundle's date as author and
        commit date. Files flagged 'gitignore' are downloaded but not added.
        Any existing `.git` content in the target folder is removed first.
        '''
        # get name and commit info
        name = self.folder_map['path']
        self.bundle_commits(minutes, timezone=timezone)
        self.max_versions()

        # remove any existing git folders
        repo_path = os.path.join(self.local_path, name)
        git_path = os.path.join(repo_path, '.git')
        if os.path.isdir(git_path):
            self.delete_folders(git_path)
            print('Old git folder removed.\n')

        # configure git repo
        print('Configuring git repo.')
        repo = git.Repo.init(repo_path, expand_vars=False)
        author = git.Actor(name=author_name, email=author_email)

        print('Creating folder structure.\n')
        self.create_folders(self.folder_map)

        # auto-commits
        for number, (k, v) in enumerate(self.max_commits.items(), start=1):
            cdate = v['cdate']
            files = v['files']
            print(f'Auto-commit {number}, adding {len(files)} updates bundled from {k}...')
            for f in files:
                file_path = os.path.join(self.local_path, f['path'])
                print(f'\t{f["path"]}, v{f["version"]}')
                try:
                    # the download lives on the drive wrapper, not on this class
                    self.drive.stream_file(f['id'], r=f['rid'], out=file_path)
                except Exception as error:
                    print('\t\tFile error!')
                    print(f'\t\t{error}')
                    continue  # nothing new on disk to add for this file

                # add file
                if not f['gitignore']:
                    repo.index.add([file_path])
                else:
                    print(f'\t\tNot added to commit.')

            # add gitignore
            self.gitignore(name)
            repo.index.add([os.path.join(repo_path, '.gitignore')])

            repo.index.commit(f'Auto-commit {number} (via Google Drive-to-git tool).',
                              author=author, committer=author,
                              author_date=cdate, commit_date=cdate)

        print(f'\nNew git folder written!')

## Commit to new `git` repo
The GitPython package is as lazy as possible, meaning that it takes arguments from existing git environment variables where possible.

Connect to API.

In [None]:
# Create the Drive client; the constructor runs credentials() and then connect().
drive = GoogleDrive()

Scout folders, files, and revisions and create git repo.

In [None]:
# Alternative: look the folder up by name instead of by ID.
# folder = drive.id_search('google_workspace', ftype='folder')[0]
# Fetch the folder resource for a fixed Drive folder ID.
folder = drive.id_get('10oJBulB5E6_zdBhuuV6Qk41yTWPKoo8D')

# Constructing Drive2Git scans the folder (map_folder runs in __init__).
g = Drive2Git(drive, folder, local_path='C:\\Users\\7h47c\\Desktop',
            ignore_folders=['.ipynb_checkpoints'],
            ignore_files=['credentials.json','token.json'])

# Bundle the revisions and write the commits (default: 240-minute bundles).
g.make_repo()