# Google Workspace APIs
Use Google's Workspace API (managed here https://console.cloud.google.com/) for various tasks.

Credentials can be downloaded as `credentials.json` from the API Credentials page and are used to produce `token.json` for persistent use. OAuth 2.0 requires the redirect URI registered in the app and the one used by the "Flow" (within the code) to be the same (the default port for localhost is 8080). It's also worth noting that for apps in "testing," users need to be added via the OAuth consent screen; while the app is in testing, however, the "scope" does not need to be specified in the API (it can be tested and switched in the code).

Import packages.

In [None]:
import io
import os
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from googleapiclient.http import MediaIoBaseDownload
import pytz
import datetime
import git

Google API class (based on `quickstart.py`).

In [None]:
class GoogleDrive:
    """Small helper around the Google Drive v3 API client.

    Creating an instance loads (or interactively obtains) OAuth credentials
    and then builds the Drive service object that the query and download
    helpers below use via ``self.service``.
    """

    def __init__(self):
        # delete token.json before changing these
        self.scopes = [
            # 'https://www.googleapis.com/auth/drive.metadata.readonly',
            'https://www.googleapis.com/auth/drive.readonly'
        ]
        self.creds = None
        # defined up front so the attribute exists even if connect() fails
        self.service = None
        self.credentials()
        self.connect()

    def credentials(self):
        """Load cached credentials from ``token.json`` or obtain new ones.

        Expired credentials with a refresh token are refreshed; otherwise the
        local-server OAuth flow is run from ``credentials.json``. Whenever the
        credentials had to be refreshed or created they are written back to
        ``token.json``.
        """
        # store credentials (user access and refresh tokens)
        if os.path.exists('token.json'):
            self.creds = Credentials.from_authorized_user_file('token.json', self.scopes)
        # if no (valid) credentials available, let user log in
        if not self.creds or not self.creds.valid:
            if self.creds and self.creds.expired and self.creds.refresh_token:
                self.creds.refresh(Request())
            else:
                flow = InstalledAppFlow.from_client_secrets_file('credentials.json', self.scopes)
                self.creds = flow.run_local_server()  # port MUST match redirect URI in Google App
            # save credentials for the next run
            with open('token.json', 'w') as token:
                token.write(self.creds.to_json())

    def connect(self):
        """Build the Drive v3 service and store it on ``self.service``.

        An ``HttpError`` is printed rather than raised, in which case
        ``self.service`` keeps its previous value.
        """
        # attempt to connect to the API
        try:
            self.service = build('drive', 'v3', credentials=self.creds)
            # self.service = build('gmail', 'v1', credentials=self.creds)  # use later for gmail...
        except HttpError as error:
            print(f'An error occurred: {error}')

    def get_id(self, values, term='name', operator='=', ftype='file', ignore_trashed=True):
        """Search Drive and return the ``files`` list of the response.

        Args:
            values: Value compared against ``term`` (interpolated into the
                query inside double quotes).
            term: Query field to compare, e.g. ``'name'``.
            operator: Query operator placed between ``term`` and ``values``.
            ftype: ``'folder'`` or ``'json'`` adds the matching mimeType
                filter; any other value adds none.
            ignore_trashed: When true, restrict the search to non-trashed files.

        Returns:
            The ``'files'`` entry of the ``files().list`` response.
        """
        q = f'{term} {operator} "{values}" '
        if ftype == 'folder':
            q += 'and mimeType = "application/vnd.google-apps.folder" '
        elif ftype == 'json':
            q += 'and mimeType = "application/json" '
        if ignore_trashed:
            q += 'and trashed = false'
        response = self.service.files().list(q=q).execute()

        return response['files']

    def folder_contents(self, i, ignore_trashed=True):
        """Return the ``files`` list for items whose parents include ``i``.

        Args:
            i: ID of the parent folder.
            ignore_trashed: When true, restrict the listing to non-trashed files.
        """
        q = f'"{i}" in parents '
        if ignore_trashed:
            q += 'and trashed = false '
        # use this instance's service rather than a module-level object
        response = self.service.files().list(q=q).execute()

        return response['files']

    def get_revisions(self, i):
        """Return the revisions of file ``i``, or ``None`` if they are unavailable.

        ``None`` is returned when the API call fails with ``HttpError`` or the
        response has no ``'revisions'`` entry; other exceptions propagate.
        """
        try:
            response = self.service.revisions().list(fileId=i).execute()

            return response['revisions']

        except (HttpError, KeyError):
            return None

    def qry_fields(self, i, r=None, fields=('parents',)):
        """Fetch selected metadata fields of a file or of one of its revisions.

        Args:
            i: File ID.
            r: Revision ID; when ``None`` the file itself is queried.
            fields: Iterable of field names to request.

        Returns:
            A dict mapping each requested field name to its value.
        """
        requested = ','.join(fields)
        if r is None:
            p = self.service.files().get(fileId=i, fields=requested).execute()
        else:
            p = self.service.revisions().get(fileId=i, revisionId=r, fields=requested).execute()

        return {f: p[f] for f in fields}

    def stream_file(self, i, r=None, out='stream', verbose=False):
        """Download the content of a file, or of one revision of it.

        Args:
            i: File ID.
            r: Revision ID; when ``None`` the file itself is downloaded.
            out: ``'stream'`` or ``'str'`` downloads into an in-memory buffer;
                any other value is used as the path of a file opened for
                writing.
            verbose: When true, print progress per chunk and the total size.

        Returns:
            The buffer's bytes when ``out == 'str'``; otherwise the stream
            object that was written to (left open; the caller closes it).
        """
        if r is None:
            request = self.service.files().get_media(fileId=i)
        else:
            request = self.service.revisions().get_media(fileId=i, revisionId=r)

        if out in ['stream', 'str']:
            stream = io.BytesIO()
        else:
            stream = io.FileIO(out, mode='w')

        try:
            downloader = MediaIoBaseDownload(stream, request)
            done = False
            while not done:
                status, done = downloader.next_chunk()
                if verbose:
                    print(f'Download {int(status.progress() * 100)}%')
            if verbose:
                print(f'Size {status.total_size / 1024 / 1024:.2f}MB')
        except Exception:
            # do not leak the (file) handle when the download fails
            stream.close()
            raise

        if out in ['str']:
            return stream.getvalue()
        else:
            return stream

Connect to API.

In [None]:
# Instantiating GoogleDrive runs its credentials() and connect() steps, so this
# cell may read/write token.json and start the local-server OAuth login flow.
g = GoogleDrive()

## Get data

In [None]:
# get file info (parents and revisions are taken from the first match only)
matches = g.get_id('demographics_module_v2.ipynb')
if not matches:
    # fail with a clear message instead of an IndexError on the [0] lookup
    raise FileNotFoundError('No Drive file named "demographics_module_v2.ipynb" was found.')
# query only the first match rather than every match and discarding the rest
parents = g.qry_fields(matches[0]['id'])
revisions = g.get_revisions(matches[0]['id'])
# original filename of every match, keyed by the match's current name
orig = [{m['name']: g.qry_fields(m['id'], fields=['originalFilename'])} for m in matches]

## Commit to new `git` repo

In [None]:
# set up repo
# path = os.path.join('C:/', 'Users/7h47c/Desktop', 'my-new-repo')  # Windows issues with path
path = os.path.join(os.getcwd(), 'my-new-repo')
repo = git.Repo.init(path, expand_vars=False)
author = git.Actor(name='Craiggers', email='7h47ch@gmail.com')
utc = pytz.timezone('UTC')
tz = pytz.timezone('US/Eastern')

# get matches and revisions
found = g.get_id('demographics_module_v2.ipynb')
if not found:
    # fail with a clear message instead of an IndexError on the [0] lookup
    raise FileNotFoundError('No Drive file named "demographics_module_v2.ipynb" was found.')
match = found[0]
# get_revisions() returns None when the lookup fails; treat that as nothing to commit
revisions = g.get_revisions(match['id']) or []

# every revision is written to the same path inside the repo, one commit each
file_path = os.path.join(path, match['name'])

for i, r in enumerate(revisions):
    # set commit date (revision timestamps are parsed as UTC, then shown in `tz`)
    parsed_date = datetime.datetime.strptime(r['modifiedTime'], '%Y-%m-%dT%H:%M:%S.%fZ')
    cdate = utc.localize(parsed_date).astimezone(tz)
    print(r['modifiedTime'], git.objects.util.parse_date(cdate))

    # make file; the download is synchronous, so the file is complete once
    # stream_file() returns -- close the handle before git touches the file
    stream = g.stream_file(match['id'], r=r['id'], out=file_path)
    stream.close()

    # commit to repo
    repo.index.add([file_path])
    repo.index.commit(f'Version {i+1} commit.', author=author, committer=author,
                      author_date=cdate, commit_date=cdate)  # add author or committer args

    # remove temporary file (the latest revision stays in the working tree)
    if r['id'] != revisions[-1]['id']:
        os.remove(file_path)