Install python requirements using:
```
pip install oauth2client
pip install google-api-python-client
pip install gspread
```

Replace `CREDENTIAL_FILE` with the path to the service account key file. Key file can be retrieved from:
https://drive.google.com/a/harvard.edu/file/d/0BxTcuuFIY3htdTVnd1lvYm5vWjg/view?usp=sharing


Share documents with `doc-revisions@appspot.gserviceaccount.com` (requires edit access) in order to view revisions.

In [117]:
import sys
sys.path.append('../scripts')
from gdoc_revisions import *

In [119]:
gdrive_api = build('drive', 'v3', credentials=credentials)
Document(FILE_ID).download_revision_details()


NameError: global name 'drive_api' is not defined

In [2]:
# location of service account json credential file for doc-revisions@appspot.gserviceaccount.com
CREDENTIAL_FILE = 'doc-revisions-98c56004e106.json'

In [41]:

from oauth2client.service_account import ServiceAccountCredentials
from apiclient.discovery import build
from httplib2 import Http
import json

scope = ['https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name(CREDENTIAL_FILE, scope)
gdrive_api = build('drive', 'v3', credentials=credentials)



In [112]:


class Document(object):
    '''
    Google doc class
    Provides ability to download revisions
    assumes access to 'gdrive_api', an instance of googleapiclient.discovery.Resource
    '''
    def __init__(self, file_id):
        self.metadata = drive_api.files().get(fileId=file_id).execute()
        self.name = self.metadata['name']
        self.file_id = file_id
        
    def _last_revision_id(self):
        '''
        Return the id of the last revision to a document, using the offical google api v3
        '''
        revision_metainfo = gdrive_api.revisions().list(fileId=self.file_id).execute()
        return revision_metainfo['revisions'][-1]['id']
    
    def _generate_revision_url(self, start, end):
        '''
        Generates a url for downloading revision details (using undocumented google api endpoint)
        '''
        base_url = 'https://docs.google.com/document/d/{file_id}/revisions/load?id={file_id}&start={start}&end={end}'
        url = base_url.format(file_id=self.file_id,start=start,end=end)
        return url

    def download_revision_details(self):
        '''
        download json-like data with revision info
        '''
        http_auth = credentials.authorize(Http())
        last_revision_id = self._last_revision_id()
        url = self._generate_revision_url(start=1,end=last_revision_id)
        raw_text = http_auth.request(url)[1][5:]
        return json.loads(raw_text)
    
        
        
Document(FILE_ID).download_revision_details()



{u'changelog': [[{u'mts': [{u'ei': 1,
      u'fm': False,
      u'si': 1,
      u'sm': {u'ps_al_i': True,
       u'ps_awao_i': True,
       u'ps_ifl_i': True,
       u'ps_il_i': True,
       u'ps_ir_i': True,
       u'ps_klt_i': True,
       u'ps_kwn_i': True,
       u'ps_ls_i': True,
       u'ps_sa_i': True,
       u'ps_sb_i': True,
       u'ps_sm_i': True},
      u'st': u'paragraph',
      u'ty': u'as'},
     {u'ei': 0,
      u'fm': False,
      u'si': 0,
      u'sm': {u'lgs_l': u'en'},
      u'st': u'language',
      u'ty': u'as'},
     {u'ei': 1,
      u'fm': False,
      u'si': 0,
      u'sm': {u'ts_bd_i': True,
       u'ts_bgc_i': True,
       u'ts_ff_i': True,
       u'ts_fgc_i': True,
       u'ts_fs_i': True,
       u'ts_it_i': True,
       u'ts_sc_i': True,
       u'ts_st_i': True,
       u'ts_un_i': True,
       u'ts_va_i': True},
      u'st': u'text',
      u'ty': u'as'},
     {u'ei': 0,
      u'fm': False,
      u'si': 0,
      u'sm': {u'hs_h1': {u'sdef_ps': {u'ps_al': 0,
 

In [98]:
FILE_ID = '1aSAA-ZA8bGvJSpgFhVgJu89EDMPMRm5IrcdeV85JmJE'

In [43]:
revisions_data = download_revision_details(df_docs.file_id[4], credentials)

In [45]:
revisions_data.keys

{u'changelog': [[{u'mts': [{u'ei': 1,
      u'fm': False,
      u'si': 1,
      u'sm': {u'ps_al_i': True,
       u'ps_awao_i': True,
       u'ps_ifl_i': True,
       u'ps_il_i': True,
       u'ps_ir_i': True,
       u'ps_klt_i': True,
       u'ps_kwn_i': True,
       u'ps_ls_i': True,
       u'ps_sa_i': True,
       u'ps_sb_i': True,
       u'ps_sm_i': True},
      u'st': u'paragraph',
      u'ty': u'as'},
     {u'ei': 0,
      u'fm': False,
      u'si': 0,
      u'sm': {u'lgs_l': u'en'},
      u'st': u'language',
      u'ty': u'as'},
     {u'ei': 1,
      u'fm': False,
      u'si': 0,
      u'sm': {u'ts_bd_i': True,
       u'ts_bgc_i': True,
       u'ts_ff_i': True,
       u'ts_fgc_i': True,
       u'ts_fs_i': True,
       u'ts_it_i': True,
       u'ts_sc_i': True,
       u'ts_st_i': True,
       u'ts_un_i': True,
       u'ts_va_i': True},
      u'st': u'text',
      u'ty': u'as'},
     {u'ei': 0,
      u'fm': False,
      u'si': 0,
      u'sm': {u'hs_h1': {u'sdef_ps': {u'ps_al': 0,
 

In [53]:
service.revisions().get(fileId=df_docs.file_id[4],revisionId=148).execute()

{u'id': u'148',
 u'kind': u'drive#revision',
 u'mimeType': u'application/vnd.google-apps.document',
 u'modifiedTime': u'2016-04-20T15:55:32.881Z'}

In [89]:
revisions_data = download_revision_details(df_docs.file_id[4], credentials)

revisions = []
for r in revisions_data['changelog']:
    revisions.append(Revision(r))

In [92]:
revision = revisions[1]
revision.operation

<__main__.InsertString at 0x11550ba10>

In [88]:
class Revision(object):
    def __init__(self, r):
        self.time = r[1]
        self.user_id = r[2]
        self.revision_id = r[3]
        self.session = r[4]
        self.session_revision = r[5]
        self.raw = r
        self.operation = self.get_operation(r[0])
        
    def get_operation(self, data):
        subclass_mapping = {
            'is': InsertString,
            'ds': DeleteString,
        }
        operation_subclass = subclass_mapping.get(data['ty'])
        if operation_subclass:
            return operation_subclass(data)
        else:
            return Operation(data)

class Operation(object):
    def __init__(self, data):
        self.raw = data
        
class InsertString(Operation):
    def __init__(self, data):
        super(InsertString, self).__init__(data)
        self.string = data['s']
        self.start_index = data['ibi']
        self.type = 'insert string'

class DeleteString(Operation):
    def __init__(self, data):
        super(DeleteString, self).__init__(data)
        self.start_index = data['si']
        self.end_index = data['ei']
        self.type = 'delete string'
        
class MultiOperation(Operation):
    def __init__(self, data):
        super(MultiOperation, self).__init__(data)
        self.operations = [Operation(d).get_subclass() for d in data['mts']]
        self.type = 'multiple operations' 
        


In [23]:
operation1 = {u'ibi': 1, u's': u'*', u'ty': u'is'}
operation2 = {u'ei': 1, u'si': 1, u'ty': u'ds'}

io = InsertString(operation1)
do = DeleteString(operation2)
mo = MultiOperation(operation3)

# MultiOperation(operation3).operations
# Operation(operation1).convert_to_subclass().start_index
mo.operations


operation3 = {u'mts': [{u'ibi': 1, u's': u'*', u'ty': u'is'},
   {u'epm': {u'ee_eo': {u'eo_ad': None,
      u'eo_mb': 9.0,
      u'eo_ml': 9.0,
      u'eo_mr': 9.0,
      u'eo_mt': 9.0,
      u'eo_type': 0,
      u'i_cid': u'1WEHN3jFyCcY-TKXU0fFyI2WRrtbf92kZHC1lVW0',
      u'i_ht': 321.0,
      u'i_src': u'',
      u'i_wth': 468.0}},
    u'et': u'inline',
    u'id': u'kix.251nqehz9grw',
    u'ty': u'ae'},
   {u'id': u'kix.251nqehz9grw', u'spi': 1, u'ty': u'te'},
   {u'ibi': 2, u's': u'*', u'ty': u'is'},
   {u'epm': {u'ee_eo': {u'eo_ad': None,
      u'eo_mb': 9.0,
      u'eo_ml': 9.0,
      u'eo_mr': 9.0,
      u'eo_mt': 9.0,
      u'eo_type': 0,
      u'i_cid': u'1zpAiTO5AYkqUyYyb6qDLvCcrzMrsLBOz7r8mWo8',
      u'i_ht': 406.0,
      u'i_src': u'',
      u'i_wth': 600.0}},
    u'et': u'inline',
    u'id': u'kix.htedge1dtn4m',
    u'ty': u'ae'},
   {u'id': u'kix.htedge1dtn4m', u'spi': 2, u'ty': u'te'},
   {u'ibi': 3, u's': u'*', u'ty': u'is'},
   {u'epm': {u'ee_eo': {u'eo_ad': None,
      u'eo_mb': 9.0,
      u'eo_ml': 9.0,
      u'eo_mr': 9.0,
      u'eo_mt': 9.0,
      u'eo_type': 0,
      u'i_cid': u'1HiaCq9FRLI8mQkmG8lUhUMau2ef40gQNn4gToYY',
      u'i_ht': 482.0,
      u'i_src': u'',
      u'i_wth': 600.0}},
    u'et': u'inline',
    u'id': u'kix.9t9xliu0kvnc',
    u'ty': u'ae'},
   {u'id': u'kix.9t9xliu0kvnc', u'spi': 3, u'ty': u'te'}],
  u'ty': u'mlti'}