/
backup.py
executable file
·144 lines (127 loc) · 6.75 KB
/
backup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/usr/bin/env python3
import sys
import httplib2
import os, errno
import datetime
from apiclient import discovery
from oauth2client import client
from oauth2client import tools
from oauth2client.file import Storage
import argparse
parser = argparse.ArgumentParser(parents=[tools.argparser])
parser.add_argument("destination", help="The local folder to which the Google Drive backup will be made. Existing files will be overwritten.")
parser.add_argument("--redownload-docs", help="Re-download Google Documents even if timestamps show no changes have been made.", action='store_true')
parser.add_argument("--redownload-files", help="Re-download binary files even if MD5 checksums match local files.", action='store_true')
parser.add_argument("--trashed", help="This will download items located in the Trash, instead of the regular drive.", action='store_true')
parser.add_argument("--credential-file", help="Location of the credential file storing user credentials (default: user.json).", default="user.json")
flags = parser.parse_args()
SCOPES = "https://www.googleapis.com/auth/drive.readonly"
CLIENT_SECRET_FILE = "client_secret.json"
APPLICATION_NAME = "Google Drive Backup"
def get_credentials(credential_path):
store = Storage(credential_path)
credentials = store.get()
if not credentials or credentials.invalid:
flow = client.flow_from_clientsecrets(CLIENT_SECRET_FILE, SCOPES)
flow.user_agent = APPLICATION_NAME
credentials = tools.run_flow(flow, store, flags)
print("Storing credentials to " + credential_path)
return credentials
# https://developers.google.com/drive/v3/web/manage-downloads
MIME_MAPPINGS = {
"application/vnd.google-apps.document": ("application/vnd.openxmlformats-officedocument.wordprocessingml.document", "docx"),
"application/vnd.google-apps.spreadsheet": ("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", "xlsx"),
"application/vnd.google-apps.drawing": ("image/svg+xml", "svg"),
"application/vnd.google-apps.presentation": ("application/vnd.openxmlformats-officedocument.presentationml.presentation", "pptx"),
"application/vnd.google-apps.script": ("application/vnd.google-apps.script+json", "json"),
}
def main():
credentials = get_credentials(flags.credential_file)
http = credentials.authorize(httplib2.Http())
service = discovery.build("drive", "v3", http=http)
def backup_folder(folder_id, destination):
mkdirp(destination)
next_page_token = ""
query = "trashed = {0} and '{1}' in parents".format(flags.trashed, folder_id)
while True:
result = service.files().list(fields="nextPageToken, files(id, name, mimeType, size, md5Checksum, modifiedTime, viewedByMeTime)", q=query, orderBy="name", pageToken=next_page_token).execute()
for item in result["files"]:
backup_file(item, destination)
if "nextPageToken" in result:
next_page_token = result["nextPageToken"]
else:
break
backed_up_files = set()
def rename(filename, file_id):
i = filename.rfind(".")
if i == -1:
return filename + "-" + file_id
return filename[:i] + "-" + file_id + filename[i:]
def clean(filename):
filename = filename.replace("/", "-")
filename = filename.replace("\\", "-")
filename = filename.replace(": ", " ")
filename = filename.replace(":", " ")
return filename
def check_name(destination, filename, file_id):
key = (destination, filename)
if key in backed_up_files:
new_filename = rename(filename, file_id)
print("WARNING:", filename, "already exists in", destination + ".", "Renaming to", new_filename + ".", file=sys.stderr)
key = (destination, new_filename)
filename = new_filename
backed_up_files.add(key)
return clean(filename)
def backup_file(item, destination):
if item["mimeType"] == "application/vnd.google-apps.folder":
folder_name = os.path.join(destination, check_name(destination, item["name"], item["id"]))
backup_folder(item["id"], folder_name)
elif item["mimeType"] in MIME_MAPPINGS:
export_mime_type, extension = MIME_MAPPINGS[item["mimeType"]]
output_file = os.path.join(destination, check_name(destination, "{0}.{1}".format(item["name"], extension), item["id"]))
modified_time = parse_time(item["modifiedTime"])
if not flags.redownload_docs and os.path.exists(output_file) and modified_time.timestamp() == os.stat(output_file).st_mtime:
print("Unchanged", output_file)
else:
print("Downloading", output_file)
data = service.files().export(fileId=item["id"], mimeType=export_mime_type).execute()
with open(output_file, "wb") as output:
output.write(data)
viewed_time = parse_time(item["viewedByMeTime"]) if "viewedByMeTime" in item else modified_time
os.utime(output_file, times=(viewed_time.timestamp(), modified_time.timestamp()))
elif "size" in item: # Binary file
output_file = os.path.join(destination, check_name(destination, item["name"], item["id"]))
if not flags.redownload_files and \
"md5Checksum" in item and \
os.path.exists(output_file) and \
os.path.getsize(output_file) == int(item["size"]) and \
md5(output_file) == item["md5Checksum"]:
print("Unchanged", output_file)
else:
print("Downloading", output_file)
data = service.files().get_media(fileId=item["id"]).execute()
with open(output_file, "wb") as output:
output.write(data)
modified_time = parse_time(item["modifiedTime"])
viewed_time = parse_time(item["viewedByMeTime"]) if "viewedByMeTime" in item else modified_time
os.utime(output_file, times=(viewed_time.timestamp(), modified_time.timestamp()))
else:
print("WARNING: Cannot handle", item["mimeType"], item["id"], item["name"], file=sys.stderr)
backup_folder("root", flags.destination)
def mkdirp(path):
try:
os.makedirs(path)
except OSError as e:
if e.errno != errno.EEXIST:
raise
import hashlib
def md5(fname):
hash_md5 = hashlib.md5()
with open(fname, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_md5.update(chunk)
return hash_md5.hexdigest()
def parse_time(time):
return datetime.datetime.strptime(time, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=datetime.timezone.utc).replace(microsecond=0)
if __name__ == "__main__":
main()