forked from mozilla-releng/balrog
-
Notifications
You must be signed in to change notification settings - Fork 5
/
get-prod-db-dump.py
100 lines (77 loc) · 2.98 KB
/
get-prod-db-dump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
import logging
import os
import time
from calendar import timegm
from datetime import datetime
from http.client import HTTPSConnection
from socket import gaierror
try:
from urllib2 import urlopen, HTTPError, URLError
except ImportError: # pragma: no cover
from urllib.error import HTTPError, URLError
from urllib.request import urlopen
# Remote location of the prod database dump: scheme + host, then object path.
HOST = "https://balrog-public-dump-prod.s3.amazonaws.com"
PATH = "/dump.sql.txt.xz"

# Local cache location; the LOCAL_DUMP environment variable overrides the default.
LOCAL_DB_PATH = os.environ.get("LOCAL_DUMP", "/app/scripts/prod_db_dump.sql.xz")

# Timeout, in seconds, handed to the HTTP calls made by this script.
TIMEOUT = 10

logging.basicConfig(format="%(asctime)s: %(message)s", level=logging.DEBUG)
def getRemoteDBModifiedTS():
    """
    Performs a HEAD request to get the Last-Modified date-time
    of a database dump file and parses it into a UNIX timestamp.

    Returns:
        int: Last-Modified time of the remote dump, in seconds since the
        epoch (UTC).

    Raises:
        SystemExit: with status 1 when the server cannot be reached, when it
        answers with anything other than 200, or when the Last-Modified
        header is missing or cannot be parsed.
    """
    # Imported here so the function brings its own parsing dependency.
    from email.utils import parsedate_to_datetime

    error_msg = "Unable to get timestamp of remote database dump - {0}"
    logging.info("Getting timestamp of database dump at '%s'", HOST + PATH)

    # HTTPSConnection expects a bare host name, so strip the URL scheme.
    conn = HTTPSConnection(HOST.split("://", 1)[-1], timeout=TIMEOUT)
    try:
        conn.request("HEAD", PATH)
        rsp = conn.getresponse()
        status, reason = rsp.status, rsp.reason
        last_modified = rsp.getheader("last-modified", None)
    except OSError as e:
        # OSError covers DNS failures (gaierror), refused connections,
        # socket timeouts and TLS errors alike.
        logging.error(error_msg.format("Cannot connect to '%s', error: %s"), HOST + PATH, e)
        raise SystemExit(1)
    finally:
        # Always release the socket, whether or not the request succeeded.
        conn.close()

    if status != 200:
        logging.error(error_msg.format("Server responded with: %d %s"), status, reason)
        raise SystemExit(1)

    if last_modified is None:
        logging.error(error_msg.format("Response doesnt include Last-Modified Header"))
        raise SystemExit(1)

    try:
        # Locale-independent parsing of the HTTP date header.
        last_m_dt = parsedate_to_datetime(last_modified)
    except (TypeError, ValueError) as e:
        # Older Python versions raise TypeError, newer ones ValueError.
        logging.error(error_msg.format("Cannot parse Last-Modified header '%s': %s"), last_modified, e)
        raise SystemExit(1)

    # utctimetuple() converts an aware datetime to UTC and leaves a naive one
    # untouched, so timegm() always receives UTC fields.
    return timegm(last_m_dt.utctimetuple())
def getLocalDBModifiedTS():
    """
    Looks up the modification time of the local database dump file and
    returns it as a whole-second UNIX timestamp.
    Returns 0 when the file's mtime cannot be read (OSError).
    """
    try:
        mtime = os.path.getmtime(LOCAL_DB_PATH)
    except OSError:
        return 0
    return int(mtime)
def setLocalDBTimestamp(prod_db_ts):
    """
    Stamps the local database dump file with the remote dump's
    modification time (prod_db_ts) as its mtime, and with the current
    time as its atime.
    """
    access_time = int(time.time())
    # os.utime takes the pair in (atime, mtime) order.
    os.utime(LOCAL_DB_PATH, times=(access_time, prod_db_ts))
def setLocalDBPermissions():
    """
    Makes the local database dump file readable and writable by
    owner, group and others (mode 0o666).
    """
    read_write_for_all = 0o666
    os.chmod(LOCAL_DB_PATH, read_write_for_all)
if __name__ == "__main__":
    # Script-local import: only the download step below needs it.
    import shutil

    prod_db_ts = getRemoteDBModifiedTS()
    local_db_ts = getLocalDBModifiedTS()

    if prod_db_ts > 0:
        # Refresh when there is no cached copy or the remote dump is newer.
        if not os.path.exists(LOCAL_DB_PATH) or (prod_db_ts > local_db_ts):
            logging.info("Downloading latest database dump to '%s'", LOCAL_DB_PATH)
            try:
                rsp = urlopen(HOST + PATH, timeout=TIMEOUT)
            except (HTTPError, URLError) as e:
                logging.error("Downloading the latest database dump failed due to network error: %s", e)
                raise SystemExit(1)

            # Stream into a sibling temporary file and rename it over the
            # cache only once the transfer is complete. Writing in place
            # would leave a truncated file with a fresh mtime after a failed
            # download, which later runs would mistake for an up-to-date dump.
            tmp_path = LOCAL_DB_PATH + ".part"
            try:
                with rsp, open(tmp_path, "wb") as f:
                    # Copy in chunks instead of holding the whole dump in memory.
                    shutil.copyfileobj(rsp, f)
                os.replace(tmp_path, LOCAL_DB_PATH)
            finally:
                # After a successful rename the temp file is gone; after a
                # failure remove the partial download and let the error propagate.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

            setLocalDBTimestamp(prod_db_ts)
            setLocalDBPermissions()
        else:
            logging.info("Cached dump is up-to-date")