/
compute.py
executable file
·118 lines (101 loc) · 3.89 KB
/
compute.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import os
import requests
import json
from requests.exceptions import RequestException, Timeout
import requests_mock
# NOTE(review): this assigns a class-level attribute on requests_mock.Mocker as
# an import-time side effect of this module. Presumably test-support
# configuration -- confirm it is intended to live in this file.
requests_mock.Mocker.TEST_PREFIX = "test"
# Timeout, in seconds, passed to requests.post for each submission attempt.
TIMEOUT_IN_SECONDS = 3.5
# The submit() retry loops give up once the number of failed attempts
# exceeds this value.
MAX_ATTEMPTS_SUBMIT_JOB = 4
class JobFailError(Exception):
    """An exception to raise when a remote job has failed."""
class WorkersUnreachableError(Exception):
    """
    An exception to raise when the backend workers are not reachable.

    This should only be raised when the webapp is run without the workers.
    """
class Compute(object):
    """Submit jobs to a cluster over HTTP, retrying failed submissions."""

    def remote_submit_job(
        self, url: str, data: dict, timeout: float = TIMEOUT_IN_SECONDS, headers=None
    ):
        """POST ``data`` as a JSON body to ``url`` and return the response.

        Args:
            url: Endpoint to post to.
            data: Payload sent as the JSON request body.
            timeout: Timeout in seconds handed to ``requests.post``.
            headers: Optional HTTP headers for the request.

        Returns:
            The ``requests`` response object, whatever its status code.
        """
        response = requests.post(url, json=data, timeout=timeout, headers=headers)
        return response

    def submit_job(self, project, task_name, task_kwargs, path_prefix="", tag=None):
        """Build the job URL and payload for ``project`` and submit it.

        Args:
            project: Object providing ``cluster``, ``owner``, ``title`` and
                ``latest_tag``; its cluster provides ``url`` and ``headers()``.
            task_name: Name of the task, sent under the ``task_name`` key.
            task_kwargs: Task arguments, sent under the ``task_kwargs`` key.
            path_prefix: Path inserted between the cluster URL and the
                ``/<owner>/<title>/`` suffix.
            tag: Tag to submit; falls back to ``str(project.latest_tag)``
                when falsy.

        Returns:
            Whatever ``submit`` returns for the request.
        """
        print("submitting", task_name)
        cluster = project.cluster
        tag = tag or str(project.latest_tag)
        url = f"{cluster.url}{path_prefix}/{project.owner}/{project.title}/"
        print(url)
        return self.submit(
            tasks=dict(task_name=task_name, tag=tag, task_kwargs=task_kwargs),
            url=url,
            headers=cluster.headers(),
        )

    def submit(self, tasks, url, headers):
        """POST ``tasks`` to ``url`` until it is accepted, then return the job id.

        A response with status 200 or 201 counts as accepted. Any other
        status, a timeout, or another ``requests`` error counts as a failed
        attempt and the request is sent again.

        Args:
            tasks: Payload passed to ``remote_submit_job`` as ``data``.
            url: Endpoint to post to.
            headers: HTTP headers for the request.

        Returns:
            The ``task_id`` value of the JSON response, or its ``id`` value
            when ``task_id`` is missing or falsy.

        Raises:
            WorkersUnreachableError: Once the number of failed attempts
                exceeds ``MAX_ATTEMPTS_SUBMIT_JOB``.
            ValueError: If an accepted response does not carry valid JSON
                (``requests`` raises a ``ValueError`` subclass from
                ``response.json()``).
        """
        attempts = 0
        while True:
            print(tasks)
            try:
                response = self.remote_submit_job(
                    url, data=tasks, timeout=TIMEOUT_IN_SECONDS, headers=headers
                )
            except Timeout:
                print("Couldn't submit to: ", url)
            except RequestException as exc:
                print("Something unexpected happened: ", exc)
            else:
                if response.status_code in (200, 201):
                    print("submitted: ", url)
                    break
                # Log the raw body: error responses are not guaranteed to be
                # JSON, and decoding them here could raise while reporting.
                print("FAILED: ", url, response.status_code, response.text)
            attempts += 1
            if attempts > MAX_ATTEMPTS_SUBMIT_JOB:
                print("Exceeded max attempts. Bailing out.")
                raise WorkersUnreachableError()
        # Decode outside the retry loop: a malformed body on an accepted
        # request must surface as a decode error instead of being swallowed
        # by the RequestException handler and leaving the job id unbound.
        data = response.json()
        return data.get("task_id") or data.get("id")
class SyncCompute(Compute):
    """Variant of ``Compute`` whose ``submit`` returns the response payload."""

    def submit(self, tasks, url, headers):
        """POST ``tasks`` to ``url`` until it returns 200, then report the result.

        Any status other than 200, a timeout, or another ``requests`` error
        counts as a failed attempt and the request is sent again.

        Args:
            tasks: Payload passed to ``remote_submit_job`` as ``data``.
            url: Endpoint to post to.
            headers: HTTP headers for the request.

        Returns:
            ``None`` when the 200 response has an empty body. Otherwise a
            ``(success, data)`` tuple where ``data`` is the decoded JSON body;
            ``success`` is ``True`` when ``data`` is a list, else whether
            ``data["status"]`` equals ``"SUCCESS"``.

        Raises:
            WorkersUnreachableError: Once the number of failed attempts
                exceeds ``MAX_ATTEMPTS_SUBMIT_JOB``.
            ValueError: If a non-empty 200 response does not carry valid JSON
                (``requests`` raises a ``ValueError`` subclass from
                ``response.json()``).
        """
        attempts = 0
        while True:
            try:
                response = self.remote_submit_job(
                    url, data=tasks, timeout=TIMEOUT_IN_SECONDS, headers=headers
                )
            except Timeout:
                print("Couldn't submit to: ", url)
            except RequestException as exc:
                print("Something unexpected happened: ", exc)
            else:
                if response.status_code == 200:
                    print("submitted: ", url)
                    break
                print("FAILED: ", url, response.status_code, response.text)
            attempts += 1
            if attempts > MAX_ATTEMPTS_SUBMIT_JOB:
                print("Exceeded max attempts. Bailing out.")
                raise WorkersUnreachableError()
        if not response.text:
            # An empty body carries no payload to report on.
            return None
        # Decode outside the retry loop: a malformed body on a 200 response
        # must surface as a decode error instead of being swallowed by the
        # RequestException handler and leaving ``data`` unbound.
        data = response.json()
        if isinstance(data, list):
            success = True
        else:
            success = data["status"] == "SUCCESS"
        return success, data
class SyncProjects(SyncCompute):
    """Send a project to a cluster's sync endpoint."""

    def submit_job(self, project, cluster):
        """Submit ``project`` to the sync endpoint matching the cluster version.

        Clusters whose ``version`` is ``"v0"`` use ``<url>/sync/``; all others
        use ``<url>/api/v1/projects/sync/``. The project is sent as a
        one-element list.

        Returns:
            Whatever ``submit`` returns for the request.
        """
        sync_path = "/sync/" if cluster.version == "v0" else "/api/v1/projects/sync/"
        return self.submit(
            tasks=[project],
            url=f"{cluster.url}{sync_path}",
            headers=cluster.headers(),
        )