/
test_setup.py
163 lines (134 loc) · 5.99 KB
/
test_setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"Tests setup script (currently for Python backend)"
import os
import subprocess
import signal
import shutil
from pathlib import Path
from typing import Dict, Union
import pexpect
import pytest
import requests
# Directory that contains this test module.
curdir = Path(__file__).parent
# Two levels above the test directory; setup.sh and .env are looked up here.
root = curdir.parent.parent
# Directory given to setup.sh as the place to save models; removed in teardown_module().
test_models_dir = curdir.joinpath("models")
def setup_module():
    """Prepare the repository for the tests in this module.

    Asserts that ``setup.sh`` exists under ``root`` and, when a ``.env`` file
    is already present there, moves it aside to ``.env.bak``.
    """
    setup_script = root / "setup.sh"
    assert setup_script.exists(), "setup.sh not found"

    env_file = root / ".env"
    if not env_file.exists():
        return
    # Keep the existing .env out of the way; teardown_module() moves it back.
    shutil.move(str(env_file), str(root / ".env.bak"))
def teardown_module():
    """Undo the changes made for the tests in this module.

    Moves ``.env.bak`` back to ``.env`` when a backup exists under ``root``,
    then removes ``test_models_dir``. A failure to delete that directory is
    reported as a warning on stdout instead of being raised.
    """
    backup_env = root / ".env.bak"
    if backup_env.exists():
        shutil.move(str(backup_env), str(root / ".env"))

    try:
        if test_models_dir.exists():
            shutil.rmtree(test_models_dir)
    except Exception as exc:  # best-effort cleanup: report the problem, don't fail teardown
        # The sentences are separated by explicit trailing spaces; adjacent
        # string literals are concatenated with nothing in between.
        print(
            f"WARNING: Couldn't delete `{test_models_dir}` most likely due to permission issues. "
            f"Run the tests with sudo to ensure this gets deleted automatically, or else delete manually. "
            f"Exception: {exc}"
        )
def enter_input(proc: pexpect.spawn, expect: str, input_s: str, timeout: int = 5) -> str:
    """Wait for a prompt on ``proc`` and answer it.

    Waits up to ``timeout`` seconds for the pattern ``expect``, prints the
    matched text, sends ``input_s`` followed by a newline, and returns the
    matched text (``proc.after`` converted with ``str``).

    Raises:
        AssertionError: if the prompt does not appear before the timeout; the
            message includes the output seen before and after the match point.
    """
    try:
        proc.expect(expect, timeout=timeout)
    except pexpect.exceptions.TIMEOUT as timeout_err:
        message = (
            f"Timeout waiting for prompt: `{expect}`.\n"
            f"Output-before: `{proc.before}`\nOutput-after: `{proc.after}`"
        )
        raise AssertionError(message) from timeout_err

    consumed = str(proc.after)
    print(consumed)
    proc.sendline(input_s)
    return consumed
def run_common_setup_steps(n_gpus: int = 0) -> pexpect.spawn:
    """Start ``setup.sh`` and answer the prompts issued before the backend choice.

    Spawns ``./setup.sh`` in ``root`` and answers, in order: number of GPUs
    (``n_gpus``), API port (5000), Triton address ("triton"), Triton port
    (8001) and the model directory (absolute path of ``test_models_dir``).

    Returns:
        The still-running pexpect process, positioned after the model
        directory prompt, so the caller can answer the remaining prompts.
    """
    # pexpect does not interpret shell syntax: a trailing "2>&1" would be
    # passed to setup.sh as a literal argument instead of redirecting stderr.
    # The child runs on a pty that carries both stdout and stderr, so no
    # redirection is needed to see error output.
    proc = pexpect.pty_spawn.spawn("./setup.sh", encoding="utf-8", cwd=str(root))
    # Match prompts case-insensitively.
    proc.ignorecase = True

    prompts_and_answers = (
        (r".*Enter number of GPUs[^:]+: ?", str(n_gpus)),
        (r".*port for the API[^:]+: ?", "5000"),
        (r".*Address for Triton[^:]+: ?", "triton"),
        (r".*Port of Triton[^:]+: ?", "8001"),
        (r".*save your models[^\?]+\? ?", str(test_models_dir.absolute())),
    )
    for prompt, answer in prompts_and_answers:
        enter_input(proc, prompt, answer)
    return proc
def load_test_env(env_file: Union[str, Path, None] = None) -> Dict[str, str]:
    """Build an environment mapping from the current environment plus an env file.

    Args:
        env_file: Path of a ``KEY=VALUE`` file to load. Defaults to
            ``test.env`` next to this module.

    Returns:
        A copy of ``os.environ`` updated with the entries of the file.

    Raises:
        ValueError: if a non-blank, non-comment line contains no ``=``.
    """
    if env_file is None:
        env_file = curdir / "test.env"

    # Without loading default env vars, PATH won't be set correctly
    env = os.environ.copy()
    with open(env_file, "r", encoding="utf8") as test_env:
        for raw_line in test_env:
            line = raw_line.strip()
            # Blank lines and comments carry no assignment.
            if not line or line.startswith("#"):
                continue
            # Split on the first "=" only, so values may themselves contain "=".
            key, sep, val = line.partition("=")
            if not sep:
                raise ValueError(f"Malformed line in {env_file}: {line!r}")
            env[key] = val
    return env
def run_inference(
    prompt: str, model_name: str = "py-model", port: int = 5000, return_all: bool = False,
    timeout: float = 300.0,
    **kwargs
) -> Union[str, Dict]:
    """Invoke the copilot proxy with the given prompt and return the completion.

    Sends a POST request to
    ``http://localhost:{port}/v1/engines/codegen/completions``.

    Args:
        prompt: Text to complete.
        model_name: Value sent as ``model`` in the request body.
        port: Local port the request is sent to.
        return_all: When true, return the whole decoded JSON response instead
            of only the text of the first choice.
        timeout: Seconds to wait for the server before giving up, so an
            unresponsive server fails the test instead of hanging it.
        **kwargs: Optional overrides for the remaining request fields
            (``suffix``, ``max_tokens``, ``temperature``, ``top_p``, ``n``,
            ``stream``, ``logprobs``, ``stop``, ``echo``, ``presence_penalty``,
            ``frequency_penalty``, ``best_of``, ``logit_bias``, ``user``).

    Returns:
        The decoded JSON response if ``return_all`` is true, otherwise
        ``response["choices"][0]["text"]``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if no answer arrives within ``timeout`` seconds.
    """
    endpoint = f"http://localhost:{port}/v1/engines/codegen/completions"
    data = {
        "model": model_name,
        "prompt": prompt,
        "suffix": kwargs.get("suffix", ""),
        "max_tokens": kwargs.get("max_tokens", 16),
        "temperature": kwargs.get("temperature", 0.0),
        "top_p": kwargs.get("top_p", 1.0),
        "n": kwargs.get("n", 1),
        "stream": kwargs.get("stream", None),  # it's not true/false. It's None or not None :[
        "logprobs": kwargs.get("logprobs", 0),
        "stop": kwargs.get("stop", ""),
        "echo": kwargs.get("echo", True),
        "presence_penalty": kwargs.get("presence_penalty", 0.0),
        "frequency_penalty": kwargs.get("frequency_penalty", 0.0),
        "best_of": kwargs.get("best_of", 1),
        "logit_bias": kwargs.get("logit_bias", {}),
        "user": kwargs.get("user", "test"),
    }
    # Without a timeout, requests waits indefinitely for a stuck server.
    response = requests.post(endpoint, json=data, timeout=timeout)
    response.raise_for_status()

    body = response.json()
    if return_all:
        return body
    return body["choices"][0]["text"]
@pytest.mark.parametrize("n_gpus", [0]) # we don't have a GPU on CI
def test_python_backend(n_gpus: int):
    """
    End-to-end test of the Python backend.

    Step 1: run $root/setup.sh while passing appropriate options via stdin
    Step 2: run docker compose up with the variables from test.env added to the environment
    Step 3: call :5000 with appropriate request

    The compose process is interrupted and the containers are brought down in
    a ``finally`` block, so cleanup also runs when an assertion fails.
    """
    proc = run_common_setup_steps(n_gpus)
    # Pick the Python backend and check the menu really lists it as option 2.
    choices = enter_input(proc, r".*Choose your backend.*Enter your choice[^:]+: ?", "2")
    assert "[2] Python backend" in choices, "Option 2 should be Python backend"
    # Pick the first model and check the menu really lists codegen-350M-mono as option 1.
    choices = enter_input(proc, r".*Models available:.*Enter your choice[^:]+: ?", "1")
    assert "[1] codegen-350M-mono" in choices, "Option 1 should be codegen-350M-mono"
    enter_input(proc, r".*share (your )?huggingface cache[^:]+: ?", "y")
    enter_input(proc, r".*cache directory[^:]+: ?", "") # default
    enter_input(proc, r".*use int8[^:]+: ?", "n")
    # Decline to start FauxPilot from setup.sh; the test starts the containers itself below.
    # NOTE(review): the longer timeout is presumably needed because setup.sh does slow work
    # before showing this prompt - confirm against setup.sh.
    enter_input(proc, r".*run FauxPilot\? \[y/n\] ", "n", timeout=120)

    # copy $root/.env to $curdir/test.env, the file load_test_env() reads by default
    shutil.copy(str(root/".env"), str(curdir/"test.env"))

    # run docker compose -f docker-compose-{without|with}-gpus.yaml up
    compose_file = f"docker-compose-with{'' if n_gpus > 0 else 'out'}-gpus.yaml"
    # Stays None if spawning fails, so the finally block can tell whether there is a process to stop.
    docker_proc = None
    try:
        docker_proc = pexpect.pty_spawn.spawn(
            f"docker compose -f {compose_file} up",
            encoding="utf-8",
            cwd=curdir,
            env=load_test_env(),
        )

        print("Waiting for API to be ready...")
        # This log line is used as the readiness signal before sending requests.
        docker_proc.expect(r".*Started GRPCInferenceService at 0.0.0.0:8001", timeout=120)

        print("API ready, sending request...")

        # Simple test 1: hello world prompt without bells and whistles
        response = run_inference("def hello_world():\n", max_tokens=16, return_all=True)
        # NOTE(review): this comparison is whitespace-sensitive - confirm the leading
        # indentation inside the expected literal matches what the model returns.
        assert response["choices"][0]["text"].rstrip() == ' print("Hello World")\n\nhello_world()\n\n#'
        assert response["choices"][0]["finish_reason"] == "length"

    finally:
        if docker_proc is not None and docker_proc.isalive():
            docker_proc.kill(signal.SIGINT)

        # killing docker-compose process doesn't bring down the containers.
        # explicitly stop the containers:
        # NOTE(review): the containers are started with `docker compose` but stopped with the
        # separate `docker-compose` executable - confirm both are installed where this runs.
        subprocess.run(["docker-compose", "-f", compose_file, "down"], cwd=curdir, check=True, env=load_test_env())