Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 7,
"prefill": [
{
"id": 0,
"logprob": null,
"text": "<pad>"
}
],
"seed": null,
"tokens": [
{
"id": 3,
"logprob": -0.7001953,
"special": false,
"text": " "
},
{
"id": 18,
"logprob": -1.1943359,
"special": false,
"text": "-"
},
{
"id": 26937,
"logprob": -1.2099609,
"special": false,
"text": "196"
},
{
"id": 3,
"logprob": -1.2451172,
"special": false,
"text": " "
},
{
"id": 1956,
"logprob": -0.3322754,
"special": false,
"text": "°"
},
{
"id": 254,
"logprob": -0.19213867,
"special": false,
"text": "C"
},
{
"id": 1,
"logprob": -0.030151367,
"special": true,
"text": "</s>"
}
]
},
"generated_text": "-196 °C"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
[
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 7,
"prefill": [
{
"id": 0,
"logprob": null,
"text": "<pad>"
}
],
"seed": null,
"tokens": [
{
"id": 3,
"logprob": -0.7001953,
"special": false,
"text": " "
},
{
"id": 18,
"logprob": -1.1943359,
"special": false,
"text": "-"
},
{
"id": 26937,
"logprob": -1.2119141,
"special": false,
"text": "196"
},
{
"id": 3,
"logprob": -1.2480469,
"special": false,
"text": " "
},
{
"id": 1956,
"logprob": -0.33203125,
"special": false,
"text": "°"
},
{
"id": 254,
"logprob": -0.19250488,
"special": false,
"text": "C"
},
{
"id": 1,
"logprob": -0.030166626,
"special": true,
"text": "</s>"
}
]
},
"generated_text": "-196 °C"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 7,
"prefill": [
{
"id": 0,
"logprob": null,
"text": "<pad>"
}
],
"seed": null,
"tokens": [
{
"id": 3,
"logprob": -0.7001953,
"special": false,
"text": " "
},
{
"id": 18,
"logprob": -1.1943359,
"special": false,
"text": "-"
},
{
"id": 26937,
"logprob": -1.2119141,
"special": false,
"text": "196"
},
{
"id": 3,
"logprob": -1.2480469,
"special": false,
"text": " "
},
{
"id": 1956,
"logprob": -0.33203125,
"special": false,
"text": "°"
},
{
"id": 254,
"logprob": -0.19250488,
"special": false,
"text": "C"
},
{
"id": 1,
"logprob": -0.030166626,
"special": true,
"text": "</s>"
}
]
},
"generated_text": "-196 °C"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 7,
"prefill": [
{
"id": 0,
"logprob": null,
"text": "<pad>"
}
],
"seed": null,
"tokens": [
{
"id": 3,
"logprob": -0.7001953,
"special": false,
"text": " "
},
{
"id": 18,
"logprob": -1.1943359,
"special": false,
"text": "-"
},
{
"id": 26937,
"logprob": -1.2119141,
"special": false,
"text": "196"
},
{
"id": 3,
"logprob": -1.2480469,
"special": false,
"text": " "
},
{
"id": 1956,
"logprob": -0.33203125,
"special": false,
"text": "°"
},
{
"id": 254,
"logprob": -0.19250488,
"special": false,
"text": "C"
},
{
"id": 1,
"logprob": -0.030166626,
"special": true,
"text": "</s>"
}
]
},
"generated_text": "-196 °C"
},
{
"details": {
"best_of_sequences": null,
"finish_reason": "eos_token",
"generated_tokens": 7,
"prefill": [
{
"id": 0,
"logprob": null,
"text": "<pad>"
}
],
"seed": null,
"tokens": [
{
"id": 3,
"logprob": -0.7001953,
"special": false,
"text": " "
},
{
"id": 18,
"logprob": -1.1943359,
"special": false,
"text": "-"
},
{
"id": 26937,
"logprob": -1.2099609,
"special": false,
"text": "196"
},
{
"id": 3,
"logprob": -1.2451172,
"special": false,
"text": " "
},
{
"id": 1956,
"logprob": -0.3322754,
"special": false,
"text": "°"
},
{
"id": 254,
"logprob": -0.19213867,
"special": false,
"text": "C"
},
{
"id": 1,
"logprob": -0.030151367,
"special": true,
"text": "</s>"
}
]
},
"generated_text": "-196 °C"
}
]
4 changes: 3 additions & 1 deletion integration-tests/models/test_flash_neox.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ async def test_flash_neox_load(flash_neox, generate_load, response_snapshot):
generated_texts = [r.generated_text for r in responses]

assert len(generated_texts) == 4
assert generated_texts, all([text == generated_texts[0] for text in generated_texts])
assert generated_texts, all(
[text == generated_texts[0] for text in generated_texts]
)

assert responses == response_snapshot
38 changes: 38 additions & 0 deletions integration-tests/models/test_t5_sharded.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest


@pytest.fixture(scope="module")
def t5_sharded_handle(launcher):
    """Launch google/flan-t5-xxl sharded across two shards for this module's tests."""
    with launcher("google/flan-t5-xxl", num_shard=2) as server_handle:
        yield server_handle


@pytest.fixture(scope="module")
async def t5_sharded(t5_sharded_handle):
    """Block until the launched server reports healthy (240s budget), then expose its client."""
    handle = t5_sharded_handle
    await handle.health(240)
    return handle.client


@pytest.mark.asyncio
async def test_t5_sharded(t5_sharded, response_snapshot):
    """A single generate request against sharded T5 matches the recorded snapshot."""
    prompt = "Please answer the following question. What is the boiling point of Nitrogen?"
    result = await t5_sharded.generate(prompt, max_new_tokens=10)

    assert result == response_snapshot


@pytest.mark.asyncio
async def test_t5_sharded_load(t5_sharded, generate_load, response_snapshot):
    """Four concurrent generate requests all return identical text and match the snapshot."""
    prompt = "Please answer the following question. What is the boiling point of Nitrogen?"
    results = await generate_load(
        t5_sharded,
        prompt,
        max_new_tokens=10,
        n=4,
    )

    assert len(results) == 4
    # All concurrent responses must agree with the first one.
    first_text = results[0].generated_text
    assert all(r.generated_text == first_text for r in results)

    assert results == response_snapshot
5 changes: 4 additions & 1 deletion server/text_generation_server/models/bloom.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,10 @@ def load_weights(
# XXX: Hack for Rowlinear to add the bias only once.
if rank != 0:
tensor = torch.zeros_like(tensor)
elif isinstance(module, TensorParallelEmbedding) or name == "lm_head.weight":
elif (
isinstance(module, TensorParallelEmbedding)
or name == "lm_head.weight"
):
size = slice_.get_shape()[0]
block_size = size // world_size
start = rank * block_size
Expand Down
Loading