Skip to content

Commit

Permalink
feat: Add LLama2 support to LLM events and mock server (newrelic#238)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsumners-nr committed Jan 9, 2024
1 parent 42d04ff commit 31dad9c
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 6 deletions.
20 changes: 18 additions & 2 deletions lib/llm/bedrock-command.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class BedrockCommand {
result = this.#body.max_tokens_to_sample
} else if (this.isCohere() === true) {
result = this.#body.max_tokens
} else if (this.isLlama2() === true) {
result = this.#body.max_gen_length
} else if (this.isTitan() === true) {
result = this.#body.textGenerationConfig?.maxTokenCount
}
Expand Down Expand Up @@ -74,7 +76,12 @@ class BedrockCommand {
result = this.#body.inputText
} else if (this.isCohereEmbed() === true) {
result = this.#body.texts.join(' ')
} else if (this.isClaude() === true || this.isAi21() === true || this.isCohere() === true) {
} else if (
this.isClaude() === true ||
this.isAi21() === true ||
this.isCohere() === true ||
this.isLlama2() === true
) {
result = this.#body.prompt
}
return result
Expand All @@ -87,7 +94,12 @@ class BedrockCommand {
let result
if (this.isTitan() === true) {
result = this.#body.textGenerationConfig?.temperature
} else if (this.isClaude() === true || this.isAi21() === true || this.isCohere() === true) {
} else if (
this.isClaude() === true ||
this.isAi21() === true ||
this.isCohere() === true ||
this.isLlama2() === true
) {
result = this.#body.temperature
}
return result
Expand All @@ -109,6 +121,10 @@ class BedrockCommand {
return this.#modelId.startsWith('cohere.embed')
}

isLlama2() {
return this.#modelId.startsWith('meta.llama2')
}

isTitan() {
return this.#modelId.startsWith('amazon.titan')
}
Expand Down
4 changes: 4 additions & 0 deletions lib/llm/bedrock-response.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class BedrockResponse {
} else if (cmd.isCohere() === true) {
this.#completions = body.generations?.map((g) => g.text) ?? []
this.#id = body.id
} else if (cmd.isLlama2() === true) {
body.generation && this.#completions.push(body.generation)
} else if (cmd.isTitan() === true) {
this.#completions = body.results?.map((r) => r.outputText) ?? []
}
Expand Down Expand Up @@ -83,6 +85,8 @@ class BedrockResponse {
result = this.#parsedBody.stop_reason
} else if (cmd.isCohere() === true) {
result = this.#parsedBody.generations?.[0].finish_reason
} else if (cmd.isLlama2() === true) {
result = this.#parsedBody.stop_reason
} else if (cmd.isTitan() === true) {
result = this.#parsedBody.results?.[0]?.completionReason
}
Expand Down
2 changes: 1 addition & 1 deletion lib/llm/chat-completion-summary.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class LlmChatCompletionSummary extends LlmEvent {

if (cmd.isAi21() === true) {
this[nm] = 1 + this.bedrockResponse.completions.length
} else if (cmd.isClaude() === true) {
} else if (cmd.isClaude() === true || cmd.isLlama2() === true) {
this[nm] = 2
} else if (cmd.isCohere() === true) {
this[nm] = 1 + this.bedrockResponse.completions.length
Expand Down
42 changes: 41 additions & 1 deletion tests/unit/llm/bedrock-command.tap.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ const cohereEmbed = {
}
}

// Minimal Llama 2 invoke payload: just the model id and a prompt, with none
// of the optional tuning parameters (max_gen_length, temperature) set.
const llama2 = {
  modelId: 'meta.llama2-13b-chat-v1',
  body: {
    prompt: 'who are you'
  }
}

const titan = {
modelId: 'amazon.titan-text-lite-v1',
body: {
Expand Down Expand Up @@ -65,7 +72,15 @@ tap.beforeEach((t) => {

tap.test('non-conforming command is handled gracefully', async (t) => {
const cmd = new BedrockCommand(t.context.input)
for (const model of ['Ai21', 'Claude', 'Cohere', 'CohereEmbed', 'Titan', 'TitanEmbed']) {
for (const model of [
'Ai21',
'Claude',
'Cohere',
'CohereEmbed',
'Llama2',
'Titan',
'TitanEmbed'
]) {
t.equal(cmd[`is${model}`](), false)
}
t.equal(cmd.maxTokens, undefined)
Expand Down Expand Up @@ -161,6 +176,31 @@ tap.test('cohere embed minimal command works', async (t) => {
t.equal(cmd.temperature, undefined)
})

tap.test('llama2 minimal command works', async (t) => {
  // A payload carrying only modelId and prompt should still parse cleanly,
  // with the optional tuning accessors reporting undefined.
  const { context } = t
  context.updatePayload(structuredClone(llama2))
  const command = new BedrockCommand(context.input)
  t.equal(command.isLlama2(), true)
  t.equal(command.maxTokens, undefined)
  t.equal(command.modelId, llama2.modelId)
  t.equal(command.modelType, 'completion')
  t.equal(command.prompt, llama2.body.prompt)
  t.equal(command.temperature, undefined)
})

tap.test('llama2 complete command works', async (t) => {
  // Clone the base payload and flesh it out with the optional Llama 2
  // tuning parameters so every accessor has a concrete value to surface.
  const fullPayload = structuredClone(llama2)
  fullPayload.body.max_gen_length = 25
  fullPayload.body.temperature = 0.5
  t.context.updatePayload(fullPayload)
  const command = new BedrockCommand(t.context.input)
  t.equal(command.isLlama2(), true)
  t.equal(command.maxTokens, 25)
  t.equal(command.modelId, fullPayload.modelId)
  t.equal(command.modelType, 'completion')
  t.equal(command.prompt, fullPayload.body.prompt)
  t.equal(command.temperature, fullPayload.body.temperature)
})

tap.test('titan minimal command works', async (t) => {
t.context.updatePayload(structuredClone(titan))
const cmd = new BedrockCommand(t.context.input)
Expand Down
35 changes: 35 additions & 0 deletions tests/unit/llm/bedrock-response.tap.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ const cohere = {
]
}

// Minimal Llama 2 response body: a single generated string plus the reason
// generation stopped.
const llama2 = {
  generation: 'llama2-response',
  stop_reason: 'done'
}

const titan = {
results: [
{
Expand Down Expand Up @@ -73,6 +78,9 @@ tap.beforeEach((t) => {
isCohere() {
return false
},
isLlama2() {
return false
},
isTitan() {
return false
}
Expand Down Expand Up @@ -177,6 +185,33 @@ tap.test('cohere complete responses work', async (t) => {
t.equal(res.statusCode, 200)
})

tap.test('llama2 malformed responses work', async (t) => {
  // With a body that lacks the Llama 2 fields, the wrapper should degrade
  // gracefully: no completions and no finish reason, while header-derived
  // metadata (token counts, request id, status) remains intact.
  t.context.bedrockCommand.isLlama2 = () => true
  const response = new BedrockResponse(t.context)
  t.same(response.completions, [])
  t.equal(response.finishReason, undefined)
  t.same(response.headers, t.context.response.response.headers)
  t.equal(response.id, undefined)
  t.equal(response.inputTokenCount, 25)
  t.equal(response.outputTokenCount, 25)
  t.equal(response.requestId, 'aws-request-1')
  t.equal(response.statusCode, 200)
})

tap.test('llama2 complete responses work', async (t) => {
  // A well-formed Llama 2 body should yield its generation as the sole
  // completion and expose its stop_reason as the finish reason. Llama 2
  // bodies carry no id, so `id` stays undefined even on success.
  t.context.bedrockCommand.isLlama2 = () => true
  t.context.updatePayload(structuredClone(llama2))
  const response = new BedrockResponse(t.context)
  t.same(response.completions, ['llama2-response'])
  t.equal(response.finishReason, 'done')
  t.same(response.headers, t.context.response.response.headers)
  t.equal(response.id, undefined)
  t.equal(response.inputTokenCount, 25)
  t.equal(response.outputTokenCount, 25)
  t.equal(response.requestId, 'aws-request-1')
  t.equal(response.statusCode, 200)
})

tap.test('titan malformed responses work', async (t) => {
t.context.bedrockCommand.isTitan = () => true
const res = new BedrockResponse(t.context)
Expand Down
17 changes: 17 additions & 0 deletions tests/unit/llm/chat-completion-summary.tap.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ tap.beforeEach((t) => {
isCohere() {
return false
},
isLlama2() {
return false
},
isTitan() {
return false
}
Expand Down Expand Up @@ -127,6 +130,20 @@ tap.test('creates a cohere summary', async (t) => {
t.equal(event['response.number_of_messages'], 2)
})

tap.test('creates a llama2 summary', async (t) => {
  // Force the stubbed command to report Llama 2 and verify every summary
  // attribute is populated from the shared test-context fixtures.
  const { context } = t
  context.bedrockCommand.isLlama2 = () => true
  const summary = new LlmChatCompletionSummary(context)
  t.equal(summary.conversation_id, 'conversation-1')
  t.equal(summary.duration, 100)
  t.equal(summary['request.max_tokens'], 25)
  t.equal(summary['request.temperature'], 0.5)
  t.equal(summary['response.choices.finish_reason'], 'done')
  t.equal(summary['response.usage.total_tokens'], 50)
  t.equal(summary['response.usage.prompt_tokens'], 25)
  t.equal(summary['response.usage.completion_tokens'], 25)
  t.equal(summary['response.number_of_messages'], 2)
})

tap.test('creates a titan summary', async (t) => {
t.context.bedrockCommand.isTitan = () => true
const event = new LlmChatCompletionSummary(t.context)
Expand Down
6 changes: 6 additions & 0 deletions tests/versioned/aws-server-stubs/ai-server/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ function handler(req, res) {
break
}

case 'meta.llama2-13b-chat-v1':
case 'meta.llama2-70b-chat-v1': {
response = responses.llama2.get(payload.prompt)
break
}

default: {
response = { statusCode: 418, body: {} }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ const ai21 = require('./ai21')
const amazon = require('./amazon')
const claude = require('./claude')
const cohere = require('./cohere')
const llama2 = require('./llama2')

// Canned response sets grouped by model vendor/family. The mock AI server
// picks the map matching the requested model id, then looks up a reply by
// prompt text.
module.exports = {
  ai21,
  amazon,
  claude,
  cohere,
  llama2
}
70 changes: 70 additions & 0 deletions tests/versioned/aws-server-stubs/ai-server/responses/llama2.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright 2024 New Relic Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/

'use strict'

// Canned Bedrock responses for Meta Llama 2 chat models, keyed by prompt
// text. The mock AI server resolves these via
// `responses.llama2.get(payload.prompt)`.
const responses = new Map()

// Non-streamed invocation. Token counts appear both in the
// `x-amzn-bedrock-*` headers and in the body's `*_token_count` fields.
// NOTE(review): the header counts (4 out / 8 in) do not match the body
// counts (205 / 14) — presumably captured from separate recordings; confirm
// whether any assertion depends on the exact numbers.
responses.set('text llama2 ultimate question', {
  headers: {
    'content-type': 'application/json',
    'x-amzn-requestid': 'eda0760a-c3f0-4fc1-9a1e-75559d642866',
    'x-amzn-bedrock-invocation-latency': 9677,
    'x-amzn-bedrock-output-token-count': 4,
    'x-amzn-bedrock-input-token-count': 8
  },
  statusCode: 200,
  body: {
    generation: '42',
    prompt_token_count: 14,
    generation_token_count: 205,
    stop_reason: 'endoftext'
  }
})

// Streamed invocation: an event-stream delivered as a series of JSON chunks.
// Only the final chunk carries a non-null `stop_reason`, along with the
// `amazon-bedrock-invocationMetrics` summary block.
responses.set('text llama2 ultimate question streamed', {
  headers: {
    'content-type': 'application/vnd.amazon.eventstream',
    'x-amzn-requestid': 'eda0760a-c3f0-4fc1-9a1e-75559d642866',
    'x-amzn-bedrock-content-type': 'application/json'
  },
  statusCode: 200,
  chunks: [
    {
      headers: {
        ':event-type': { type: 'string', value: 'chunk' },
        ':content-type': { type: 'string', value: 'application/json' },
        ':message-type': { type: 'string', value: 'event' }
      },
      body: {
        generation: '42',
        prompt_token_count: null,
        generation_token_count: 211,
        stop_reason: null
      }
    },
    {
      headers: {
        ':event-type': { type: 'string', value: 'chunk' },
        ':content-type': { type: 'string', value: 'application/json' },
        ':message-type': { type: 'string', value: 'event' }
      },
      body: {
        'generation': '',
        'prompt_token_count': null,
        'generation_token_count': 212,
        'stop_reason': 'endoftext',
        // Final-chunk metrics mirroring the headers of a non-streamed reply.
        'amazon-bedrock-invocationMetrics': {
          inputTokenCount: 14,
          outputTokenCount: 212,
          invocationLatency: 9825,
          firstByteLatency: 283
        }
      }
    }
  ]
})

module.exports = responses
7 changes: 6 additions & 1 deletion tests/versioned/v3/bedrock-chat-completions.tap.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ const requests = {
cohere: (prompt, modelId) => ({
body: JSON.stringify({ prompt }),
modelId
}),
llama2: (prompt, modelId) => ({
body: JSON.stringify({ prompt }),
modelId
})
}

Expand Down Expand Up @@ -76,7 +80,8 @@ tap.afterEach(async (t) => {
{ modelId: 'ai21.j2-ultra-v1', resKey: 'ai21' },
{ modelId: 'amazon.titan-text-express-v1', resKey: 'amazon' },
{ modelId: 'anthropic.claude-v2', resKey: 'claude' },
{ modelId: 'cohere.command-text-v14', resKey: 'cohere' }
{ modelId: 'cohere.command-text-v14', resKey: 'cohere' },
{ modelId: 'meta.llama2-13b-chat-v1', resKey: 'llama2' }
].forEach(({ modelId, resKey }) => {
tap.test(`${modelId}: should properly create completion segment`, (t) => {
const { bedrock, client, responses, helper, expectedExternalPath } = t.context
Expand Down

0 comments on commit 31dad9c

Please sign in to comment.