Skip to content

Commit

Permalink
feat: Add LLama2 support to LLM events and mock server (newrelic#238)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsumners-nr committed Jan 9, 2024
1 parent 42d04ff commit 31dad9c
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 6 deletions.
20 changes: 18 additions & 2 deletions lib/llm/bedrock-command.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class BedrockCommand {
result = this.#body.max_tokens_to_sample
} else if (this.isCohere() === true) {
result = this.#body.max_tokens
} else if (this.isLlama2() === true) {
result = this.#body.max_gen_length
} else if (this.isTitan() === true) {
result = this.#body.textGenerationConfig?.maxTokenCount
}
Expand Down Expand Up @@ -74,7 +76,12 @@ class BedrockCommand {
result = this.#body.inputText
} else if (this.isCohereEmbed() === true) {
result = this.#body.texts.join(' ')
} else if (this.isClaude() === true || this.isAi21() === true || this.isCohere() === true) {
} else if (
this.isClaude() === true ||
this.isAi21() === true ||
this.isCohere() === true ||
this.isLlama2() === true
) {
result = this.#body.prompt
}
return result
Expand All @@ -87,7 +94,12 @@ class BedrockCommand {
let result
if (this.isTitan() === true) {
result = this.#body.textGenerationConfig?.temperature
} else if (this.isClaude() === true || this.isAi21() === true || this.isCohere() === true) {
} else if (
this.isClaude() === true ||
this.isAi21() === true ||
this.isCohere() === true ||
this.isLlama2() === true
) {
result = this.#body.temperature
}
return result
Expand All @@ -109,6 +121,10 @@ class BedrockCommand {
return this.#modelId.startsWith('cohere.embed')
}

isLlama2() {
return this.#modelId.startsWith('meta.llama2')
}

isTitan() {
return this.#modelId.startsWith('amazon.titan')
}
Expand Down
4 changes: 4 additions & 0 deletions lib/llm/bedrock-response.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class BedrockResponse {
} else if (cmd.isCohere() === true) {
this.#completions = body.generations?.map((g) => g.text) ?? []
this.#id = body.id
} else if (cmd.isLlama2() === true) {
body.generation && this.#completions.push(body.generation)
} else if (cmd.isTitan() === true) {
this.#completions = body.results?.map((r) => r.outputText) ?? []
}
Expand Down Expand Up @@ -83,6 +85,8 @@ class BedrockResponse {
result = this.#parsedBody.stop_reason
} else if (cmd.isCohere() === true) {
result = this.#parsedBody.generations?.[0].finish_reason
} else if (cmd.isLlama2() === true) {
result = this.#parsedBody.stop_reason
} else if (cmd.isTitan() === true) {
result = this.#parsedBody.results?.[0]?.completionReason
}
Expand Down
2 changes: 1 addition & 1 deletion lib/llm/chat-completion-summary.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class LlmChatCompletionSummary extends LlmEvent {

if (cmd.isAi21() === true) {
this[nm] = 1 + this.bedrockResponse.completions.length
} else if (cmd.isClaude() === true) {
} else if (cmd.isClaude() === true || cmd.isLlama2() === true) {
this[nm] = 2
} else if (cmd.isCohere() === true) {
this[nm] = 1 + this.bedrockResponse.completions.length
Expand Down
42 changes: 41 additions & 1 deletion tests/unit/llm/bedrock-command.tap.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ const cohereEmbed = {
}
}

// Minimal Llama 2 invoke payload: just the model id and a prompt, with none
// of the optional tuning parameters (max_gen_length, temperature) set.
const llama2 = {
  modelId: 'meta.llama2-13b-chat-v1',
  body: {
    prompt: 'who are you'
  }
}

const titan = {
modelId: 'amazon.titan-text-lite-v1',
body: {
Expand Down Expand Up @@ -65,7 +72,15 @@ tap.beforeEach((t) => {

tap.test('non-conforming command is handled gracefully', async (t) => {
const cmd = new BedrockCommand(t.context.input)
for (const model of ['Ai21', 'Claude', 'Cohere', 'CohereEmbed', 'Titan', 'TitanEmbed']) {
for (const model of [
'Ai21',
'Claude',
'Cohere',
'CohereEmbed',
'Llama2',
'Titan',
'TitanEmbed'
]) {
t.equal(cmd[`is${model}`](), false)
}
t.equal(cmd.maxTokens, undefined)
Expand Down Expand Up @@ -161,6 +176,31 @@ tap.test('cohere embed minimal command works', async (t) => {
t.equal(cmd.temperature, undefined)
})

tap.test('llama2 minimal command works', async (t) => {
  // A payload carrying only modelId and prompt should still parse cleanly,
  // with the optional tuning accessors reporting undefined.
  const { context } = t
  context.updatePayload(structuredClone(llama2))
  const command = new BedrockCommand(context.input)
  t.equal(command.isLlama2(), true)
  t.equal(command.maxTokens, undefined)
  t.equal(command.modelId, llama2.modelId)
  t.equal(command.modelType, 'completion')
  t.equal(command.prompt, llama2.body.prompt)
  t.equal(command.temperature, undefined)
})

tap.test('llama2 complete command works', async (t) => {
  // Clone the base payload and flesh it out with the optional Llama 2
  // tuning parameters so every accessor has a concrete value to surface.
  const fullPayload = structuredClone(llama2)
  fullPayload.body.max_gen_length = 25
  fullPayload.body.temperature = 0.5
  t.context.updatePayload(fullPayload)
  const command = new BedrockCommand(t.context.input)
  t.equal(command.isLlama2(), true)
  t.equal(command.maxTokens, 25)
  t.equal(command.modelId, fullPayload.modelId)
  t.equal(command.modelType, 'completion')
  t.equal(command.prompt, fullPayload.body.prompt)
  t.equal(command.temperature, fullPayload.body.temperature)
})

tap.test('titan minimal command works', async (t) => {
t.context.updatePayload(structuredClone(titan))
const cmd = new BedrockCommand(t.context.input)
Expand Down
35 changes: 35 additions & 0 deletions tests/unit/llm/bedrock-response.tap.js
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@ const cohere = {
]
}

// Minimal Llama 2 response body: a single generated string plus the reason
// generation stopped.
const llama2 = {
  generation: 'llama2-response',
  stop_reason: 'done'
}

const titan = {
results: [
{
Expand Down Expand Up @@ -73,6 +78,9 @@ tap.beforeEach((t) => {
isCohere() {
return false
},
isLlama2() {
return false
},
isTitan() {
return false
}
Expand Down Expand Up @@ -177,6 +185,33 @@ tap.test('cohere complete responses work', async (t) => {
t.equal(res.statusCode, 200)
})

tap.test('llama2 malformed responses work', async (t) => {
  // With a body that lacks the Llama 2 fields, the wrapper should degrade
  // gracefully: no completions and no finish reason, while header-derived
  // metadata (token counts, request id, status) remains intact.
  t.context.bedrockCommand.isLlama2 = () => true
  const response = new BedrockResponse(t.context)
  t.same(response.completions, [])
  t.equal(response.finishReason, undefined)
  t.same(response.headers, t.context.response.response.headers)
  t.equal(response.id, undefined)
  t.equal(response.inputTokenCount, 25)
  t.equal(response.outputTokenCount, 25)
  t.equal(response.requestId, 'aws-request-1')
  t.equal(response.statusCode, 200)
})

tap.test('llama2 complete responses work', async (t) => {
  // A well-formed Llama 2 body should yield its generation as the sole
  // completion and expose its stop_reason as the finish reason. Llama 2
  // bodies carry no id, so `id` stays undefined even on success.
  t.context.bedrockCommand.isLlama2 = () => true
  t.context.updatePayload(structuredClone(llama2))
  const response = new BedrockResponse(t.context)
  t.same(response.completions, ['llama2-response'])
  t.equal(response.finishReason, 'done')
  t.same(response.headers, t.context.response.response.headers)
  t.equal(response.id, undefined)
  t.equal(response.inputTokenCount, 25)
  t.equal(response.outputTokenCount, 25)
  t.equal(response.requestId, 'aws-request-1')
  t.equal(response.statusCode, 200)
})

tap.test('titan malformed responses work', async (t) => {
t.context.bedrockCommand.isTitan = () => true
const res = new BedrockResponse(t.context)
Expand Down
17 changes: 17 additions & 0 deletions tests/unit/llm/chat-completion-summary.tap.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ tap.beforeEach((t) => {
isCohere() {
return false
},
isLlama2() {
return false
},
isTitan() {
return false
}
Expand Down Expand Up @@ -127,6 +130,20 @@ tap.test('creates a cohere summary', async (t) => {
t.equal(event['response.number_of_messages'], 2)
})

tap.test('creates a llama2 summary', async (t) => {
  // Force the stubbed command to report Llama 2 and verify every summary
  // attribute is populated from the shared test-context fixtures.
  const { context } = t
  context.bedrockCommand.isLlama2 = () => true
  const summary = new LlmChatCompletionSummary(context)
  t.equal(summary.conversation_id, 'conversation-1')
  t.equal(summary.duration, 100)
  t.equal(summary['request.max_tokens'], 25)
  t.equal(summary['request.temperature'], 0.5)
  t.equal(summary['response.choices.finish_reason'], 'done')
  t.equal(summary['response.usage.total_tokens'], 50)
  t.equal(summary['response.usage.prompt_tokens'], 25)
  t.equal(summary['response.usage.completion_tokens'], 25)
  t.equal(summary['response.number_of_messages'], 2)
})

tap.test('creates a titan summary', async (t) => {
t.context.bedrockCommand.isTitan = () => true
const event = new LlmChatCompletionSummary(t.context)
Expand Down
6 changes: 6 additions & 0 deletions tests/versioned/aws-server-stubs/ai-server/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,12 @@ function handler(req, res) {
break
}

case 'meta.llama2-13b-chat-v1':
case 'meta.llama2-70b-chat-v1': {
response = responses.llama2.get(payload.prompt)
break
}

default: {
response = { statusCode: 418, body: {} }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,12 @@ const ai21 = require('./ai21')
const amazon = require('./amazon')
const claude = require('./claude')
const cohere = require('./cohere')
const llama2 = require('./llama2')

// Canned response sets grouped by model vendor/family. The mock AI server
// picks the map matching the requested model id, then looks up a reply by
// prompt text.
module.exports = {
  ai21,
  amazon,
  claude,
  cohere,
  llama2
}
70 changes: 70 additions & 0 deletions tests/versioned/aws-server-stubs/ai-server/responses/llama2.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright 2024 New Relic Corporation. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/

'use strict'

// Canned Bedrock responses for Meta Llama 2 chat models, keyed by prompt
// text. The mock AI server resolves these via
// `responses.llama2.get(payload.prompt)`.
const responses = new Map()

// Non-streamed invocation. Token counts appear both in the
// `x-amzn-bedrock-*` headers and in the body's `*_token_count` fields.
// NOTE(review): the header counts (4 out / 8 in) do not match the body
// counts (205 / 14) — presumably captured from separate recordings; confirm
// whether any assertion depends on the exact numbers.
responses.set('text llama2 ultimate question', {
  headers: {
    'content-type': 'application/json',
    'x-amzn-requestid': 'eda0760a-c3f0-4fc1-9a1e-75559d642866',
    'x-amzn-bedrock-invocation-latency': 9677,
    'x-amzn-bedrock-output-token-count': 4,
    'x-amzn-bedrock-input-token-count': 8
  },
  statusCode: 200,
  body: {
    generation: '42',
    prompt_token_count: 14,
    generation_token_count: 205,
    stop_reason: 'endoftext'
  }
})

// Streamed invocation: an event-stream delivered as a series of JSON chunks.
// Only the final chunk carries a non-null `stop_reason`, along with the
// `amazon-bedrock-invocationMetrics` summary block.
responses.set('text llama2 ultimate question streamed', {
  headers: {
    'content-type': 'application/vnd.amazon.eventstream',
    'x-amzn-requestid': 'eda0760a-c3f0-4fc1-9a1e-75559d642866',
    'x-amzn-bedrock-content-type': 'application/json'
  },
  statusCode: 200,
  chunks: [
    {
      headers: {
        ':event-type': { type: 'string', value: 'chunk' },
        ':content-type': { type: 'string', value: 'application/json' },
        ':message-type': { type: 'string', value: 'event' }
      },
      body: {
        generation: '42',
        prompt_token_count: null,
        generation_token_count: 211,
        stop_reason: null
      }
    },
    {
      headers: {
        ':event-type': { type: 'string', value: 'chunk' },
        ':content-type': { type: 'string', value: 'application/json' },
        ':message-type': { type: 'string', value: 'event' }
      },
      body: {
        'generation': '',
        'prompt_token_count': null,
        'generation_token_count': 212,
        'stop_reason': 'endoftext',
        // Final-chunk metrics mirroring the headers of a non-streamed reply.
        'amazon-bedrock-invocationMetrics': {
          inputTokenCount: 14,
          outputTokenCount: 212,
          invocationLatency: 9825,
          firstByteLatency: 283
        }
      }
    }
  ]
})

module.exports = responses
7 changes: 6 additions & 1 deletion tests/versioned/v3/bedrock-chat-completions.tap.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ const requests = {
cohere: (prompt, modelId) => ({
body: JSON.stringify({ prompt }),
modelId
}),
llama2: (prompt, modelId) => ({
body: JSON.stringify({ prompt }),
modelId
})
}

Expand Down Expand Up @@ -76,7 +80,8 @@ tap.afterEach(async (t) => {
{ modelId: 'ai21.j2-ultra-v1', resKey: 'ai21' },
{ modelId: 'amazon.titan-text-express-v1', resKey: 'amazon' },
{ modelId: 'anthropic.claude-v2', resKey: 'claude' },
{ modelId: 'cohere.command-text-v14', resKey: 'cohere' }
{ modelId: 'cohere.command-text-v14', resKey: 'cohere' },
{ modelId: 'meta.llama2-13b-chat-v1', resKey: 'llama2' }
].forEach(({ modelId, resKey }) => {
tap.test(`${modelId}: should properly create completion segment`, (t) => {
const { bedrock, client, responses, helper, expectedExternalPath } = t.context
Expand Down

0 comments on commit 31dad9c

Please sign in to comment.