Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ jobs:
restore-keys: |
${{ runner.os }}-gradle-

- name: Compile
run: ./gradlew compileJava
- name: Compile SDK and examples
run: ./gradlew compileJava compileExamples

test:
name: Unit Tests (Java ${{ matrix.java-version }})
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/tests-daily.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ jobs:
restore-keys: |
${{ runner.os }}-gradle-

- name: Compile
run: ./gradlew compileJava
- name: Compile SDK and examples
run: ./gradlew compileJava compileExamples

unit-tests:
name: Unit Tests
Expand Down
124 changes: 102 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ The SDK supports API Key authentication with automatic environment variable load
import com.deepgram.DeepgramClient;

// Using environment variable (DEEPGRAM_API_KEY)
DeepgramClient client = DeepgramClient.builder().build();
DeepgramClient envClient = DeepgramClient.builder().build();

// Using API key directly
DeepgramClient client = DeepgramClient.builder()
DeepgramClient explicitClient = DeepgramClient.builder()
.apiKey("YOUR_DEEPGRAM_API_KEY")
.build();
```
Expand Down Expand Up @@ -220,13 +220,20 @@ Stream audio for real-time speech-to-text.

```java
import com.deepgram.DeepgramClient;
import com.deepgram.resources.listen.v1.types.ListenV1CloseStream;
import com.deepgram.resources.listen.v1.types.ListenV1CloseStreamType;
import com.deepgram.resources.listen.v1.websocket.V1WebSocketClient;
import com.deepgram.resources.listen.v1.websocket.V1ConnectOptions;
import com.deepgram.types.ListenV1Model;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.TimeUnit;
import okio.ByteString;

DeepgramClient client = DeepgramClient.builder().build();
byte[] audioBytes = Files.readAllBytes(Path.of("audio.wav"));

V1WebSocketClient ws = client.listen().v1().websocket();
V1WebSocketClient ws = client.listen().v1().v1WebSocket();

// Register event handlers
ws.onResults(results -> {
Expand All @@ -247,9 +254,14 @@ ws.onError(error -> {
// Connect with options (model is required)
ws.connect(V1ConnectOptions.builder()
.model(ListenV1Model.NOVA3)
.build());
.build())
.get(10, TimeUnit.SECONDS);

ws.send(audioBytes);
ws.sendMedia(ByteString.of(audioBytes));
ws.sendCloseStream(ListenV1CloseStream.builder()
.type(ListenV1CloseStreamType.CLOSE_STREAM)
.build())
.get(5, TimeUnit.SECONDS);

// Close when done
ws.close();
Expand All @@ -261,15 +273,25 @@ Stream text for real-time audio generation.

```java
import com.deepgram.DeepgramClient;
import com.deepgram.resources.speak.v1.types.SpeakV1Close;
import com.deepgram.resources.speak.v1.types.SpeakV1CloseType;
import com.deepgram.resources.speak.v1.types.SpeakV1Flush;
import com.deepgram.resources.speak.v1.types.SpeakV1FlushType;
import com.deepgram.resources.speak.v1.types.SpeakV1Text;
import com.deepgram.resources.speak.v1.websocket.V1WebSocketClient;
import java.io.ByteArrayOutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.TimeUnit;

DeepgramClient client = DeepgramClient.builder().build();
ByteArrayOutputStream audioBuffer = new ByteArrayOutputStream();

var ttsWs = client.speak().v1().websocket();
V1WebSocketClient ttsWs = client.speak().v1().v1WebSocket();

// Register event handlers
ttsWs.onAudioData(audioData -> {
// Process audio chunks as they arrive
ttsWs.onSpeakV1Audio(audioData -> {
audioBuffer.writeBytes(audioData.toByteArray());
});

ttsWs.onMetadata(metadata -> {
Expand All @@ -281,8 +303,23 @@ ttsWs.onError(error -> {
});

// Connect and send text
ttsWs.connect();
ttsWs.send("Hello, this is streamed text-to-speech.");
ttsWs.connect().get(10, TimeUnit.SECONDS);
ttsWs.sendText(SpeakV1Text.builder()
.text("Hello, this is streamed text-to-speech.")
.build())
.get(5, TimeUnit.SECONDS);
ttsWs.sendFlush(SpeakV1Flush.builder()
.type(SpeakV1FlushType.FLUSH)
.build())
.get(5, TimeUnit.SECONDS);

Thread.sleep(2000);
Files.write(Path.of("output.wav"), audioBuffer.toByteArray());

ttsWs.sendClose(SpeakV1Close.builder()
.type(SpeakV1CloseType.CLOSE)
.build())
.get(5, TimeUnit.SECONDS);

// Close when done
ttsWs.close();
Expand All @@ -294,24 +331,58 @@ Connect to Deepgram's voice agent for real-time conversational AI.

```java
import com.deepgram.DeepgramClient;
import com.deepgram.resources.agent.v1.types.AgentV1InjectUserMessage;
import com.deepgram.resources.agent.v1.types.AgentV1Settings;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgent;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThink;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItem;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItemProvider;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAudio;
import com.deepgram.resources.agent.v1.websocket.V1WebSocketClient;
import com.deepgram.types.OpenAiThinkProvider;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;

DeepgramClient client = DeepgramClient.builder().build();

var agentWs = client.agent().v1().websocket();
V1WebSocketClient agentWs = client.agent().v1().v1WebSocket();

// Register event handlers
agentWs.onWelcome(welcome -> {
System.out.println("Agent connected");

agentWs.sendSettings(AgentV1Settings.builder()
.audio(AgentV1SettingsAudio.builder().build())
.agent(AgentV1SettingsAgent.builder()
.think(AgentV1SettingsAgentThink.of(List.of(
AgentV1SettingsAgentThinkOneItem.builder()
.provider(AgentV1SettingsAgentThinkOneItemProvider.of(
OpenAiThinkProvider.of(Map.of("model", "gpt-4o-mini"))))
.prompt("You are a helpful voice assistant. Keep responses brief.")
.build())))
.greeting("Hello! How can I help you today?")
.build())
.build());
});

agentWs.onSettingsApplied(applied -> {
agentWs.sendInjectUserMessage(AgentV1InjectUserMessage.builder()
.content("What is the capital of France?")
.build());
});

agentWs.onConversationText(text -> {
System.out.printf("[%s] %s%n", text.getRole(), text.getContent());
});

agentWs.onError(error -> {
System.err.println("Error: " + error.getMessage());
});

// Connect and interact
agentWs.connect();
agentWs.send(audioBytes);
// Connect and wait for the agent to respond
agentWs.connect().get(10, TimeUnit.SECONDS);
Thread.sleep(5000);

// Close when done
agentWs.close();
Expand All @@ -330,13 +401,22 @@ Use the separate [`deepgram-sagemaker`](https://github.com/deepgram/deepgram-jav
```groovy
dependencies {
implementation 'com.deepgram:deepgram-java-sdk:0.2.1' // x-release-please-version
implementation 'com.deepgram:deepgram-sagemaker:0.1.0'
implementation 'com.deepgram:deepgram-sagemaker:0.1.2'
}
```

```java
import com.deepgram.DeepgramClient;
import com.deepgram.sagemaker.SageMakerConfig;
import com.deepgram.sagemaker.SageMakerTransportFactory;
import com.deepgram.resources.listen.v1.websocket.V1ConnectOptions;
import com.deepgram.types.ListenV1Model;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.TimeUnit;
import okio.ByteString;

byte[] audioBytes = Files.readAllBytes(Path.of("audio.wav"));

var factory = new SageMakerTransportFactory(
SageMakerConfig.builder()
Expand All @@ -351,10 +431,11 @@ DeepgramClient client = DeepgramClient.builder()
.build();

// Use the SDK exactly as normal — the transport is transparent
var ws = client.listen().v1().websocket();
var ws = client.listen().v1().v1WebSocket();
ws.onResults(results -> { /* ... */ });
ws.connect(V1ConnectOptions.builder().model(ListenV1Model.NOVA3).build());
ws.sendMedia(audioBytes);
ws.connect(V1ConnectOptions.builder().model(ListenV1Model.NOVA3).build())
.get(10, TimeUnit.SECONDS);
ws.sendMedia(ByteString.of(audioBytes));
```

See the [SageMaker example](examples/sagemaker/LiveStreamingSageMaker.java) for a complete walkthrough.
Expand Down Expand Up @@ -511,7 +592,6 @@ import com.deepgram.resources.listen.v1.media.types.MediaTranscribeResponse;
DeepgramApiHttpResponse<MediaTranscribeResponse> rawResponse =
client.listen().v1().media().withRawResponse().transcribeUrl(request);

int statusCode = rawResponse.statusCode();
var headers = rawResponse.headers();
MediaTranscribeResponse body = rawResponse.body();
```
Expand All @@ -524,13 +604,13 @@ The SDK provides comprehensive access to Deepgram's APIs:
```java
client.listen().v1().media().transcribeUrl(request) // Transcribe audio from URL
client.listen().v1().media().transcribeFile(body) // Transcribe audio from file bytes
client.listen().v1().websocket() // Real-time streaming transcription
client.listen().v1().v1WebSocket() // Real-time streaming transcription
```

### Speak (Text-to-Speech)
```java
client.speak().v1().audio().generate(request) // Generate speech from text
client.speak().v1().websocket() // Real-time streaming TTS
client.speak().v1().v1WebSocket() // Real-time streaming TTS
```

### Read (Text Intelligence)
Expand All @@ -541,7 +621,7 @@ client.read().v1().text().analyze(request) // Analyze text content
### Agent (Voice Agent)
```java
client.agent().v1().settings().think().models().list() // List available agent models
client.agent().v1().websocket() // Real-time agent WebSocket
client.agent().v1().v1WebSocket() // Real-time agent WebSocket
```

### Manage (Project Management)
Expand Down
11 changes: 8 additions & 3 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ dependencies {
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
testImplementation 'com.squareup.okhttp3:mockwebserver:4.12.0'
testImplementation 'org.assertj:assertj-core:3.25.3'
testImplementation 'com.deepgram:deepgram-sagemaker:0.1.2'

}

test {
Expand Down Expand Up @@ -78,8 +80,7 @@ sourceSets {
examples {
java {
srcDir 'examples'
// Exclude examples with known issues (see ISSUES section below)
exclude 'agent/**' // Blocked: missing AgentV1UpdateThink type
// Exclude examples that still need API updates.
exclude 'manage/ListModels.java' // Duplicate class name with agent/ListModels
exclude 'manage/MemberPermissions.java' // getScopes() not in generated API
exclude 'manage/UsageBreakdown.java' // getModels() return type mismatch
Expand All @@ -89,6 +90,11 @@ sourceSets {
}
}

dependencies {
// Optional dependencies needed by example source set
examplesImplementation 'com.deepgram:deepgram-sagemaker:0.1.2'
}

// Compile all examples
tasks.register('compileExamples') {
dependsOn 'examplesClasses'
Expand Down Expand Up @@ -123,4 +129,3 @@ spotless {
removeUnusedImports()
}
}

52 changes: 24 additions & 28 deletions examples/agent/CustomProviders.java
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
import com.deepgram.DeepgramClient;
import com.deepgram.resources.agent.v1.types.AgentV1Settings;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgent;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentContext;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentContextSpeak;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentContextThink;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentSpeak;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentSpeakEndpoint;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentSpeakEndpointProvider;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentSpeakOneItemProviderDeepgramModel;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThink;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItem;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAgentThinkOneItemProvider;
import com.deepgram.resources.agent.v1.types.AgentV1SettingsAudio;
import com.deepgram.resources.agent.v1.types.Deepgram;
import com.deepgram.resources.agent.v1.websocket.V1WebSocketClient;
import com.deepgram.types.Anthropic;
import com.deepgram.types.AnthropicThinkProviderModel;
import com.deepgram.types.Deepgram;
import com.deepgram.types.DeepgramSpeakProviderModel;
import com.deepgram.types.SpeakSettingsV1;
import com.deepgram.types.SpeakSettingsV1Provider;
import com.deepgram.types.ThinkSettingsV1;
import com.deepgram.types.ThinkSettingsV1Provider;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -57,34 +57,30 @@ public static void main(String[] args) {

try {
// Configure Anthropic as the think provider
Anthropic anthropicProvider = Anthropic.builder()
.model(AnthropicThinkProviderModel.CLAUDE_SONNET420250514)
.build();

ThinkSettingsV1 thinkSettings = ThinkSettingsV1.builder()
.provider(ThinkSettingsV1Provider.anthropic(anthropicProvider))
.prompt("You are a helpful assistant. Keep responses concise.")
.build();
Anthropic anthropicProvider = Anthropic.of(Map.of("model", "claude-sonnet-4-20250514"));

// Configure Deepgram as the speak provider
Deepgram deepgramSpeakProvider = Deepgram.builder()
.model(DeepgramSpeakProviderModel.AURA2ASTERIA_EN)
.build();

SpeakSettingsV1 speakSettings = SpeakSettingsV1.builder()
.provider(SpeakSettingsV1Provider.deepgram(deepgramSpeakProvider))
.model(AgentV1SettingsAgentSpeakOneItemProviderDeepgramModel.AURA2ASTERIA_EN)
.build();

// Build agent settings with both providers
AgentV1SettingsAgentContext agentContext = AgentV1SettingsAgentContext.builder()
.think(AgentV1SettingsAgentContextThink.of(thinkSettings))
.speak(AgentV1SettingsAgentContextSpeak.of(speakSettings))
AgentV1SettingsAgentSpeak speakSettings =
AgentV1SettingsAgentSpeak.of(AgentV1SettingsAgentSpeakEndpoint.builder()
.provider(AgentV1SettingsAgentSpeakEndpointProvider.deepgram(deepgramSpeakProvider))
.build());

AgentV1SettingsAgent agentConfig = AgentV1SettingsAgent.builder()
.think(AgentV1SettingsAgentThink.of(List.of(AgentV1SettingsAgentThinkOneItem.builder()
.provider(AgentV1SettingsAgentThinkOneItemProvider.of(anthropicProvider))
.prompt("You are a helpful assistant. Keep responses concise.")
.build())))
.speak(speakSettings)
.greeting("Hello! I'm powered by Anthropic Claude with Deepgram voices.")
.build();

AgentV1Settings settings = AgentV1Settings.builder()
.audio(AgentV1SettingsAudio.builder().build())
.agent(AgentV1SettingsAgent.of(agentContext))
.agent(agentConfig)
.build();

wsClient.sendSettings(settings);
Expand Down
Loading
Loading