Skip to content

Commit

Permalink
implement #74
Browse files Browse the repository at this point in the history
  • Loading branch information
jbilcke-hf committed Aug 27, 2024
1 parent 9bbe0fa commit b67cf31
Show file tree
Hide file tree
Showing 3 changed files with 146 additions and 0 deletions.
73 changes: 73 additions & 0 deletions packages/app/src/app/api/resolve/providers/replicate/runLipSync.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import Replicate from 'replicate'
import { ClapSegmentCategory } from '@aitube/clap'
import { TimelineSegment } from '@aitube/timeline'
import { ResolveRequest } from '@aitube/clapper-services'

/**
 * Lip-syncs a video segment against the first dialogue track of the request,
 * using a model hosted on Replicate.com.
 *
 * The model identifier is read from `request.settings.videoLipsyncWorkflow.data`.
 * When the model call succeeds, `request.segment.assetUrl` is overwritten in
 * place with the URL returned by the model.
 *
 * Failures that happen during the model call (including an empty output) are
 * treated as best-effort: they are logged with `console.error` and the segment
 * is returned with its original `assetUrl`.
 *
 * @param request - the resolve request; `request.segment` is the video to
 *   process and `request.segments` is searched for the first DIALOGUE segment,
 *   whose `assetUrl` is used as the audio input.
 * @returns the same object as `request.segment` (mutated on success).
 * @throws Error when the Replicate API key is missing, when the segment is not
 *   in the VIDEO category, or when the model identifier, the video asset or
 *   the dialogue audio is missing.
 */
export async function runLipSync(
  request: ResolveRequest
): Promise<TimelineSegment> {
  if (!request.settings.replicateApiKey) {
    throw new Error(`Missing API key for "Replicate.com"`)
  }

  const replicate = new Replicate({ auth: request.settings.replicateApiKey })

  const segment: TimelineSegment = request.segment

  if (segment.category !== ClapSegmentCategory.VIDEO) {
    throw new Error(
      `Clapper doesn't support lip sync for the "${segment.category}" category using Replicate.com yet`
    )
  }

  // optional chaining: an unconfigured workflow must produce the descriptive
  // error below rather than a TypeError on `.data`
  const videoLipsyncWorkflowModel =
    request.settings.videoLipsyncWorkflow?.data || ''

  if (!videoLipsyncWorkflowModel) {
    throw new Error(
      `cannot run the lip sync without an videoLipsyncWorkflowModel`
    )
  }

  if (!segment.assetUrl) {
    throw new Error(`cannot run the lip sync without a video`)
  }

  // the audio track is the asset of the first dialogue segment in the request
  const firstDialogue = request.segments.find(
    (s) => s.category === ClapSegmentCategory.DIALOGUE
  )
  const firstDialogueAudio = firstDialogue?.assetUrl

  if (!firstDialogueAudio) {
    throw new Error(`cannot run the lip sync without a dialogue speech`)
  }

  try {
    const output: unknown = await replicate.run(
      // the setting is a free-form string, while the client expects an
      // "owner/model[:version]" identifier type
      videoLipsyncWorkflowModel as any,
      {
        input: {
          // note: this is actually a VIDEO (they call it face, but it's a face video)
          face: segment.assetUrl,
          input_audio: firstDialogueAudio,

          disable_safety_checker:
            !request.settings.censorNotForAllAudiencesContent,
        },
      }
    )

    // the output is stringified to obtain the video URL; if the model returns
    // a list of outputs we only keep the first one, since joining several
    // entries with commas would not produce a usable URL
    const firstOutput = Array.isArray(output) ? output[0] : output
    const videoResult = `${firstOutput || ''}`

    if (!videoResult) {
      throw new Error(`the generated video is empty`)
    }

    segment.assetUrl = videoResult
  } catch (err) {
    // best-effort: lip sync is an optional post-processing step, so we keep
    // the original video instead of failing the whole request
    console.error(`failed to run a lip sync using Replicate.com:`, err)
  }

  return segment
}
56 changes: 56 additions & 0 deletions packages/app/src/app/api/resolve/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import { getMediaInfo } from '@/lib/ffmpeg/getMediaInfo'
import { getSegmentWorkflowProviderAndEngine } from '@/services/editors/workflow-editor/getSegmentWorkflowProviderAndEngine'
import { runFaceSwap as runFaceswapWithFalAi } from './providers/falai/runFaceSwap'
import { runFaceSwap as runFaceswapWithReplicate } from './providers/replicate/runFaceSwap'
import { runLipSync as runLipSyncWithReplicate } from './providers/replicate/runLipSync'

type ProviderFn = (request: ResolveRequest) => Promise<TimelineSegment>

Expand All @@ -54,6 +55,9 @@ export async function POST(req: NextRequest) {
faceswapWorkflow,
faceswapProvider,
faceswapEngine,
lipsyncWorkflow,
lipsyncProvider,
lipsyncEngine,
} = getSegmentWorkflowProviderAndEngine(request)

/*
Expand Down Expand Up @@ -202,5 +206,57 @@ export async function POST(req: NextRequest) {
}
}

// extra step: lip sync
// for this we need to have a valid video
// (or we could use a simple image + audio model)

const hasValidVideo =
segment.category === ClapSegmentCategory.VIDEO && segment.assetUrl

const firstDialogue = request.segments.find(
(s) => s.category === ClapSegmentCategory.DIALOGUE
)
const hasValidAudio = firstDialogue?.assetUrl

if (
lipsyncProvider &&
request.settings.videoLipsyncWorkflow.data &&
hasValidVideo &&
hasValidAudio
) {
const lipsyncProviders: Partial<Record<ClapWorkflowProvider, ProviderFn>> =
{
// TODO use Fal.ai? I think they only have SadTalker?
[ClapWorkflowProvider.REPLICATE]: runLipSyncWithReplicate,
}

const lipsync: ProviderFn | undefined =
lipsyncProviders[lipsyncProvider] || undefined

if (lipsync) {
try {
await lipsync(request)

// we clean-up and parse the output from all the resolvers:
// this will download files hosted on CDNs, convert WAV files to MP3 etc

segment.assetUrl = await decodeOutput(segment.assetUrl)

segment.assetSourceType = getClapAssetSourceType(segment.assetUrl)

segment.status = ClapSegmentStatus.COMPLETED

const { assetFileFormat, outputType } = getTypeAndExtension(
segment.assetUrl
)

segment.assetFileFormat = assetFileFormat
segment.outputType = outputType
} catch (err) {
console.error(`failed to run the lipsync (${err})`)
}
}
}

return NextResponse.json(segment)
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ export function getSegmentWorkflowProviderAndEngine({
faceswapWorkflow?: ClapWorkflow
faceswapProvider?: ClapWorkflowProvider
faceswapEngine?: ClapWorkflowEngine
lipsyncWorkflow?: ClapWorkflow
lipsyncProvider?: ClapWorkflowProvider
lipsyncEngine?: ClapWorkflowEngine
} {
const generationWorkflow: ClapWorkflow | undefined =
segment.category === ClapSegmentCategory.STORYBOARD
Expand Down Expand Up @@ -53,12 +56,26 @@ export function getSegmentWorkflowProviderAndEngine({
const faceswapEngine: ClapWorkflowEngine | undefined =
faceswapWorkflow?.engine || undefined

const lipsyncWorkflow: ClapWorkflow | undefined =
segment.category === ClapSegmentCategory.VIDEO
? settings.videoLipsyncWorkflow
: undefined

const lipsyncProvider: ClapWorkflowProvider | undefined =
lipsyncWorkflow?.provider || undefined

const lipsyncEngine: ClapWorkflowEngine | undefined =
lipsyncWorkflow?.engine || undefined

return {
generationWorkflow,
generationProvider,
generationEngine,
faceswapWorkflow,
faceswapProvider,
faceswapEngine,
lipsyncWorkflow,
lipsyncProvider,
lipsyncEngine,
}
}

0 comments on commit b67cf31

Please sign in to comment.