Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ test-jobs: docker-build-test
# test-twitter: run the Twitter job tests inside the test image.
# Depends on docker-build-test. Starts a throwaway (--rm) container as root
# from $(TEST_IMAGE), with $(PWD)/.masa mounted at /home/masa (also exported as
# DATA_DIR) and $(PWD)/coverage mounted at /app/coverage, then runs `go test -v`
# from /app on twitter_test.go together with jobs_suite_test.go.
# $(ENV_FILE_ARG) is defined elsewhere; presumably it passes an env file to
# docker run (confirm). The leading `@` stops make from echoing the command.
test-twitter: docker-build-test
@docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/twitter_test.go ./internal/jobs/jobs_suite_test.go

# test-tiktok: run the TikTok transcription job tests inside the test image.
# Depends on docker-build-test. Starts a throwaway (--rm) container as root
# from $(TEST_IMAGE), with $(PWD)/.masa mounted at /home/masa (also exported as
# DATA_DIR) and $(PWD)/coverage mounted at /app/coverage, then runs `go test -v`
# from /app on tiktok_transcription_test.go together with jobs_suite_test.go.
# $(ENV_FILE_ARG) is defined elsewhere; presumably it passes an env file to
# docker run (confirm). The leading `@` stops make from echoing the command.
test-tiktok: docker-build-test
@docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/tiktok_transcription_test.go ./internal/jobs/jobs_suite_test.go

# test-web: run the web scraper job tests inside the test image.
# Depends on docker-build-test. Starts a throwaway (--rm) container as root
# from $(TEST_IMAGE), with $(PWD)/.masa mounted at /home/masa (also exported as
# DATA_DIR) and $(PWD)/coverage mounted at /app/coverage, then runs `go test -v`
# from /app on webscraper_test.go together with jobs_suite_test.go.
# $(ENV_FILE_ARG) is defined elsewhere; presumably it passes an env file to
# docker run (confirm). The leading `@` stops make from echoing the command.
test-web: docker-build-test
@docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/webscraper_test.go ./internal/jobs/jobs_suite_test.go

Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ require (
github.com/joho/godotenv v1.5.1
github.com/labstack/echo-contrib v0.17.4
github.com/labstack/echo/v4 v4.13.4
github.com/masa-finance/tee-types v1.1.6
github.com/masa-finance/tee-types v1.1.7
github.com/onsi/ginkgo/v2 v2.23.4
github.com/onsi/gomega v1.38.0
github.com/sirupsen/logrus v1.9.3
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ github.com/labstack/echo/v4 v4.13.4 h1:oTZZW+T3s9gAu5L8vmzihV7/lkXGZuITzTQkTEhcX
github.com/labstack/echo/v4 v4.13.4/go.mod h1:g63b33BZ5vZzcIUF8AtRH40DrTlXnx4UMC8rBdndmjQ=
github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0=
github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU=
github.com/masa-finance/tee-types v1.1.6 h1:vw5gOK2ZoCnsmrjdY9NCUR9GY9c0VxvzwQy5V4sNemo=
github.com/masa-finance/tee-types v1.1.6/go.mod h1:sB98t0axFlPi2d0zUPFZSQ84mPGwbr9eRY5yLLE3fSc=
github.com/masa-finance/tee-types v1.1.7 h1:VF55egisbUCAsfyhvGM26rzt+oKfJgHqcK/LW4uQ9M4=
github.com/masa-finance/tee-types v1.1.7/go.mod h1:sB98t0axFlPi2d0zUPFZSQ84mPGwbr9eRY5yLLE3fSc=
github.com/masa-finance/twitter-scraper v1.0.2 h1:him+wvYZHg/7EDdy73z1ceUywDJDRAhPLD2CSEa2Vfk=
github.com/masa-finance/twitter-scraper v1.0.2/go.mod h1:38MY3g/h4V7Xl4HbW9lnkL8S3YiFZenBFv86hN57RG8=
github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE=
Expand Down
42 changes: 13 additions & 29 deletions internal/jobs/tiktok_transcription.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,34 +203,18 @@ func (ttt *TikTokTranscriber) ExecuteJob(j types.Job) (types.JobResult, error) {
}

vttText := ""
finalDetectedLanguage := ""

// Try requested/default language
if selectedLanguageKey != "" {
if transcript, ok := parsedAPIResponse.Transcripts[selectedLanguageKey]; ok {
vttText = transcript
finalDetectedLanguage = selectedLanguageKey
}
}

// If not found, try a hardcoded common default or first available
if vttText == "" {
commonDefault := "eng-US" // As per spec
if transcript, ok := parsedAPIResponse.Transcripts[commonDefault]; ok {
vttText = transcript
finalDetectedLanguage = commonDefault
} else { // Pick the first one available if commonDefault also not found
for lang, transcript := range parsedAPIResponse.Transcripts {
vttText = transcript
finalDetectedLanguage = lang
logrus.WithFields(logrus.Fields{
"job_uuid": j.UUID,
"requested_lang": selectedLanguageKey,
"fallback_used": finalDetectedLanguage,
}).Info("Requested/default language not found, using first available transcript")
break
}
}
// Use the requested/default language directly; if its transcript is missing or blank, return an error
if transcript, ok := parsedAPIResponse.Transcripts[selectedLanguageKey]; ok && strings.TrimSpace(transcript) != "" {
vttText = transcript
} else {
errMsg := fmt.Sprintf("Transcript for requested language %s not found in API response", selectedLanguageKey)
logrus.WithFields(logrus.Fields{
"job_uuid": j.UUID,
"requested_lang": selectedLanguageKey,
}).Error(errMsg)
ttt.stats.Add(j.WorkerID, stats.TikTokTranscriptionErrors, 1)
return types.JobResult{Error: errMsg}, fmt.Errorf(errMsg)
}

if vttText == "" {
Expand All @@ -240,7 +224,7 @@ func (ttt *TikTokTranscriber) ExecuteJob(j types.Job) (types.JobResult, error) {
return types.JobResult{Error: errMsg}, fmt.Errorf(errMsg)
}

logrus.Debugf("Job %s: Raw VTT content for language %s:\n%s", j.UUID, finalDetectedLanguage, vttText)
logrus.Debugf("Job %s: Raw VTT content for language %s:\n%s", j.UUID, selectedLanguageKey, vttText)

// Convert VTT to Plain Text
plainTextTranscription, err := convertVTTToPlainText(vttText)
Expand All @@ -255,7 +239,7 @@ func (ttt *TikTokTranscriber) ExecuteJob(j types.Job) (types.JobResult, error) {
// Process Result & Return
resultData := teetypes.TikTokTranscriptionResult{
TranscriptionText: plainTextTranscription,
DetectedLanguage: finalDetectedLanguage,
DetectedLanguage: selectedLanguageKey,
VideoTitle: parsedAPIResponse.VideoTitle,
OriginalURL: tiktokArgs.GetVideoURL(),
ThumbnailURL: parsedAPIResponse.ThumbnailURL,
Expand Down
4 changes: 2 additions & 2 deletions internal/jobs/tiktok_transcription_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ var _ = Describe("TikTokTranscriber", func() {

Context("when a valid TikTok URL is provided", func() {
It("should successfully transcribe the video and record success stats", func(ctx SpecContext) {
videoURL := "https://www.tiktok.com/@coachty23/video/7502100651397172526"
videoURL := "https://www.tiktok.com/@.jake.ai/video/7516694182245813509"
jobArguments := map[string]interface{}{
"video_url": videoURL,
"language": "eng-US", // Request a specific language
// no "language" argument is passed: the default (eng-US) comes from tee-types
}

job := types.Job{
Expand Down
Loading