diff --git a/Makefile b/Makefile index 68f98a0b..c29b44cf 100644 --- a/Makefile +++ b/Makefile @@ -76,6 +76,9 @@ test-jobs: docker-build-test test-twitter: docker-build-test @docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/twitter_test.go ./internal/jobs/jobs_suite_test.go +test-tiktok: docker-build-test + @docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/tiktok_transcription_test.go ./internal/jobs/jobs_suite_test.go + test-web: docker-build-test @docker run --user root $(ENV_FILE_ARG) -v $(PWD)/.masa:/home/masa -v $(PWD)/coverage:/app/coverage --rm --workdir /app -e DATA_DIR=/home/masa $(TEST_IMAGE) go test -v ./internal/jobs/webscraper_test.go ./internal/jobs/jobs_suite_test.go diff --git a/go.mod b/go.mod index 52861b0f..15fbf216 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/joho/godotenv v1.5.1 github.com/labstack/echo-contrib v0.17.4 github.com/labstack/echo/v4 v4.13.4 - github.com/masa-finance/tee-types v1.1.6 + github.com/masa-finance/tee-types v1.1.7 github.com/onsi/ginkgo/v2 v2.23.4 github.com/onsi/gomega v1.38.0 github.com/sirupsen/logrus v1.9.3 diff --git a/go.sum b/go.sum index b381ea6e..c6e9ffa7 100644 --- a/go.sum +++ b/go.sum @@ -57,8 +57,8 @@ github.com/labstack/echo/v4 v4.13.4 h1:oTZZW+T3s9gAu5L8vmzihV7/lkXGZuITzTQkTEhcX github.com/labstack/echo/v4 v4.13.4/go.mod h1:g63b33BZ5vZzcIUF8AtRH40DrTlXnx4UMC8rBdndmjQ= github.com/labstack/gommon v0.4.2 h1:F8qTUNXgG1+6WQmqoUWnz8WiEU60mXVVw0P4ht1WRA0= github.com/labstack/gommon v0.4.2/go.mod h1:QlUFxVM+SNXhDL/Z7YhocGIBYOiwB0mXm1+1bAPHPyU= -github.com/masa-finance/tee-types v1.1.6 h1:vw5gOK2ZoCnsmrjdY9NCUR9GY9c0VxvzwQy5V4sNemo= -github.com/masa-finance/tee-types v1.1.6/go.mod h1:sB98t0axFlPi2d0zUPFZSQ84mPGwbr9eRY5yLLE3fSc= +github.com/masa-finance/tee-types v1.1.7 h1:VF55egisbUCAsfyhvGM26rzt+oKfJgHqcK/LW4uQ9M4= +github.com/masa-finance/tee-types v1.1.7/go.mod h1:sB98t0axFlPi2d0zUPFZSQ84mPGwbr9eRY5yLLE3fSc= github.com/masa-finance/twitter-scraper v1.0.2 h1:him+wvYZHg/7EDdy73z1ceUywDJDRAhPLD2CSEa2Vfk= github.com/masa-finance/twitter-scraper v1.0.2/go.mod h1:38MY3g/h4V7Xl4HbW9lnkL8S3YiFZenBFv86hN57RG8= github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= diff --git a/internal/jobs/tiktok_transcription.go b/internal/jobs/tiktok_transcription.go index d7a704d1..ba893a13 100644 --- a/internal/jobs/tiktok_transcription.go +++ b/internal/jobs/tiktok_transcription.go @@ -203,34 +203,18 @@ func (ttt *TikTokTranscriber) ExecuteJob(j types.Job) (types.JobResult, error) { } vttText := "" - finalDetectedLanguage := "" - // Try requested/default language - if selectedLanguageKey != "" { - if transcript, ok := parsedAPIResponse.Transcripts[selectedLanguageKey]; ok { - vttText = transcript - finalDetectedLanguage = selectedLanguageKey - } - } - - // If not found, try a hardcoded common default or first available - if vttText == "" { - commonDefault := "eng-US" // As per spec - if transcript, ok := parsedAPIResponse.Transcripts[commonDefault]; ok { - vttText = transcript - finalDetectedLanguage = commonDefault - } else { // Pick the first one available if commonDefault also not found - for lang, transcript := range parsedAPIResponse.Transcripts { - vttText = transcript - finalDetectedLanguage = lang - logrus.WithFields(logrus.Fields{ - "job_uuid": j.UUID, - "requested_lang": selectedLanguageKey, - "fallback_used": finalDetectedLanguage, - }).Info("Requested/default language not found, using first available transcript") - break - } - } + // Directly use the requested/default language; if missing, return an error + if transcript, ok := parsedAPIResponse.Transcripts[selectedLanguageKey]; ok && strings.TrimSpace(transcript) != "" { + vttText = transcript + } else { + errMsg := fmt.Sprintf("Transcript for requested language %s not found in API response", selectedLanguageKey) + logrus.WithFields(logrus.Fields{ + "job_uuid": j.UUID, + "requested_lang": selectedLanguageKey, + }).Error(errMsg) + ttt.stats.Add(j.WorkerID, stats.TikTokTranscriptionErrors, 1) + return types.JobResult{Error: errMsg}, fmt.Errorf(errMsg) } if vttText == "" { @@ -240,7 +224,7 @@ func (ttt *TikTokTranscriber) ExecuteJob(j types.Job) (types.JobResult, error) { return types.JobResult{Error: errMsg}, fmt.Errorf(errMsg) } - logrus.Debugf("Job %s: Raw VTT content for language %s:\n%s", j.UUID, finalDetectedLanguage, vttText) + logrus.Debugf("Job %s: Raw VTT content for language %s:\n%s", j.UUID, selectedLanguageKey, vttText) // Convert VTT to Plain Text plainTextTranscription, err := convertVTTToPlainText(vttText) @@ -255,7 +239,7 @@ func (ttt *TikTokTranscriber) ExecuteJob(j types.Job) (types.JobResult, error) { // Process Result & Return resultData := teetypes.TikTokTranscriptionResult{ TranscriptionText: plainTextTranscription, - DetectedLanguage: finalDetectedLanguage, + DetectedLanguage: selectedLanguageKey, VideoTitle: parsedAPIResponse.VideoTitle, OriginalURL: tiktokArgs.GetVideoURL(), ThumbnailURL: parsedAPIResponse.ThumbnailURL, diff --git a/internal/jobs/tiktok_transcription_test.go b/internal/jobs/tiktok_transcription_test.go index 0b3b9aad..b9f10191 100644 --- a/internal/jobs/tiktok_transcription_test.go +++ b/internal/jobs/tiktok_transcription_test.go @@ -41,10 +41,10 @@ var _ = Describe("TikTokTranscriber", func() { Context("when a valid TikTok URL is provided", func() { It("should successfully transcribe the video and record success stats", func(ctx SpecContext) { - videoURL := "https://www.tiktok.com/@coachty23/video/7502100651397172526" + videoURL := "https://www.tiktok.com/@.jake.ai/video/7516694182245813509" jobArguments := map[string]interface{}{ "video_url": videoURL, - "language": "eng-US", // Request a specific language + // default language is eng-US from tee types } job := types.Job{