diff --git a/fia_api/web/api/teacher/schema.py b/fia_api/web/api/teacher/schema.py index 559a70f..d7ba033 100644 --- a/fia_api/web/api/teacher/schema.py +++ b/fia_api/web/api/teacher/schema.py @@ -1,5 +1,6 @@ from typing import Any, List, Optional, Union +from fastapi import UploadFile from pydantic import BaseModel, Field @@ -142,3 +143,11 @@ class GetAudioRequest(BaseModel): text: str language_code: str # TODO: Add features like language and speaker type? + + +class ConverseWithAudioRequest(BaseModel): + """Request object for an audio file.""" + + conversation_id: str + language_code: str + audio_file: UploadFile diff --git a/fia_api/web/api/teacher/utils.py b/fia_api/web/api/teacher/utils.py index c284614..0d7680c 100644 --- a/fia_api/web/api/teacher/utils.py +++ b/fia_api/web/api/teacher/utils.py @@ -352,11 +352,12 @@ async def initialize_conversation( return await get_response(str(conversation_id), message, user) -async def get_text_from_audio(audio_file: UploadFile) -> str: +async def get_text_from_audio(audio_file: UploadFile, language_code: str) -> str: """ Given a file, return the text. :param audio_file: UploadFile object to transcode to text. + :param language_code: String language code the audio is in. :return: String text. """ # TODO: Shouldn't have to do this dance with writing/reading the file! @@ -366,7 +367,11 @@ async def get_text_from_audio(audio_file: UploadFile) -> str: with open("/tmp/whatever.wav", "rb") as in_fh: # noqa: S108 # TODO: Store the token usage too - return openai.Audio.transcribe("whisper-1", in_fh, language="de")["text"] + return openai.Audio.transcribe( + "whisper-1", + in_fh, + language=language_code, + )["text"] # TODO: Make this bytes or whatever. diff --git a/fia_api/web/api/teacher/views.py b/fia_api/web/api/teacher/views.py index eee0dc8..2329342 100644 --- a/fia_api/web/api/teacher/views.py +++ b/fia_api/web/api/teacher/views.py @@ -1,9 +1,10 @@ -from fastapi import APIRouter, Depends, UploadFile +from fastapi import APIRouter, Depends from fastapi.responses import StreamingResponse from fia_api.db.models.user_model import UserModel from fia_api.web.api.teacher.schema import ( ConverseResponse, + ConverseWithAudioRequest, GetAudioRequest, TeacherConverseRequest, ) @@ -46,32 +47,30 @@ async def converse( @router.post("/converse-with-audio", response_model=ConverseResponse) async def converse_with_audio( - conversation_id: str, - audio_file: UploadFile, + converse_with_audio_request: ConverseWithAudioRequest, user: AuthenticatedUser = Depends(get_current_user), ) -> ConverseResponse: """ Starts or continues a conversation with the Teacher with audio. - :param conversation_id: The conversation ID being discussed. - :param audio_file: The file of the recorded audio. + :param converse_with_audio_request: The request object. :param user: The AuthenticatedUser making the request. :returns: ConverseResponse of mistakes and conversation. """ # TODO: Should be the same endpoint as above. - # TODO: For some reason POST vars and File uploads are a mess. Fix all of - # this nonsense. - # This is because the conversation_id is passed as a str, not a model. - message = await get_text_from_audio(audio_file) + message = await get_text_from_audio( + converse_with_audio_request.audio_file, + converse_with_audio_request.language_code, + ) - if conversation_id == "new": + if converse_with_audio_request.conversation_id == "new": return await initialize_conversation( await UserModel.get(username=user.username), message, ) return await get_response( - conversation_id, + converse_with_audio_request.conversation_id, message, await UserModel.get(username=user.username), )