Supports Speech to Text (#23)

conor-f · Sep 15, 2023 · d2b7707 · d2b7707
1 parent 4c16f7b
commit d2b7707
Show file tree

Hide file tree

Showing 3 changed files with 57 additions and 2 deletions.
diff --git a/fia_api/web/api/teacher/schema.py b/fia_api/web/api/teacher/schema.py
@@ -132,4 +132,5 @@ class ConverseResponse(BaseModel):
 
     conversation_id: str
     learning_moments: LearningMoments
+    input_message: str
     conversation_response: str
diff --git a/fia_api/web/api/teacher/utils.py b/fia_api/web/api/teacher/utils.py
@@ -4,6 +4,7 @@
 from typing import Any, Dict, List
 
 import openai
+from fastapi import UploadFile
 from loguru import logger
 
 from fia_api.db.models.conversation_model import (
@@ -253,6 +254,7 @@ async def get_response(
     return ConverseResponse(
         conversation_id=conversation_id,
         learning_moments=learning_moments,
+        input_message=message,
         conversation_response=conversation_continuation.message,
     )
 
@@ -287,3 +289,28 @@ async def initialize_conversation(
     await TokenUsageModel.create(conversation_id=conversation_id)

     return await get_response(str(conversation_id), message, user)
+
+
+async def get_text_from_audio(audio_file: UploadFile) -> str:
+    """
+    Transcribe an uploaded audio recording to text using OpenAI Whisper.
+
+    The upload is held in an in-memory buffer private to this call, so
+    concurrent requests cannot overwrite each other's audio and nothing is
+    left behind on disk.
+
+    :param audio_file: UploadFile object holding the audio to transcribe.
+    :return: String text.
+    """
+    import io
+
+    # UploadFile.read() is the async-friendly way to pull the upload's bytes.
+    audio_buffer = io.BytesIO(await audio_file.read())
+    # The OpenAI client takes the upload filename from the file object's
+    # ``name`` attribute, so give the buffer one; keep the ".wav" extension.
+    audio_buffer.name = "whatever.wav"
+
+    # TODO: Store the token usage too
+    # NOTE(review): this is a synchronous network call inside a coroutine and
+    # blocks the event loop while it runs; consider an async variant.
+    return openai.Audio.transcribe("whisper-1", audio_buffer)["text"]
diff --git a/fia_api/web/api/teacher/views.py b/fia_api/web/api/teacher/views.py
@@ -1,8 +1,12 @@
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, UploadFile
 
 from fia_api.db.models.user_model import UserModel
 from fia_api.web.api.teacher.schema import ConverseResponse, TeacherConverseRequest
-from fia_api.web.api.teacher.utils import get_response, initialize_conversation
+from fia_api.web.api.teacher.utils import (
+    get_response,
+    get_text_from_audio,
+    initialize_conversation,
+)
 from fia_api.web.api.user.schema import AuthenticatedUser
 from fia_api.web.api.user.utils import get_current_user
 
@@ -32,3 +36,32 @@ async def converse(
         converse_request.message,
         await UserModel.get(username=user.username),
     )
+
+
+@router.post("/converse-with-audio", response_model=ConverseResponse)
+async def converse_with_audio(
+    conversation_id: str,
+    audio_file: UploadFile,
+    user: AuthenticatedUser = Depends(get_current_user),
+) -> ConverseResponse:
+    """
+    Transcribes uploaded audio and passes it to the Teacher conversation.
+
+    A conversation_id of "new" starts a fresh conversation; any other value
+    continues the conversation with that ID.
+
+    :param conversation_id: The conversation ID being discussed, or "new".
+    :param audio_file: The file of the recorded audio.
+    :param user: The AuthenticatedUser making the request.
+    :returns: ConverseResponse of mistakes and conversation.
+    """
+    # TODO: Should be the same endpoint as above.
+    # TODO: For some reason POST vars and File uploads are a mess. Fix all of
+    # this nonsense.
+    transcript = await get_text_from_audio(audio_file)
+    user_model = await UserModel.get(username=user.username)
+
+    if conversation_id != "new":
+        return await get_response(conversation_id, transcript, user_model)
+
+    return await initialize_conversation(user_model, transcript)