Skip to content

Commit

Permalink
Supports Speech to Text (#23)
Browse files Browse the repository at this point in the history
  • Loading branch information
conor-f committed Sep 15, 2023
1 parent 4c16f7b commit d2b7707
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 2 deletions.
1 change: 1 addition & 0 deletions fia_api/web/api/teacher/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,4 +132,5 @@ class ConverseResponse(BaseModel):

conversation_id: str
learning_moments: LearningMoments
input_message: str
conversation_response: str
18 changes: 18 additions & 0 deletions fia_api/web/api/teacher/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any, Dict, List

import openai
from fastapi import UploadFile
from loguru import logger

from fia_api.db.models.conversation_model import (
Expand Down Expand Up @@ -253,6 +254,7 @@ async def get_response(
return ConverseResponse(
conversation_id=conversation_id,
learning_moments=learning_moments,
input_message=message,
conversation_response=conversation_continuation.message,
)

Expand Down Expand Up @@ -287,3 +289,19 @@ async def initialize_conversation(
await TokenUsageModel.create(conversation_id=conversation_id)

return await get_response(str(conversation_id), message, user)


async def get_text_from_audio(audio_file: UploadFile) -> str:
    """
    Given an uploaded audio file, return the transcribed text.

    The upload is held in an in-memory buffer instead of being written to a
    fixed path on disk. A shared path such as ``/tmp/whatever.wav`` lets
    concurrent requests overwrite each other's audio before it is
    transcribed, and leaves the file behind afterwards.

    :param audio_file: UploadFile object to transcribe to text.
    :return: String text.
    """
    # Function-scope import so this block does not depend on the module's
    # import section being edited.
    import io

    audio_buffer = io.BytesIO(await audio_file.read())
    # The OpenAI client reads the ``name`` attribute of the file object to
    # label the upload, so give the buffer the same file name (and therefore
    # the same ".wav" extension) that the on-disk version used.
    audio_buffer.name = "whatever.wav"

    # TODO: Store the token usage too
    # NOTE(review): this call is synchronous and will block the event loop
    # while the transcription request is in flight.
    return openai.Audio.transcribe("whisper-1", audio_buffer)["text"]
40 changes: 38 additions & 2 deletions fia_api/web/api/teacher/views.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from fastapi import APIRouter, Depends
from fastapi import APIRouter, Depends, UploadFile

from fia_api.db.models.user_model import UserModel
from fia_api.web.api.teacher.schema import ConverseResponse, TeacherConverseRequest
from fia_api.web.api.teacher.utils import get_response, initialize_conversation
from fia_api.web.api.teacher.utils import (
get_response,
get_text_from_audio,
initialize_conversation,
)
from fia_api.web.api.user.schema import AuthenticatedUser
from fia_api.web.api.user.utils import get_current_user

Expand Down Expand Up @@ -32,3 +36,35 @@ async def converse(
converse_request.message,
await UserModel.get(username=user.username),
)


@router.post("/converse-with-audio", response_model=ConverseResponse)
async def converse_with_audio(
    conversation_id: str,
    audio_file: UploadFile,
    user: AuthenticatedUser = Depends(get_current_user),
) -> ConverseResponse:
    """
    Audio variant of the converse endpoint.

    The uploaded recording is first turned into text, and that text is then
    used either to open a new conversation (when ``conversation_id`` is the
    literal string ``"new"``) or to continue the existing one.

    :param conversation_id: The conversation ID being discussed, or "new".
    :param audio_file: The file of the recorded audio.
    :param user: The AuthenticatedUser making the request.
    :returns: ConverseResponse of mistakes and conversation.
    """
    # TODO: Should be the same endpoint as above.
    # TODO: For some reason POST vars and File uploads are a mess. Fix all of
    # this nonsense.
    transcript = await get_text_from_audio(audio_file)

    # Both paths need the stored user record, so look it up once after the
    # transcription has completed.
    user_model = await UserModel.get(username=user.username)

    starting_new_conversation = conversation_id == "new"
    if not starting_new_conversation:
        return await get_response(conversation_id, transcript, user_model)

    return await initialize_conversation(user_model, transcript)

0 comments on commit d2b7707

Please sign in to comment.