In [1]:
import pygame
import torch
import sounddevice as sd
import os
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
from scipy.io.wavfile import write
from dotenv import load_dotenv
from openai import OpenAI
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

pygame 2.5.2 (SDL 2.28.3, Python 3.11.8)
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Keep the key available under a module-level name; the client itself is built
# with no arguments.
openai_api_key = os.environ.get("OPENAI_API_KEY")
client = OpenAI()

In [3]:
# Chat model built with no arguments.
llm = ChatOpenAI()

# Conversational prompt: one fixed system instruction plus the user's utterance,
# which is filled in through the "input" variable.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Respond conversationally to the given input, in whatever language it is given in"),
        ("user", "{input}"),
    ]
)
output_parser = StrOutputParser()

# prompt -> model -> string parser
chain = prompt | llm | output_parser

In [4]:
# Use the first CUDA device in half precision when CUDA is available,
# otherwise fall back to the CPU in single precision.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

model_id = "openai/whisper-base"
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    use_safetensors=True,
)
model.to(device)

# The processor supplies both the tokenizer and the feature extractor
# that the speech-recognition pipeline is built from.
processor = AutoProcessor.from_pretrained(model_id)
asr_options = dict(
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=16,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
pipe = pipeline("automatic-speech-recognition", **asr_options)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
def recordaudio(filename, duration=5, fs=44100, language=None):
    """Record from the microphone, save the audio as a WAV file and transcribe it.

    Args:
        filename: Path of the WAV file to write. Missing parent folders are created.
        duration: Length of the recording in seconds.
        fs: Sample rate passed to the recorder and written to the WAV file.
        language: Language code handed to the speech-recognition pipeline.
            When omitted, the notebook-level ``lang`` variable is used, which
            must already be defined at call time.

    Returns:
        The ``'text'`` entry of the pipeline result for the recorded file.
    """
    if language is None:
        # Preserve the original behaviour of reading the notebook-level setting.
        language = lang

    # Writing into a folder that does not exist yet would raise FileNotFoundError.
    folder = os.path.dirname(filename)
    if folder:
        os.makedirs(folder, exist_ok=True)

    print('recording...')
    recording = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    sd.wait()  # block until the recording buffer has been filled
    write(filename, fs, recording)

    result = pipe(filename, generate_kwargs={'language': language})
    return result['text']

In [6]:
def play_audio(file):
    """Play a sound file through pygame and block until playback should be over.

    Args:
        file: Path (or file object) accepted by ``pygame.mixer.Sound``.

    The mixer is initialised on demand, so playback still works after an
    earlier ``pygame.quit()`` (``conversation()`` calls it when it ends).
    """
    if not pygame.mixer.get_init():
        pygame.mixer.init()

    sound = pygame.mixer.Sound(file)
    # get_length() is in seconds; pygame.time.wait expects milliseconds.
    recordlength = int(sound.get_length() * 1000)
    sound.play()
    pygame.time.wait(recordlength)

In [7]:
def make_speech_file(speech_file_path, text):
    """Synthesise ``text`` with the OpenAI speech endpoint and save the audio.

    Args:
        speech_file_path: Destination file. Missing parent folders are created.
        text: The text to be spoken.

    The request uses the ``tts-1`` model with the ``alloy`` voice and writes
    the raw response bytes to ``speech_file_path``.
    """
    # NOTE(review): no response_format is requested, so the bytes are in the
    # API's default audio format whatever the file extension says - confirm
    # that every consumer of the file detects the format from its content.
    response = client.audio.speech.create(
        model="tts-1",
        voice="alloy",
        input=text
    )

    # open(..., "wb") does not create intermediate folders.
    folder = os.path.dirname(speech_file_path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    with open(speech_file_path, "wb") as f:
        f.write(response.content)

In [8]:
def translate_to_english(file):
    """Return an English rendering of the speech contained in an audio file.

    Args:
        file: Path of the audio file handed to the speech-recognition pipeline.

    Returns:
        The ``'text'`` entry of the pipeline result.

    The pipeline is asked for Whisper's ``translate`` task. Forcing
    ``language='en'`` while leaving the task at its default only tells the
    model to decode the audio as if it were English; it does not request a
    translation. The source language is left for the model to detect.
    """
    result = pipe(file, generate_kwargs={'task': 'translate'})
    return result['text']

In [9]:
def nativize(lang, text):
    """Translate ``text`` into the language identified by ``lang`` using the chat model.

    Args:
        lang: Language code inserted into the system instruction.
        text: The text to translate.

    Returns:
        The model's reply after it has passed through ``output_parser``.
    """
    # `lang` is supplied as a template variable rather than concatenated into
    # the template string, so braces inside it cannot be mistaken for
    # placeholders.
    translator = ChatPromptTemplate.from_messages([
        ("system", "Translate this to the following ISO language: {lang}"),
        ("user", "{input}")
    ])
    translate = translator | llm | output_parser
    return translate.invoke({'input': text, 'lang': lang})

In [10]:
def generate_response(text):
    """Feed ``text`` to the conversational chain and return what it produces.

    Args:
        text: The utterance to respond to; passed to the chain as ``input``.

    Returns:
        The result of ``chain.invoke`` for that input.
    """
    return chain.invoke({'input': text})

In [11]:
def machine_turn(text):
    """Speak the computer's next line and print it with an English rendering.

    Args:
        text: What the human said last. An empty string marks the opening
            turn, in which a fixed greeting is translated into the
            notebook-level ``lang`` instead of generating a reply.

    The line is synthesised to ``recordings/machine.wav``, run back through
    ``translate_to_english``, played aloud and then printed.
    """
    machine_file = 'recordings/machine.wav'

    talk = (
        nativize(lang, 'Hello, how are you today?')
        if text == ''
        else generate_response(text)
    )

    make_speech_file(machine_file, talk)
    eng = translate_to_english(machine_file)
    play_audio(machine_file)
    print(''.join(['Computer: ', talk, ' (', eng, ')']))

In [12]:
def human_turn():
    """Record the user's reply, print it with an English rendering and return it.

    The recording is stored in ``recordings/human.wav``.

    Returns:
        The transcription produced by ``recordaudio``.
    """
    recording_path = 'recordings/human.wav'
    spoken = recordaudio(recording_path)
    english = translate_to_english(recording_path)
    print(''.join(['Me: ', spoken, ' (', english, ')']))
    return spoken

In [13]:
def conversation(turns=5):
    """Run a spoken dialogue that alternates between the computer and the user.

    Args:
        turns: Number of computer/human exchanges to run (default 5).

    The computer opens each exchange; its first line is triggered by passing
    an empty string to ``machine_turn``. pygame is shut down when the dialogue
    ends, including when it is cut short by an exception or a keyboard
    interrupt, so no separate cleanup call is needed afterwards.
    """
    human_response = ''
    try:
        for _ in range(turns):
            machine_turn(human_response)
            human_response = human_turn()
    finally:
        pygame.quit()

In [14]:
# Language code read by recordaudio() for transcription and by machine_turn()
# for the opening greeting.
lang = 'en'

# Initialise pygame, then run the dialogue.
pygame.init()
conversation()

Computer: Hello, how are you today? ( Hello, how are you today?)
recording...
Me:  I'm doing well. How about you? ( I'm doing well. How about you?)
That's great to hear! I'm doing well too, thank you for asking. Anything exciting happening in your day?
Computer: That's great to hear! I'm doing well too, thank you for asking. Anything exciting happening in your day? ( That's great to hear. I'm doing well too. Thank you for asking. Anything exciting happening in your day?)
recording...


KeyboardInterrupt: 

In [15]:
# Explicitly shut pygame down; the previous cell's run ended with a KeyboardInterrupt.
pygame.quit()