## Chat with Me

### How long will you speak for?
e.g. 5 seconds

In [2]:
# Recording length in seconds; passed to chat_with_me as its speak_time argument.
speak_time = 5

### What language do you want to converse in?
e.g. English

In [57]:
# Language codes offered for the conversation. The selected code is passed to
# chat_with_me, which forwards it to the speech-to-text and text-to-speech calls.
languages = [
    'en-US',
    'zh-CN', 'zh-TW',
    'es-ES', 'es-MX',
    'fr-FR', 'fr-CA',
]
lang = languages[1]  # index 1 selects 'zh-CN'

### What is your proficiency in the language?

In [None]:
# Proficiency options. chat_with_me reads the module-level `level` chosen here
# when it asks chatGPT_response for a reply.
levels = [
    'beginner',
    'intermediate',
    'advanced',
]
level = levels[0]  # index 0 selects 'beginner'

### Let's Begin!

In [59]:
# Run one conversation turn: record, transcribe, get a reply, play it back.
# NOTE(review): chat_with_me and the helpers it calls are defined in cells
# further down this notebook, so those cells must be executed before this one.
chat_with_me(speak_time, lang)


Recording...
Finished recording.
You: 能不能教我咋点饮料
Friend: 当然可以！您想知道如何在餐厅点饮料吗？通常您可以告诉服务员您想要的饮料种类和大小，比如冰水、可乐、咖啡等等。您还可以问一些关于饮料的问题，比如是否有特色饮料推荐或者是否有无糖选项。有了这些信息，您就可以轻松地点您喜欢的饮料了！希望这些提示能帮助到您。


In [54]:
import os
import json
# Read the local config file and export its credentials as environment
# variables (OPENAI_API_KEY is read back later by chatGPT_response).
with open('config.json') as config_file:
    config = json.load(config_file)

# (environment variable, key in config.json) pairs, exported in this order.
for env_name, config_key in (
    ('OPENAI_API_KEY', "OPENAI_KEY"),
    ('GOOGLE_APPLICATION_CREDENTIALS', "GOOGLE_APPLICATION_CREDENTIALS"),
):
    os.environ[env_name] = config[config_key]


def chat_with_me(speak_time, lang):
    """Run one spoken conversation turn.

    Records ``speak_time`` seconds from the microphone into ``output.wav``,
    transcribes that recording using language code ``lang``, asks
    ``chatGPT_response`` for a reply and plays the reply via ``text_to_audio``.

    The proficiency ``level`` is not a parameter: it is looked up in the
    notebook's global namespace when this function runs.
    NOTE(review): ``level`` is handed to ``chatGPT_response`` as a second
    positional argument, so that function has to accept one.
    """
    wav_path = "output.wav"
    record_me(speak_time, wav_path)
    user_text = audio_to_text(wav_path, language=lang)
    reply_text = chatGPT_response(user_text, level)
    text_to_audio(reply_text, lang)



### Live Speech -> Audio

In [21]:
import pyaudio
import wave

def record_me(RECORD_SECONDS=5, WAVE_OUTPUT_FILENAME="output.wav"):
    """Record from the default input device and save the audio as a WAV file.

    Audio is captured as single-channel 16-bit PCM at 44100 Hz, read in
    1024-frame chunks, and written to ``WAVE_OUTPUT_FILENAME``.

    Args:
        RECORD_SECONDS: Length of the recording in seconds.
        WAVE_OUTPUT_FILENAME: Path of the WAV file to create (overwritten if
            it already exists, because it is opened in 'wb' mode).

    The input stream and the PyAudio instance are released even when reading
    from the device raises, and the WAV file is closed even when writing fails.
    """
    FORMAT = pyaudio.paInt16  # Audio format (16-bit PCM)
    CHANNELS = 1              # Single channel for microphone
    RATE = 44100              # Sampling rate
    CHUNK = 1024              # Number of frames per buffer
    device_index = None       # Device index or None to select default

    audio = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = audio.get_sample_size(FORMAT)

        stream = audio.open(format=FORMAT, channels=CHANNELS,
                            rate=RATE, input=True,
                            frames_per_buffer=CHUNK,
                            input_device_index=device_index)
        try:
            print("Recording...")
            # Number of whole chunks that fit into the requested duration.
            chunk_count = int(RATE / CHUNK * RECORD_SECONDS)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
            print("Finished recording.")
        finally:
            # Always stop and close the stream, even if a read failed.
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()

    # Save the recorded data as a WAV file; `with` guarantees it is closed.
    with wave.open(WAVE_OUTPUT_FILENAME, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))

### Audio -> Text

In [50]:
from google.cloud import speech
from google.oauth2 import service_account

def audio_to_text(speech_file, language):
    """Transcribe an audio file with Google Cloud Speech-to-Text.

    Args:
        speech_file: Path of the audio file; its bytes are sent inline with
            the request and declared as LINEAR16-encoded.
        language: Language code of the spoken audio, passed as
            ``language_code`` (e.g. 'en-US').

    Returns:
        The transcript as a string, built from the top alternative of every
        result in the response, joined in order. Returns an empty string when
        the service recognised nothing.
    """
    # Path to your service account key file
    key_file_path = "multilingual-chat-bot.json"

    # Create credentials using the service account key file
    credentials = service_account.Credentials.from_service_account_file(
        key_file_path,
        scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    # Authenticate the client using the credentials
    client = speech.SpeechClient(credentials=credentials)

    with open(speech_file, "rb") as audio_file:
        content = audio_file.read()

    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code=language,  # Language of the audio
    )

    response = client.recognize(config=config, audio=audio)

    # Each result covers one consecutive portion of the audio, so the full
    # utterance is the concatenation of every result's best alternative.
    # Building it this way also avoids referencing an unbound loop variable
    # when the response contains no results at all.
    transcript = "".join(
        result.alternatives[0].transcript
        for result in response.results
        if result.alternatives
    )
    print("You: " + transcript)
    return transcript

### Text Input -> ChatGPT

In [53]:
import os
from openai import OpenAI
import json


def chatGPT_response(input, level=None):
    """Ask the OpenAI chat model for a conversational reply and print it.

    Args:
        input: The user's utterance as text; sent as "Reply to: <input>".
        level: Optional proficiency label (for example 'beginner'). When
            given, the system prompt additionally asks the model to adapt its
            wording to that level. When omitted or empty, the prompt is the
            plain conversationalist prompt, so single-argument callers keep
            working unchanged.

    Returns:
        The reply text as a string (empty if the model returned no content).
    """
    # Load the OpenAI API key from the environment variable
    api_key = os.environ.get("OPENAI_API_KEY")

    # Create the OpenAI client
    client = OpenAI(api_key=api_key)

    system_prompt = "You are a great conversationalist."
    if level:
        system_prompt += (
            " The user's proficiency in the language is " + str(level)
            + "; adapt your vocabulary and sentence complexity to that level."
        )

    # Create a chat completion
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "Reply to: " + input}
        ]
    )

    # The message content may be None; fall back to an empty string so the
    # concatenation below cannot raise.
    reply = completion.choices[0].message.content or ""
    print("Friend: " + reply)
    return reply

### Text -> Audio

In [52]:
from google.cloud import texttospeech
from google.oauth2 import service_account
from IPython.display import Audio


def text_to_audio(output, language):
    """Speak ``output`` aloud using Google Cloud Text-to-Speech.

    The text is synthesised as MP3 with a neutral-gender voice for the
    language code ``language``, written to ``output.mp3`` and then shown in
    the notebook as an autoplaying audio widget.

    Args:
        output: Text to be synthesised.
        language: Language code used to select the voice.
    """
    # Build an authenticated client from the service account key file.
    creds = service_account.Credentials.from_service_account_file(
        "multilingual-chat-bot.json",
        scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    tts_client = texttospeech.TextToSpeechClient(credentials=creds)

    # Assemble the three parts of the synthesis request: text, voice, format.
    request_parts = dict(
        input=texttospeech.SynthesisInput(text=output),
        voice=texttospeech.VoiceSelectionParams(
            language_code=language,
            ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL,
        ),
        audio_config=texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
        ),
    )
    tts_response = tts_client.synthesize_speech(**request_parts)

    # audio_content is binary, so the file is opened in binary mode.
    mp3_path = "output.mp3"
    with open(mp3_path, "wb") as mp3_file:
        mp3_file.write(tts_response.audio_content)

    display(Audio(mp3_path, autoplay=True))
