In [40]:
import pyaudio
import sounddevice as sd
import numpy as np
import time
import wave
import pyttsx3
import subprocess
from typing import List, Dict, Any
import asyncio
import nest_asyncio
nest_asyncio.apply()
from dotenv import load_dotenv
import os
import scipy

# Path to the .env file that provides OPENAI_API_KEY. The DOTENV_PATH
# environment variable overrides the machine-specific default below.
dotenv_path = os.environ.get('DOTENV_PATH', '/home/gabriel/myProject/myvenv/.env')



In [2]:
def list_pa_audio_output_devices() -> List[Dict[str, Any]]:
    """
    List every audio device PyAudio reports as capable of playback.

    A device counts as an output device when its ``maxOutputChannels`` entry
    is greater than zero.

    Returns:
        List[Dict[str, Any]]: One dictionary per output device with the keys
        'name' and 'index', in PyAudio index order.
    """
    p = pyaudio.PyAudio()
    try:
        output_devices = []
        for idx in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(idx)
            if device_info['maxOutputChannels'] > 0:
                output_devices.append({
                    'name': device_info['name'],
                    'index': idx
                })
        return output_devices
    finally:
        # Always release the PyAudio instance, even when a device query raises.
        p.terminate()

In [3]:
def list_sd_audio_output_devices():
    """
    Enumerate the playback-capable devices known to the sounddevice library.

    Every entry returned by ``sd.query_devices()`` that reports at least one
    output channel (``max_output_channels > 0``) is kept.

    Returns:
        List[Dict[str, Any]]: One dictionary per output device with the keys
        'name' and 'index', where 'index' is the position of the device in the
        ``sd.query_devices()`` result.
    """
    return [
        {'name': info['name'], 'index': position}
        for position, info in enumerate(sd.query_devices())
        if info['max_output_channels'] > 0
    ]

In [4]:
# Example usage: print the index and name of every output device found via PyAudio
output_devices = list_pa_audio_output_devices()
for device in output_devices:
    print(f"Device Index: {device['index']}, Device Name: {device['name']}")



In [5]:
# Example usage: print the index and name of every output device found via sounddevice
output_devices = list_sd_audio_output_devices()
for device in output_devices:
    print(f"Device Index: {device['index']}, Device Name: {device['name']}")



In [6]:
def is_sample_rate_supported(device_index, sample_rate):
    """
    Check whether a device accepts mono 16-bit PCM at the given sample rate.

    Args:
        device_index (int | None): The index of the audio output device to
            probe for playback. ``None`` selects the default *input* device,
            which is then probed for recording instead.
        sample_rate (int): The sample rate to check.

    Returns:
        bool: True if the sample rate is supported, False otherwise
        (PyAudio signals an unsupported format with ``ValueError``).
    """
    p = pyaudio.PyAudio()
    try:
        if device_index is None:
            # The default input device has to be probed as an input: asking a
            # microphone for output channels reports it as unsupported.
            device_index = p.get_default_input_device_info()['index']
            probe = {
                'input_device': device_index,
                'input_channels': 1,
                'input_format': pyaudio.paInt16,
            }
        else:
            probe = {
                'output_device': device_index,
                'output_channels': 1,
                'output_format': pyaudio.paInt16,
            }
        # The returned info is not used; the lookup is kept so that any error
        # PyAudio raises for the index still propagates to the caller.
        p.get_device_info_by_index(device_index)
        supported = p.is_format_supported(sample_rate, **probe)
    except ValueError:
        supported = False
    finally:
        # Release the PyAudio instance on every path, including errors.
        p.terminate()
    return supported

In [7]:
def generate_sound_on_device(device_index, duration=1, frequency=440, sample_rate=44100):
    """
    Play a sine tone on the given audio output device.

    Args:
        device_index (int): The index of the audio output device.
        duration (float): The duration of the sound in seconds. Default is 1 second.
        frequency (float): The frequency of the sound in Hz. Default is 440 Hz (A4 note).
        sample_rate (int): The sample rate in Hz. Default is 44100 Hz.

    Returns:
        None

    Raises:
        ValueError: If ``is_sample_rate_supported`` rejects the sample rate
            for the device.
    """
    if not is_sample_rate_supported(device_index, sample_rate):
        raise ValueError(f"Sample rate {sample_rate} is not supported by device {device_index}")

    # Half-amplitude sine wave scaled to 16-bit PCM. The array is deliberately
    # not named ``wave`` so it cannot be confused with the imported module.
    t = np.linspace(0, duration, int(sample_rate * duration), endpoint=False)
    tone = (0.5 * np.sin(2 * np.pi * frequency * t) * 32767).astype(np.int16)

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=sample_rate,
                        output=True,
                        output_device_index=device_index)
        try:
            stream.write(tone.tobytes())
        finally:
            # Close the stream even if playback fails part-way through.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

In [8]:
def capture_speech_from_microphone(duration=5, sample_rate=44100, channels=1,
                                   output_filename="output.wav", frames_per_buffer=1024):
    """
    Record from the default input device and save the audio as a 16-bit WAV file.

    Args:
        duration (float): The duration of the recording in seconds. Default is 5 seconds.
        sample_rate (int): The sample rate in Hz. Default is 44100 Hz.
        channels (int): The number of audio channels. Default is 1 (mono).
        output_filename (str): The name of the output WAV file. Default is "output.wav".
        frames_per_buffer (int): Number of frames read from the stream per
            call. Default is 1024.

    Returns:
        None

    Raises:
        ValueError: If ``is_sample_rate_supported(None, sample_rate)`` rejects
            the sample rate.
    """
    if not is_sample_rate_supported(None, sample_rate):
        raise ValueError(f"Sample rate {sample_rate} is not supported by the default input device")

    p = pyaudio.PyAudio()
    try:
        # Ask for the sample width while the PyAudio instance is still alive
        # rather than after terminate().
        sample_width = p.get_sample_size(pyaudio.paInt16)

        stream = p.open(format=pyaudio.paInt16,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=frames_per_buffer)
        try:
            print("Recording...")

            frames = []
            # Count frames, not whole buffers, so the final partial buffer is
            # recorded too and the file length matches the requested duration.
            remaining = int(sample_rate * duration)
            while remaining > 0:
                chunk = min(frames_per_buffer, remaining)
                frames.append(stream.read(chunk))
                remaining -= chunk

            print("Recording finished.")
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Save the recorded frames to a WAV file
    with wave.open(output_filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))

In [9]:
def play_speech_on_device(file_path, device_index):
    """
    Play a WAV file on the given audio output device.

    If the stream cannot be opened on ``device_index``, the error is printed
    and playback falls back to PyAudio's default output device, using the same
    format, channel count and sample rate.

    Args:
        file_path (str): The path to the sound file (WAV format).
        device_index (int): The index of the audio output device.

    Returns:
        None
    """
    # The context manager closes the WAV file on every path.
    with wave.open(file_path, 'rb') as wf:
        p = pyaudio.PyAudio()
        try:
            stream_kwargs = dict(format=p.get_format_from_width(wf.getsampwidth()),
                                 channels=wf.getnchannels(),
                                 rate=wf.getframerate(),
                                 output=True)
            try:
                stream = p.open(output_device_index=device_index, **stream_kwargs)
            except Exception as e:
                # Best effort: the requested device may be missing or may
                # reject the file's format, so retry without a device index.
                print(f"Error: {e}")
                print("Falling back to default output device.")
                stream = p.open(**stream_kwargs)

            try:
                # Stream the file in chunks of 1024 frames until it is exhausted.
                data = wf.readframes(1024)
                while data:
                    stream.write(data)
                    data = wf.readframes(1024)
            finally:
                stream.stop_stream()
                stream.close()
        finally:
            p.terminate()

In [10]:
def pico_text_to_speech(text, output_filename="tts_output.wav", language='de-DE'):
    """
    Synthesize speech from text with the ``pico2wave`` command-line tool.

    Args:
        text (str): The text to convert to speech.
        output_filename (str): The WAV file pico2wave writes to. Default is "tts_output.wav".
        language (str): Value passed to pico2wave's ``-l`` option. Default is 'de-DE'.

    Returns:
        None

    Raises:
        subprocess.CalledProcessError: If pico2wave exits with a non-zero status.
    """
    # Arguments are passed as a list, so the text is never interpreted by a shell.
    argv = ['pico2wave', '--wave', output_filename]
    argv += ["-l", language]
    argv.append(text)
    subprocess.run(argv, check=True)

In [11]:
import whisper

# Load the "small" Whisper model once; later cells call model.transcribe() on it.
model = whisper.load_model("small")

In [12]:
# Transcribe the recording stored in "output.wav".
# NOTE(review): this cell comes before the cell that records "output.wav", so on a
# fresh run the file must already exist from an earlier session — confirm cell order.
result = model.transcribe("output.wav")

# Print the transcription
print("Transcription:", result["text"])



In [13]:
# Example usage: Convert a German sentence to speech (default language 'de-DE') and save it to "tts_output.wav"
pico_text_to_speech("Hallo zusammen, ich freue mich über tolle Fortschritte", output_filename="tts_output.wav")

In [14]:
# Example usage: Play the TTS output file "tts_output.wav" on output device 7
play_speech_on_device(file_path="tts_output.wav", device_index=7)

In [15]:
# Example usage: Play the default 440 Hz tone on device 7 for 1 second
generate_sound_on_device(device_index=7, duration=1)

In [16]:
# Example usage: Record 15 seconds from the default input device and save them to "output.wav"
capture_speech_from_microphone(duration=15, output_filename="output.wav")



In [17]:
# Example usage: Play the default 440 Hz tone on device 7 for 1 second
generate_sound_on_device(device_index=7, duration=1)

In [18]:
# Example usage: Play the recorded file "output.wav" on device 7
play_speech_on_device(file_path="output.wav", device_index=7)

In [19]:
# Transcribe the recording in "output.wav" with the Whisper model loaded earlier
result = model.transcribe("output.wav")

# Print the transcription
print("Transcription:", result["text"])



In [20]:
# Speak the transcription back: synthesize result["text"] into "tts2_output.wav"
pico_text_to_speech(result["text"], output_filename="tts2_output.wav")


In [21]:
# Example usage: Play the synthesized file "tts2_output.wav" on device 7
play_speech_on_device(file_path="tts2_output.wav", device_index=7)

In [22]:
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

from langchain_community.document_loaders import PyPDFLoader

async def upload_file(file_path):
    """
    Load a PDF file so its content can be used as a data source for the LLM.

    Args:
        file_path (str): The path to the PDF file to load.
    Returns:
        list: The documents yielded by ``PyPDFLoader`` for the file
        (presumably one Document per page — TODO confirm).
    """
    loader = PyPDFLoader(file_path)
    # Collect the pages once from the async iterator and return them; calling
    # loader.load() afterwards would parse the whole file a second time.
    return [page async for page in loader.alazy_load()]

In [23]:
def ask_question(documents, question):
    """
    Run a question-answering chain over the given documents.

    The OpenAI API key is read from the ``OPENAI_API_KEY`` environment
    variable after the .env file at ``dotenv_path`` has been loaded.

    Args:
        documents (List[Document]): The list of documents to use as context.
        question (str): The question to ask.
    Returns:
        str: The response from the LLM.
    """
    # Make the variables from the .env file visible through os.environ
    load_dotenv(dotenv_path)
    api_key = os.environ.get('OPENAI_API_KEY')

    # temperature=0 requests the least random completion; 'stuff' is the
    # chain type handed to load_qa_chain.
    qa_chain = load_qa_chain(
        llm=OpenAI(temperature=0, openai_api_key=api_key),
        chain_type='stuff',
    )
    return qa_chain.run(input_documents=documents, question=question)

In [24]:
def print_response(response):
    """
    Write the LLM response to standard output, prefixed with "Response:".

    Args:
        response (str): The response to print.
    Returns:
        None
    """
    label = "Response:"
    print(label, response)

In [25]:
# Load the resume PDF into `documents`; the question cells below pass it to ask_question().
# asyncio.run() is called from inside the notebook here; nest_asyncio.apply() runs in the first cell.
file_path = "/home/gabriel/myProject/myDocs/CV_Gabriel_250302_G.pdf"
documents = asyncio.run(upload_file(file_path))
# The role and instructions for the OpenAI model are defined in the prompt cells that follow.



In [26]:
# Example usage: Play the default 440 Hz tone on device 7 for 1 second
generate_sound_on_device(device_index=7, duration=1)

In [41]:

# Opening turn: the prompt casts the model as the job candidate described by the
# uploaded resume and asks it to greet the HR manager in German.
question = """
You are a candidate for a job interview.
Your resume is the file that is uploaded.
In this file, you find everything about you,
that you should know
Your task is to respond to questions in German
that the human ressources manager is asking you.
The human resource manager is sitting in front of you.
You only respond to the following question: Say hello to him in few nice words.
"""
response = ask_question(documents,question)
print_response(response)
# Synthesize the answer into "output1.wav"
pico_text_to_speech(response, output_filename="output1.wav")
# Play the response file "output1.wav" on device 7
play_speech_on_device(file_path="output1.wav", device_index=7)
# Play a one-second tone on device 7
generate_sound_on_device(device_index=7, duration=1)



In [28]:
# Play a one-second tone on device 7 before the recording starts
generate_sound_on_device(device_index=7, duration=1)

# Capture question from the microphone for 15 seconds and save it to "input2.wav"
capture_speech_from_microphone(duration=15, output_filename="input2.wav")
# Transcribe the recorded question
result = model.transcribe("input2.wav")
# Print the transcription
print("Transcription:", result["text"])

# Same interview prompt as the opening turn, with the transcribed question appended
question = f"""
You are a candidate for a job interview.
Your resume is the file that is uploaded.
In this file, you find everything about you,
that you should know
Your task is to respond to questions in German
that the human ressources manager is asking you.
The human resource manager is sitting in front of you.
You only respond to the following question:
{result["text"]}
"""

response = ask_question(documents,question)
print_response(response)
# Synthesize the answer into "output2.wav"
pico_text_to_speech(response, output_filename="output2.wav")
# Play the response file "output2.wav" on device 7
play_speech_on_device(file_path="output2.wav", device_index=7)
# Play a one-second tone on device 7
generate_sound_on_device(device_index=7, duration=1)




In [29]:

# Play a one-second tone on device 7 before the recording starts
generate_sound_on_device(device_index=7, duration=1)

# Capture question from the microphone for 15 seconds and save it to "input3.wav"
capture_speech_from_microphone(duration=15, output_filename="input3.wav")
# Transcribe the recorded question
result = model.transcribe("input3.wav")
# Print the transcription
print("Transcription:", result["text"])

# Same interview prompt as the previous turns, with the transcribed question
# appended. The string opens with exactly three quotes: a fourth one would put
# a stray '"' at the start of the prompt sent to the model.
question = f"""
You are a candidate for a job interview.
Your resume is the file that is uploaded.
In this file, you find everything about you,
that you should know
Your task is to respond to questions in German
that the human ressources manager is asking you.
The human resource manager is sitting in front of you.
You only respond to the following question:
{result["text"]}
"""
response = ask_question(documents,question)
print_response(response)
# Synthesize the answer into "output3.wav"
pico_text_to_speech(response, output_filename="output3.wav")
# Play the response file "output3.wav" on device 7
play_speech_on_device(file_path="output3.wav", device_index=7)
# Play a one-second tone on device 7
generate_sound_on_device(device_index=7, duration=1)



In [45]:
from scipy.signal import resample


def convert_audio_params(frames, original_params, target_params):
    """
    Convert raw PCM frames from one WAV parameter set to another.

    Supports 8-bit (unsigned) and 16-bit (signed, little-endian) samples, the
    two encodings WAV files use for those widths. Conversion order is: decode,
    resample, change channel count, encode.

    Args:
        frames (bytes): Interleaved PCM data, e.g. from ``Wave_read.readframes``.
        original_params: Object with ``nchannels``, ``sampwidth`` and
            ``framerate`` attributes describing ``frames`` (for example the
            result of ``Wave_read.getparams()``).
        target_params: Object with the same attributes describing the output.

    Returns:
        bytes: The converted, interleaved PCM data.

    Raises:
        ValueError: If a sample width other than 1 or 2 bytes is involved, or
            if the channel change is neither a downmix to mono nor an upmix
            from mono.
    """
    src_channels = original_params.nchannels
    dst_channels = target_params.nchannels

    if target_params.sampwidth not in (1, 2):
        raise ValueError(f"Unsupported target sample width: {target_params.sampwidth}")

    # Decode according to the *source* width onto a common 16-bit scale.
    if original_params.sampwidth == 2:
        samples = np.frombuffer(frames, dtype='<i2').astype(np.float64)
    elif original_params.sampwidth == 1:
        # 8-bit WAV samples are unsigned with the zero level at 128.
        samples = (np.frombuffer(frames, dtype=np.uint8).astype(np.float64) - 128.0) * 256.0
    else:
        raise ValueError(f"Unsupported source sample width: {original_params.sampwidth}")

    # One row per frame, one column per channel, so that resampling and
    # channel mixing never blend samples of different channels.
    samples = samples.reshape(-1, src_channels)

    # Adjust the sample rate (along the time axis only).
    if original_params.framerate != target_params.framerate:
        num_frames = int(samples.shape[0] * target_params.framerate / original_params.framerate)
        if num_frames > 0:
            samples = resample(samples, num_frames, axis=0)
        else:
            samples = samples[:0]

    # Adjust the number of channels.
    if src_channels != dst_channels:
        if dst_channels == 1:
            # Mono: average of all channels
            samples = samples.mean(axis=1, keepdims=True)
        elif src_channels == 1:
            # Duplicate the mono channel into every target channel
            samples = np.repeat(samples, dst_channels, axis=1)
        else:
            raise ValueError(
                f"Unsupported channel conversion: {src_channels} -> {dst_channels}")

    # Round and clip before the integer cast; resampling can overshoot the
    # 16-bit range and a plain cast would wrap around.
    pcm16 = np.clip(np.rint(samples), -32768, 32767).astype(np.int16)

    # Encode with the target sample width.
    if target_params.sampwidth == 1:
        return ((pcm16 >> 8) + 128).astype(np.uint8).tobytes()
    return pcm16.astype('<i2').tobytes()



In [46]:
def my_wav_aggregator(masterfile, addfile):
    """
    Append the audio of ``addfile`` to ``masterfile`` and return the master path.

    If the master file exists, the add file's frames are converted to the
    master's channel count, sample width and frame rate when they differ, and
    the combined audio is written back to the master file.

    If ``masterfile`` is empty/None or does not exist, the add file itself
    becomes the master: it is left untouched and its path is returned.

    Args:
        masterfile (str | None): Path of the WAV file to append to.
        addfile (str): Path of the WAV file whose audio is appended.

    Returns:
        str: The path of the file that now holds the aggregated audio.
    """
    # Read the file to append first; a missing or invalid add file is an
    # error whether or not the master exists.
    with wave.open(addfile, 'rb') as add_wav:
        add_params = add_wav.getparams()
        add_frames = add_wav.readframes(add_wav.getnframes())

    # No existing master: the add file is the aggregate as it stands.
    # Appending its frames to themselves would double the audio.
    if not masterfile or not os.path.exists(masterfile):
        return addfile

    # Read the existing master file.
    with wave.open(masterfile, 'rb') as master_wav:
        master_params = master_wav.getparams()
        master_frames = master_wav.readframes(master_wav.getnframes())

    # Match the add file's parameters to those of the master file.
    if add_params.nchannels != master_params.nchannels or \
       add_params.sampwidth != master_params.sampwidth or \
       add_params.framerate != master_params.framerate:
        add_frames = convert_audio_params(add_frames, add_params, master_params)

    # Write the combined audio back to the master file.
    with wave.open(masterfile, 'wb') as combined_wav:
        combined_wav.setparams(master_params)
        combined_wav.writeframes(master_frames + add_frames)

    return masterfile

In [50]:
# Append the recorded question "input2.wav" to the master file "output1.wav"
# (an existing master file is rewritten in place, so re-running appends again)
masterfile=my_wav_aggregator('output1.wav', 'input2.wav')  

In [51]:
# Append the spoken answer "output2.wav" to the master file "output1.wav"
masterfile=my_wav_aggregator('output1.wav', 'output2.wav')

In [52]:
# Append the recorded question "input3.wav" to the master file "output1.wav"
masterfile=my_wav_aggregator('output1.wav', 'input3.wav')

In [53]:
# Append the spoken answer "output3.wav" to the master file "output1.wav"
masterfile=my_wav_aggregator('output1.wav', 'output3.wav')


In [54]:
# Play the aggregated file "output1.wav" on device 7
play_speech_on_device(file_path="output1.wav", device_index=7)

In [42]:
import wave

def list_wav_file_params(file_path):
    """
    Read the header parameters of a WAV file.

    Args:
        file_path (str): The path to the WAV file.

    Returns:
        Dict[str, Any]: The values of 'nchannels', 'sampwidth', 'framerate',
        'nframes', 'comptype' and 'compname' as reported by the ``wave`` module.
    """
    fields = ('nchannels', 'sampwidth', 'framerate', 'nframes', 'comptype', 'compname')
    with wave.open(file_path, 'rb') as wav_file:
        header = wav_file.getparams()
    return {field: getattr(header, field) for field in fields}

# Example usage: print the header parameters of the generated and recorded files
for wav_name in ('output1.wav', 'input2.wav', 'output2.wav'):
    file_params = list_wav_file_params(wav_name)
    print(file_params)

