<a href="https://colab.research.google.com/github/VeluruMonicagithub/INFOSYS_VIRTUAL_INTERNSHIP/blob/main/AUDIOTOTEXT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
from IPython.display import Javascript, display, Audio
from google.colab.output import eval_js
from base64 import b64decode
from pydub import AudioSegment
import io

def record_audio(filename='input_audio.wav'):
  """
  Record audio from the browser microphone in Google Colab and save it as WAV.

  A small widget with 'START' and 'STOP' buttons is injected into the cell
  output. The call blocks until the user presses 'STOP'; the browser-side
  recording (webm) is then sent back as a base64 data URL, decoded, and
  converted to a mono 16 kHz 16-bit WAV file via pydub.

  Args:
    filename: Path of the WAV file to write.

  Returns:
    An IPython ``Audio`` object for playback of the saved file, or ``None``
    when nothing was recorded or the webm -> wav conversion failed.

  Raises:
    Whatever ``eval_js`` raises when the browser side fails (for example when
    microphone permission is denied); such errors are not caught here.
  """
  js_interactive_recorder = """
    async function recordAudio() {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      const mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });
      const audioChunks = [];
      mediaRecorder.ondataavailable = event => audioChunks.push(event.data);

      const startButton = document.createElement('button');
      startButton.textContent = 'START';
      startButton.style.background = '#4CAF50';
      startButton.style.color = 'white';
      startButton.style.padding = '10px 20px';
      startButton.style.border = 'none';
      startButton.style.borderRadius = '5px';
      startButton.style.margin = '5px';

      const stopButton = document.createElement('button');
      stopButton.textContent = 'STOP';
      stopButton.style.background = '#f44336';
      stopButton.style.color = 'white';
      stopButton.style.padding = '10px 20px';
      stopButton.style.border = 'none';
      stopButton.style.borderRadius = '5px';
      stopButton.style.margin = '5px';
      stopButton.disabled = true; // Nothing to stop until recording starts

      const container = document.createElement('div');
      container.appendChild(startButton);
      container.appendChild(stopButton);
      document.body.appendChild(container);

      let resolveRecording = null;
      const recordingPromise = new Promise(resolve => { resolveRecording = resolve; });

      startButton.onclick = () => {
        audioChunks.length = 0; // Drop chunks from any previous take
        mediaRecorder.start();
        startButton.disabled = true;
        stopButton.disabled = false;
        startButton.textContent = 'RECORDING...';
      };

      stopButton.onclick = () => {
        mediaRecorder.stop();
        startButton.disabled = false;
        stopButton.disabled = true;
        startButton.textContent = 'START';
      };

      mediaRecorder.onstop = () => {
        // Release the microphone; otherwise the browser keeps the capture
        // device (and its recording indicator) active after we are done.
        stream.getTracks().forEach(track => track.stop());

        const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
        const reader = new FileReader();
        reader.onloadend = () => {
            container.remove(); // Remove buttons after recording
            resolveRecording(reader.result); // base64 data URL of the recording
        };
        reader.readAsDataURL(audioBlob);
      };

      return recordingPromise;
    }
    recordAudio(); // The resulting promise is what eval_js waits on
  """

  audio_data_base64 = eval_js(js_interactive_recorder)

  # The browser returns a data URL ("data:audio/webm;base64,<payload>").
  # partition() never raises, so a result without a comma or with an empty
  # payload (e.g. an empty recording) is reported as "no audio" instead of
  # crashing with IndexError or surfacing as a confusing conversion error.
  _, _, payload = (audio_data_base64 or '').partition(',')
  if not payload:
    print("Audio recording failed or was cancelled.")
    return None

  try:
    webm_bytes = b64decode(payload)
    audio = AudioSegment.from_file(io.BytesIO(webm_bytes), format="webm")

    # Mono, 16 kHz, signed 16-bit samples.
    audio.export(filename, format="wav",
                 parameters=["-ac", "1", "-ar", "16000", "-sample_fmt", "s16"])
    print(f"Audio saved to {filename}")
    return Audio(filename)
  except Exception as e:
    # Best-effort: decoding/conversion problems are reported, not raised.
    print(f"Error converting webm to wav: {e}")
    return None

print("Defined custom record_audio function for microphone input, now supporting webm to wav conversion (playback fix applied).")

Defined custom record_audio function for microphone input, now supporting webm to wav conversion (playback fix applied).


In [33]:
from google.colab._message import MessageError

# Prompt the user, run the interactive recorder, and offer playback.
print("Please click 'START' to begin recording and 'STOP' when finished.\n")
try:
  recorded_audio = record_audio('input_audio.wav')

  if not recorded_audio:
    print("No audio was recorded or an error occurred.")
  else:
    print("Recording complete. You can play it back below:")
    display(recorded_audio)
except MessageError as err:
  # Browser-side failures surface as MessageError; the permission case gets
  # a dedicated, more helpful explanation.
  permission_denied = "NotAllowedError: Permission denied" in str(err)
  if permission_denied:
    print(
        "Error: Microphone access denied by the browser.",
        "Please ensure you grant microphone permission when prompted by your browser.",
        "You may need to check your browser's site settings for this notebook to allow microphone access.",
        sep="\n",
    )
  else:
    print(f"An unexpected error occurred during recording: {err}")
  # Keep the name defined (as None) so later cells can test it safely.
  recorded_audio = None

Please click 'START' to begin recording and 'STOP' when finished.

Audio saved to input_audio.wav
Recording complete. You can play it back below:


In [34]:
pip install SpeechRecognition pydub soundfile



In [35]:
import speech_recognition as sr
import soundfile as sf
import os

# Create an instance of the Recognizer class
r = sr.Recognizer()

# Define the input and output filenames
input_audio_path = 'input_audio.wav'
processed_audio_path = 'processed_input_audio.wav'

# Reset the result up front so a failed run cannot leave a stale transcript
# from an earlier execution for later cells to pick up.
transcribed_text = None

try:
    # Re-read the recording with soundfile and rewrite it as plain 16-bit PCM
    # before handing it to SpeechRecognition.
    data, samplerate = sf.read(input_audio_path)

    # Ensure audio is mono by selecting the first channel if multi-channel
    if data.ndim > 1:
        data = data[:, 0]

    # Write the audio to a new WAV file in a standard PCM format
    sf.write(processed_audio_path, data, samplerate, subtype='PCM_16')
    print(f"Audio successfully processed and saved to {processed_audio_path}")

    # Load the processed audio file with SpeechRecognition
    with sr.AudioFile(processed_audio_path) as source:
        # Read the audio data from the source
        audio_data = r.record(source)

        # Use Google Web Speech API to transcribe the audio
        transcribed_text = r.recognize_google(audio_data)
        print("\nTranscribed Text:")
        print(transcribed_text)

except FileNotFoundError:
    print(f"Error: Audio file '{input_audio_path}' not found. Please ensure it was recorded correctly.")
# The SpeechRecognition-specific handlers must come BEFORE the generic
# `except Exception`; both are Exception subclasses, so listing them after it
# (as before) made them unreachable.
except sr.UnknownValueError:
    print("Google Speech Recognition could not understand audio")
except sr.RequestError as e:
    print(f"Could not request results from Google Speech Recognition service; {e}")
except Exception as e:
    # Distinguish soundfile read failures (matched by message text) from
    # anything else, for easier debugging.
    if 'Error opening' in str(e) or 'Bad file' in str(e):
        print(f"Error: `pysoundfile` could not read the audio file '{input_audio_path}'. It might be corrupted or in an unsupported format: {e}")
    else:
        print(f"An unexpected error occurred during audio processing or transcription: {e}")
finally:
    # Clean up the processed audio file if it was created
    if os.path.exists(processed_audio_path):
        os.remove(processed_audio_path)
        print(f"Cleaned up temporary file: {processed_audio_path}")

Audio successfully processed and saved to processed_input_audio.wav

Transcribed Text:
quote converts audio to text fight
Cleaned up temporary file: processed_input_audio.wav


In [36]:
output_filename = 'transcribed_text.txt'

# Resolve the transcript BEFORE opening the file: opening in 'w' mode truncates
# immediately, so doing it first would wipe a previous transcript (and leave an
# empty file) whenever the transcription cell had not produced any text.
try:
    transcript = transcribed_text
except NameError:
    transcript = None

if not isinstance(transcript, str):
    print("Error saving transcript to file: no transcribed text is available. "
          "Run the transcription cell successfully first.")
else:
    try:
        # Explicit UTF-8 so non-ASCII transcripts are written consistently
        # regardless of the platform's default encoding.
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(transcript)
        print(f"Transcript successfully saved to {output_filename}")
    except Exception as e:
        print(f"Error saving transcript to file: {e}")

Transcript successfully saved to transcribed_text.txt
