<a href="https://colab.research.google.com/github/aeleraqi/Vid2TXT/blob/main/Vid2TXT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install Dependencies and Import Libraries

In [1]:
# Install the third-party packages that the import cell below relies on.
# Each line is an IPython shell escape that runs pip in the notebook's runtime.
# pydub            -> audio loading and silence-based splitting
# SpeechRecognition -> imported below as `speech_recognition`
# python-docx      -> imported below as `docx`
# mercury, pandas  -> imported below; not used by the cells visible in this notebook
!pip install pydub
!pip install SpeechRecognition
!pip install python-docx
!pip install mercury
!pip install pandas

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl.metadata (28 kB)
Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl (32.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m19.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.4
Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2
Collecting mercury
  Downloading mercu

In [2]:
# Import necessary libraries
import csv
import io
import os
import tempfile
from datetime import datetime, timedelta

import mercury as mr
import pandas as pd
import speech_recognition as sr
from docx import Document
from pydub import AudioSegment
from pydub.silence import split_on_silence

### Speech Recognition Function

In [3]:
def recognize_speech_google(audio_file, language="ar-EG"):
    """
    Recognize speech from an audio file using Google Speech Recognition.

    Args:
        audio_file: Audio source handed to ``sr.AudioFile`` (the notebook
            passes the path of a WAV file).
        language: Language tag forwarded to ``recognize_google``. Defaults to
            ``"ar-EG"`` (Arabic), the value that used to be hard-coded, so
            existing callers behave exactly as before.

    Returns:
        str: The recognized text. Failures are reported in-band rather than
        raised: ``"(Could not understand)"`` when the recognizer raises
        ``sr.UnknownValueError``, and ``"(Error: <details>)"`` when it raises
        ``sr.RequestError``.
    """
    recognizer = sr.Recognizer()

    # Load the audio inside the `with` block so the source is released before
    # the (potentially slow) recognition request is made.
    with sr.AudioFile(audio_file) as source:
        audio = recognizer.record(source)

    try:
        return recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        # Keep going with a placeholder so one bad chunk does not abort a run.
        return "(Could not understand)"
    except sr.RequestError as e:
        return f"(Error: {e})"

### Main Function to Process Audio and Save Output to DOCX

In [4]:
def _format_offset(total_seconds):
    """Format a non-negative number of seconds as zero-padded ``HH:MM:SS``."""
    minutes, seconds = divmod(int(total_seconds), 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"


def main(audio_file, output_docx):
    """
    Process an audio file to recognize speech and save results to a DOCX file.

    The audio is split on silence, every chunk is transcribed with
    ``recognize_speech_google``, and the results are written as a two-column
    table (``Timestamp``, ``Text``) with one row per chunk.

    Args:
        audio_file: Audio source passed to ``AudioSegment.from_file``.
        output_docx: Destination passed to ``Document.save``.

    Notes:
        Each timestamp is the ``HH:MM:SS`` offset, measured from the start of
        the audio, at which the chunk ends. Offsets are accumulated from the
        chunk durations, so silence that ``split_on_silence`` drops between
        chunks is not counted and the values are approximate.
    """
    # Load the audio file
    audio = AudioSegment.from_file(audio_file)

    # Split audio into chunks based on silence
    chunks = split_on_silence(audio, min_silence_len=500, silence_thresh=-40)

    output_data = []
    # Start from zero rather than the wall clock so the timestamps describe
    # positions in the recording and are identical on every run.
    elapsed_seconds = 0.0

    # Stage chunks in a private temporary directory: nothing is left behind in
    # the working directory and an existing "temp.wav" is never overwritten.
    with tempfile.TemporaryDirectory() as tmp_dir:
        chunk_path = os.path.join(tmp_dir, "chunk.wav")

        for chunk in chunks:
            # export() hands back the open output file; close it so the data
            # is on disk and the handle is released before it is read again.
            exported = chunk.export(chunk_path, format="wav")
            exported.close()

            # Recognize speech from the exported chunk
            text = recognize_speech_google(chunk_path)

            # Timestamp the chunk with the running offset at its end
            elapsed_seconds += chunk.duration_seconds
            output_data.append((_format_offset(elapsed_seconds), text))

    # Create a new Word document with a title
    doc = Document()
    doc.add_heading('Speech Recognition Output', 0)

    # Create a table with two columns (Timestamp and Text); the single
    # initial row holds the headers.
    table = doc.add_table(rows=1, cols=2)
    hdr_cells = table.rows[0].cells
    hdr_cells[0].text = 'Timestamp'
    hdr_cells[1].text = 'Text'

    # Add the data to the table
    for timestamp, text in output_data:
        row_cells = table.add_row().cells
        row_cells[0].text = timestamp
        row_cells[1].text = text

    # Save the document
    doc.save(output_docx)


### Main Script Execution

In [None]:
from google.colab import files

if __name__ == "__main__":
    # Upload the audio file using Google Colab's file upload widget.
    # The result maps each uploaded file name to its contents.
    print("Please upload your audio file:")
    uploaded = files.upload()

    if not uploaded:
        # Nothing came back (for example the upload was cancelled); stop here
        # instead of failing with an IndexError on the missing first entry.
        print("No file was uploaded; nothing to transcribe.")
    else:
        # Use the first uploaded file as the input
        audio_file = next(iter(uploaded))

        # Specify the output DOCX file name
        output_docx = "output.docx"

        # Call the main function with the uploaded audio file
        main(audio_file, output_docx)


Please upload your audio file:
