In [None]:
from pydub import AudioSegment

def chunk_mp4_file(input_file, chunk_length_ms):
    """Split the audio of an MP4 file into consecutive chunks of a fixed length.

    Each chunk is exported next to the input as
    ``f"{input_file}_chunk_{i}.mp4"`` where ``i`` starts at 0. The last chunk
    holds whatever remains and may be shorter than ``chunk_length_ms``.

    :param input_file: Path of the MP4 file to split.
    :param chunk_length_ms: Length of each chunk in milliseconds; must be > 0.
    :return: The number of chunk files that were exported.
    :raises ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # Validate before decoding anything: a zero length would otherwise fail
    # with ZeroDivisionError only after the whole file was loaded, and a
    # negative length would silently export nothing and return a negative count.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be a positive number of milliseconds, got {chunk_length_ms!r}"
        )

    # Load the audio file
    audio = AudioSegment.from_file(input_file, format="mp4")

    # The segment's length is used as the total duration in milliseconds.
    total_length_ms = len(audio)

    # Split and export each chunk; the range yields one start offset per
    # chunk, including a final partial chunk when the length does not divide evenly.
    number_of_chunks = 0
    for i, start_ms in enumerate(range(0, total_length_ms, chunk_length_ms)):
        end_ms = min(start_ms + chunk_length_ms, total_length_ms)
        chunk = audio[start_ms:end_ms]
        chunk.export(f"{input_file}_chunk_{i}.mp4", format="mp4")
        number_of_chunks = i + 1

    return number_of_chunks


In [None]:
# Split the keynote recording into 20-minute pieces and report how many were written.
keynote_path = "./oracle/Building the more intelligent future of cloud Clay Magouyrk keynote  Oracle CloudWorld 2023.mp4"
twenty_minutes_ms = 20 * 60 * 1000  # minutes * seconds * milliseconds
num_chunks = chunk_mp4_file(keynote_path, twenty_minutes_ms)
print(f"total chunks created: {num_chunks}")

In [None]:
import whisper

def transcribe_mp4_files(file_list, output_file):
    """Transcribe each file with Whisper and write all transcripts to one text file.

    The transcripts are written in the order the files appear in ``file_list``,
    each followed by a blank line. ``output_file`` is overwritten if it exists,
    and its parent directory is created when missing.

    :param file_list: Iterable of paths of the media files to transcribe.
    :param output_file: Path of the UTF-8 text file to write.
    :raises TypeError: If ``file_list`` is a single string/bytes value rather
        than a collection of paths.
    """
    import os  # local import so the function does not depend on cell run order

    # A bare string would be iterated character by character, so reject it
    # up front (e.g. an error message returned in place of a list).
    if isinstance(file_list, (str, bytes)):
        raise TypeError(
            f"file_list must be a collection of file paths, got a single string: {file_list!r}"
        )
    files = list(file_list)

    # Initialize the Whisper model only when there is something to transcribe.
    model = whisper.load_model("base") if files else None

    # Make sure the destination directory exists before opening the file.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Open the output file for writing the transcriptions
    with open(output_file, "w", encoding="utf-8") as output:
        # Loop through each file and transcribe
        for file in files:
            result = model.transcribe(file)
            transcription = result["text"]

            # Write the transcription to the output file
            output.write(transcription + "\n\n")


In [None]:
import os

def list_files_with_full_path(directory_path):
    """
    List the regular files directly inside a directory, sorted by file name.

    Each returned path is ``directory_path`` joined with the file name, with
    backslashes replaced by forward slashes; it is only absolute if
    ``directory_path`` is. Subdirectories are skipped and not descended into.

    :param directory_path: The path to the directory whose files need to be listed.
    :return: A list of file paths in the given directory, or the string
        "Directory does not exist." when ``directory_path`` is not an existing
        directory (callers must check for this sentinel before iterating).
    """
    # isdir (rather than exists) also rejects a path that points at a file,
    # which os.listdir could not list.
    if not os.path.isdir(directory_path):
        return "Directory does not exist."

    # os.listdir gives no ordering guarantee; sort so results are reproducible.
    full_file_paths = []
    for name in sorted(os.listdir(directory_path)):
        candidate = os.path.join(directory_path, name)
        if os.path.isfile(candidate):
            full_file_paths.append(candidate.replace('\\', '/'))
    return full_file_paths

In [None]:
# Transcribe the files found in ./oracle into a single transcript file.
# NOTE(review): every regular file directly inside ./oracle is passed on, not
# only the chunk files — confirm the un-chunked original is not transcribed too.
transcript_path = "./oracle/transcripts/Building the more intelligent future of cloud Clay Magouyrk keynote  Oracle CloudWorld 2023.txt"
file_list = list_files_with_full_path("./oracle")
transcribe_mp4_files(file_list, transcript_path)
