<a href="https://colab.research.google.com/github/akshathmangudi/SummarizeVideo/blob/main/whisper_trial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
%pwd

'/content'

In [15]:
import os
from pathlib import Path
from pytube import YouTube
from tqdm import tqdm

In [13]:
def link_to_audio(yt_link):
    """
    Download the audio track of a YouTube video into the ./audio directory.

    The first audio-only stream reported by pytube is downloaded, and the
    downloaded file is then renamed so that it carries a ``.mp3`` extension.
    Note that this is a rename only: no re-encoding happens in this function,
    so the file keeps whatever audio format the stream delivered.

    Arguments:
    yt_link: The YouTube link

    Returns: the path of the renamed ``.mp3`` file inside the audio directory.

    Raises:
    ValueError: if the video exposes no audio-only stream.
    """

    # The audio directory lives next to the current working directory.
    output_path = Path.cwd() / "audio"

    # Calling the constructor for the link and only extracting the audio.
    youtube = YouTube(yt_link)
    video = youtube.streams.filter(only_audio=True).first()
    if video is None:
        # Fail with a clear message instead of an AttributeError on
        # ``None.download`` further down.
        raise ValueError(f"No audio-only stream found for {yt_link}")

    # If the output path does not exist, create one.
    if output_path.is_dir():
        print(f"{output_path} already exists.")
    else:
        print(f"{output_path} not found, creating one...")
        output_path.mkdir(parents=True, exist_ok=True)

    # Download the file and save it into the 'audio' directory
    out_file = video.download(output_path=output_path)
    base, _ext = os.path.splitext(out_file)

    # Swap the extension for .mp3. os.replace overwrites an existing target on
    # every platform, whereas os.rename raises on Windows when the .mp3 from a
    # previous run is still there.
    result_file = base + '.mp3'
    os.replace(out_file, result_file)

    return result_file

In [18]:
def find_audio_files(root_path: Path) -> list:
    """
    Collect the .mp3 files that sit directly inside ``root_path / "audio"``.

    Arguments:
    root_path: directory that contains the "audio" sub-directory.

    Returns: a list of Path objects, one per matching file (sub-directories
    of "audio" are not searched).
    """
    audio_dir = root_path / "audio"
    return list(audio_dir.glob('*.mp3'))


def audio_to_text(mp3_list: list, root_dir: Path, audio_dir: Path, output_dir: Path):
    """
    Transcribe every .mp3 file under ``audio_dir`` and write one .txt per file.

    Arguments:
    mp3_list: list of audio files; only its length is used, for the
        "Number of audio files" message.
    root_dir: not used by this function; kept so existing callers keep working.
    audio_dir: directory searched recursively for ``*.mp3`` files.
    output_dir: directory that receives ``<audio file stem>.txt``; it is
        created when it does not exist.

    Returns: None. The transcripts are written to ``output_dir``.
    """
    # NOTE(review): this function relies on a module-level `whisper` name that
    # the visible import cell does not provide -- confirm it is imported
    # elsewhere in the notebook before running.
    print("Loading whisper...")
    model = whisper.load_model("base")
    print("Loading complete")

    if output_dir.is_dir():
        print(f"{output_dir} already exists.")
    else:
        print(f"{output_dir} does not exist, creating one...")
        output_dir.mkdir(parents=True, exist_ok=True)

    dir_length = len(mp3_list)
    print(f"Number of audio files: {dir_length}")

    # Materialise the files that are actually transcribed, so the progress bar
    # total matches the loop instead of the (possibly different) mp3_list length.
    audio_files = sorted(audio_dir.rglob("*.mp3"))

    with tqdm(total=len(audio_files), desc="Transcribing files") as pbar:
        for file_path in audio_files:
            audio_data = whisper.load_audio(str(file_path))
            result_text = model.transcribe(audio_data, fp16=False, verbose=True)["text"]

            # NOTE: files sharing a stem in different sub-directories map to
            # the same .txt name, so the later one overwrites the earlier one.
            text_file_path = output_dir / f"{file_path.stem}.txt"

            # Explicit UTF-8 so transcripts with non-ASCII characters are
            # written correctly regardless of the platform default encoding.
            with text_file_path.open("w", encoding="utf-8") as file:
                file.write(result_text)
            pbar.update(1)
    print("Transcription complete")

In [20]:
# Working directories, all resolved relative to the current working directory:
# downloaded audio goes to ./audio and the transcripts go to ./text.
root_path = Path.cwd()
output_path = root_path / "text"
audio_path = root_path / "audio"

if __name__ == "__main__":
    # Step 1: download the audio track of the video into ./audio.
    link_to_audio("https://www.youtube.com/watch?v=aQk-gCqq30k&t=1s")

    # Step 2: transcribe the .mp3 files under ./audio into ./text.
    audio_to_text(
        find_audio_files(root_path),
        root_dir=root_path,
        audio_dir=audio_path,
        output_dir=output_path,
    )

/content/audio already exists.
Loading whisper...
Loading complete
/content/text does not exist, creating one...
Number of audio files: 1


Transcribing files:   0%|          | 0/1 [00:00<?, ?it/s]

Detecting language using up to the first 30 seconds. Use `--language` to specify the language
Detected language: English
[00:00.000 --> 00:03.200]  Morning, son.
[00:03.200 --> 00:07.840]  Are you ready to start with your first job?
[00:07.840 --> 00:09.840]  Nervous?
[00:09.840 --> 00:14.960]  Yes, that I feel so nervous.
[00:14.960 --> 00:21.000]  What will happen if I fail at this job?
[00:21.000 --> 00:24.120]  It's understandable that you feel nervous.
[00:24.120 --> 00:30.480]  I remember I failed like that in my first job too.
[00:30.480 --> 00:31.480]  Really?
[00:31.480 --> 00:33.240]  And what did you do?
[00:33.240 --> 00:34.960]  Did you fail?
[00:34.960 --> 00:37.960]  Did you succeed?
[00:37.960 --> 00:42.480]  Well, I was nervous.
[00:42.480 --> 00:46.080]  But then I learned a lot of things.
[00:46.080 --> 00:49.760]  I can help you with that.
[00:49.760 --> 00:54.800]  I can give you some tips to succeed in your first job.
[00:54.800 --> 00:58.320]  I have learned a lo

Transcribing files: 100%|██████████| 1/1 [00:15<00:00, 15.24s/it]

[10:08.720 --> 10:12.960]  And if you want to support this channel, you can join us.
[10:12.960 --> 10:16.080]  Or click on the super thanks button.
[10:16.080 --> 10:18.800]  Thank you very much for your support.
[10:18.800 --> 10:19.320]  Take care.
Transcription complete



